def test_item_get_store_retrieve():
    """Tests the item's whole lifecycle - getting, storing and retrieving."""
    # Initialise DB connection
    db = DB(db_url='sqlite:///:memory:')
    db.db.row_type = NoNoneOrderedDict
    # Test with the first item only
    resp = flatdict.FlatDict(cls.session.get(cls.items_url.format(1)).json())
    # Add item to the db (copy as lists modified in-place; not what we want)
    db.add_record(copy.copy(resp))
    # Insert into DB
    db.commit()
    # Retrieve and compare to original to ensure data integrity is maintained,
    # specifically foreign keys
    record = list(db.retrieve_records())[0]
    # Ensure keys are identical
    assert sorted(record.keys()) == sorted(resp.keys())
    for items in resp:
        # Change the foreign items to match that of the retrieved records
        if type(resp[items]) == list:
            resp[items] = [
                dict(flatdict.FlatDict(item)) for item in resp[items]
            ]
            # Simplest way to check unordered list items
            assert record[items] == resp[items] or \
                min([item in record[items] for item in resp[items]])
        else:
            # Check the keys against each other to ensure they are identical
            assert record[items] == resp[items]
def execute(self, using=None, result_class=SQLResult):
    def extract_subcontexts(values):
        subcontexts = []
        for value in values:
            try:
                subctx = value.context
            except AttributeError:
                pass
            else:
                subcontexts.append(subctx)
                subcontexts += extract_subcontexts(subctx.values())
        return subcontexts

    ctx = {}
    for subcontext in extract_subcontexts(self._context.values()):
        ctx.update(subcontext)
    ctx.update(self._context)
    query_params = flatdict.FlatDict(ctx, delimiter='.').as_dict()
    cur = self._adapter.do_query(sql=self.sql,
                                 query_params=query_params,
                                 using=using or self._using)
    return result_class(cur)
def test_update_flat(self):
    expectation = flatdict.FlatDict({
        'foo:bar:baz': 4,
        'foo:bar:qux': 5,
        'foo:bar:corge': 6,
        'foo:grault:baz': 3,
        'foo:grault:qux': 4,
        'foo:grault:corge': 5,
        'garply:foo': 0,
        'garply:bar': 1,
        'garply:baz': 2,
        'garply:qux:corge': 3,
        'foo:list:0': 'F',
        'foo:list:1': 'O',
        'foo:list:2': 'O',
        'foo:tuple:0': 'F',
        'foo:tuple:1': 0,
        'foo:tuple:2': 0
    })
    self.object.update({
        'foo:bar:baz': 4,
        'foo:bar:qux': 5,
        'foo:bar:corge': 6
    })
    self.assertDictEqual(self.object, expectation)
def test_update_flat(self):
    expectation = flatdict.FlatDict({
        'foo-bar-baz': 4,
        'foo-bar-qux': 5,
        'foo-bar-corge': 6,
        'foo-grault-baz': 3,
        'foo-grault-qux': 4,
        'foo-grault-corge': 5,
        'garply-foo': 0,
        'garply-bar': 1,
        'garply-baz': 2,
        'garply-qux-corge': 3,
        'foo-list-0': 'F',
        'foo-list-1': 'O',
        'foo-list-2': 'O',
        'foo-tuple-0': 'F',
        'foo-tuple-1': 0,
        'foo-tuple-2': 0
    })
    self.object.update({
        'foo-bar-baz': 4,
        'foo-bar-qux': 5,
        'foo-bar-corge': 6
    })
    self.assertDictEqual(self.object, expectation)
def makeRNdataframe(scoreERS_output, ifPrint=1, filename="scoreERS.csv"):
    # Utility to process data from pickles generated by BONITA
    """Takes the three-layered dictionary generated by scoreERS as input"""
    allRes_flat = flatdict.FlatDict(scoreERS_output)
    allRes_df = pd.DataFrame(allRes_flat.iteritems())
    allRes_df[[
        "ruleLengths", "rnAllNodes", "ImportanceVals", "inDegree",
        "scoreFunction6"
    ]] = pd.DataFrame(
        [item for sublist in allRes_df[[1]].values for item in sublist],
        index=allRes_df.index)
    allRes_df[["Pathway", "Iteration", "Node"]] = pd.DataFrame(
        [x[0].split(":", 2) for x in allRes_df[[0]].values],
        index=allRes_df.index)
    allRes_df[["ruleLengths", "ImportanceVals", "inDegree",
               "scoreFunction6"]] = allRes_df[[
                   "ruleLengths", "ImportanceVals", "inDegree",
                   "scoreFunction6"
               ]].apply(pd.to_numeric, axis=1)
    if ifPrint:  # or if filename:
        resFile = open(str(filename), "w+")
        allRes_df.to_csv(resFile)
        resFile.close()
    return allRes_df
def process_item_data(self, db, ref, response):
    record = {}
    # Add items to record
    ...
    # Add to DB
    db.add_record(flatdict.FlatDict(record))
def influxdb_to_json(self, sol_influxdb):
    """
    Converts an Influxdb query reply into a list of dicts.

    :param sol_influxdb dict: the result of a database query
        (such as SELECT * FROM)
    :return: a list of JSON SOL objects
    :rtype: list
    """

    # verify influxdb data
    if not ("series" in sol_influxdb):
        raise ValueError("Influxdb data not recognized")

    # init
    json_list = []

    # remove unused headers
    for serie in sol_influxdb["series"]:
        for val in serie['values']:
            # convert to dict
            d_influxdb = dict(zip(serie['columns'], val))

            # unflat dict
            obj_value = flatdict.FlatDict(d_influxdb).as_dict()

            # parse specific HR_NEIGHBORS
            hr_nghb_name = SolDefines.solTypeToTypeName(
                SolDefines, SolDefines.SOL_TYPE_DUST_NOTIF_HRNEIGHBORS)
            if serie['name'] == hr_nghb_name:
                for i in range(0, len(obj_value["neighbors"]) + 1):
                    ngbr_id = str(i)
                    # new HR_NGBR parsing
                    if ngbr_id in obj_value["neighbors"]:
                        if obj_value["neighbors"][ngbr_id][
                                "neighborFlag"] is None:
                            del obj_value["neighbors"][ngbr_id]
                    # old HR_NGBR parsing
                    if ngbr_id in obj_value:
                        if obj_value[ngbr_id]["neighborFlag"] is not None:
                            obj_value["neighbors"][ngbr_id] = obj_value[
                                ngbr_id]
                        del obj_value[ngbr_id]

            # time is not passed in the "value" field
            del obj_value["time"]

            # create final dict
            jdic = {
                'type': serie['name'],
                'mac': serie['tags']['mac'],
                'value': obj_value,
                'timestamp': d_influxdb['time'],
            }
            json_list.append(jdic)

    return json_list
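# Illustrative sketch (not from the original project; column names invented).
# The "unflat dict" step above works because FlatDict rebuilds nesting from
# keys that already contain its default ':' delimiter, and as_dict() then
# returns plain nested dicts.
import flatdict

row = {'time': '2017-01-01T00:00:00Z', 'neighbors:0:rssi': -77}
nested = flatdict.FlatDict(row).as_dict()
# nested == {'time': '2017-01-01T00:00:00Z', 'neighbors': {'0': {'rssi': -77}}}
print(nested)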
def _default_configuration():
    """Return the default configuration for Helper

    :rtype: dict

    """
    return flatdict.FlatDict({
        'Application': APPLICATION,
        'Daemon': DAEMON,
        'Logging': LOGGING
    })
def __init__(self, dictionary):
    """Instantiate a Patcher instance.

    Args:
        dictionary (dict): The dictionary to patch.
    """
    self._fdict = flatdict.FlatDict(dictionary)
    self._logger = logger.get_logger('{0}.api.patcher.Patcher'.format(
        constants.PROGRAM_NAME))
def get_error(tree, examples, attributes):
    for k in tree.keys():
        first = k
    x, y = len(tree[first].keys()), 2
    label_listA = [[0 for i in range(y)] for j in range(x)]
    label_listB = [None] * len(tree[first].keys())
    i = 0
    j = 0
    for v in tree[first].values():
        if type(v) is not str:
            flat_labels = flatdict.FlatDict(v)
            label_listA[j][0] = flat_labels.keys()
            label_listA[j][1] = flat_labels.values()
            j += 1
        else:
            label_listB[i] = v
            i += 1
    print(label_listA)
    final_watch = []
    c = []
    total_error = 0.0
    count = 0
    for i in range(0, len(label_listA[0][0])):
        labelstmp = label_listA[0][0][i]
        labels = labelstmp.split(":")
        for j in labels:
            if j in attributes:
                labels.remove(j)
        for k in examples:
            if set(labels) < set(k) and k[0] != label_listA[0][1][i]:
                total_error += 1
                count += 1
                watch = 1 - (total_error / count)
                final_watch.append(watch)
                c.append(count)
            else:
                count += 1
    print(final_watch)
    print(c)
    final = 1 - (total_error / count)
    print(final)
    return (final_watch)
def _make_table(json_table: str) -> str:
    # Load the created json as dict
    to_table = json.loads(json_table)
    # Transform it into a flat dictionary
    to_table = flatdict.FlatDict(to_table)
    # Create an array where the first row holds the keys
    # and the second row holds the values
    to_table = [to_table.keys(), to_table.values()]
    return tabulate(to_table, headers='firstrow', tablefmt='fancy_grid')
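# Illustrative sketch (assumes the tabulate package is installed; the JSON
# payload is made up). It mirrors _make_table() above: flatten the parsed
# JSON, then hand tabulate a two-row table of flattened keys over values.
import json
import flatdict
from tabulate import tabulate

payload = json.dumps({"user": {"name": "ada", "id": 7}})
flat = flatdict.FlatDict(json.loads(payload))
print(tabulate([list(flat.keys()), list(flat.values())],
               headers='firstrow', tablefmt='fancy_grid'))
# Renders a grid whose headers are the flattened keys, e.g. 'user:name'.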
def _load_config_file(self):
    """Load the configuration file into memory, returning the content.

    """
    LOGGER.info('Loading configuration from %s', self._file_path)
    if self._file_path.endswith('json'):
        config = self._load_json_config()
    else:
        config = self._load_yaml_config()
    for key, value in [(k, v) for k, v in config.items()]:
        if key.title() != key:
            config[key.title()] = value
            del config[key]
    return flatdict.FlatDict(config)
def parse_the_json(self, dev, root):
    """
    Parse JSON data

    The parse_the_json() method contains the steps to convert the JSON file
    into a flat dict.

    http://github.com/gmr/flatdict
    class flatdict.FlatDict(value=None, delimiter=None, former_type=<type 'dict'>)

    -----
    :param dev:
    :param root:
    :return self.jsonRawData:
    """
    try:
        parsed_simplejson = simplejson.loads(root)

        # If the payload is a top-level list, first convert it to a dict,
        # prefixing each index with "No_" (Indigo appears to not allow
        # DeviceNames to start with numbers). FlatDict then runs correctly,
        # since it is handed a dict rather than a list.
        if isinstance(parsed_simplejson, list):
            parsed_simplejson = dict(
                (u"No_" + unicode(i), v)
                for (i, v) in enumerate(parsed_simplejson))

        self.jsonRawData = flatdict.FlatDict(parsed_simplejson,
                                             delimiter='_ghostxml_')
        return self.jsonRawData

    except ValueError as sub_error:
        self.host_plugin.logger.debug(u"[{0}] Parse Error: {1}".format(
            dev.id, sub_error))
        self.host_plugin.logger.debug(u"[{0}] jsonRawData {0}".format(
            dev.id, self.jsonRawData))

        # If we let it, an exception here will kill the device's thread.
        # Therefore, we have to return something that the device can use in
        # order to keep the thread alive.
        self.jsonRawData = {
            'parse_error': "There was a parse error. Will continue to poll."
        }
        return self.jsonRawData
def make_usable(city):
    """
    A quick function to dump the specified collection into an array and hand
    it back to be usable with sklearn's feature extraction module.

    Arguments:
    * city - a string indicating which city collection to make usable
    """
    # open up the MongoDB
    conn = pymongo.MongoClient()
    db = conn.distribution_center

    # verify which collection
    if city == 'washington dc':
        collection = db.dc_eats
    else:
        collection = db.restaurants

    # turn the cursor response into an array object
    init_array = list(collection.find())

    # because sklearn only accepts dict objects with a depth of 1, I need to
    # iterate through the array that MongoDB gives me and flatten each record
    # until it meets my specifications
    array = []  # this will hold my final array of 1-d dict objects

    # iteration goes here
    for restaurant in init_array:
        restaurant.pop('_id')
        # don't want to lose the info in the category array
        cat = restaurant['categories']
        flat_cat = ''
        for item in cat:
            title = item['title']
            flat_cat += str(title) + ','
        # collapsing it down to a 1-d dict object
        flat = flatdict.FlatDict(restaurant)
        flat.update(categories=flat_cat)
        new_restaurant = {}
        # can't hand sklearn a flatdictionary object
        for key, value in flat.iteritems():
            new_restaurant[key] = value
        array.append(new_restaurant)  # add it to my final array

    # let Mongo rest, it's done its job
    conn.close()

    return array  # this array can now be handed to sklearn's DictVectorizer
def resolve_yamls(yaml_templates, environ=os.environ):
    logger.debug("Merging following yaml_templates: %s" % yaml_templates)
    logger.debug("Using environ: %s" % environ)
    merged_yaml = _merge_dicts(reversed(yaml_templates + [environ]))
    flattened = flatdict.FlatDict(merged_yaml, delimiter=".")
    keep_resolving = True
    loops = 0
    while keep_resolving and loops < len(flattened):
        loops += 1
        keep_resolving = False
        for key, value in flattened.items():
            keys_to_resolve = re.findall(r"\$\{(.*?)\}", str(value))
            if len(keys_to_resolve) > 0:
                keep_resolving = True
                resolved_keys = _resolve_key_substition(flattened,
                                                        keys_to_resolve)
                for sub_key, resolved_key in resolved_keys:
                    flattened[key] = flattened[key].replace(
                        "${%s}" % sub_key, str(resolved_key))
    return flattened
def test_pop_top(self):
    key = 'foo'
    expectation = flatdict.FlatDict({
        'bar:baz': 0,
        'bar:qux': 1,
        'bar:corge': 2,
        'grault:baz': 3,
        'grault:qux': 4,
        'grault:corge': 5,
        'foo:list:0': 'F',
        'foo:list:1': 'O',
        'foo:list:2': 'O',
        'foo:tuple:0': 'F',
        'foo:tuple:1': 0,
        'foo:tuple:2': 0
    })
    response = self.object.pop(key)
    self.assertDictEqual(response, expectation)
    self.assertTrue(key not in self.object)
def test_pop_top(self):
    key = 'foo'
    expectation = flatdict.FlatDict({
        'bar-baz': 0,
        'bar-qux': 1,
        'bar-corge': 2,
        'grault-baz': 3,
        'grault-qux': 4,
        'grault-corge': 5,
        'list-0': 'F',
        'list-1': 'O',
        'list-2': 'O',
        'tuple-0': 'F',
        'tuple-1': 0,
        'tuple-2': 0
    })
    response = self.object.pop(key)
    self.assertDictEqual(response, expectation)
    self.assertTrue(key not in self.object)
def flatten(properties):
    """
    Convert a dict into a flattened array,
    e.g. {'a': 'b', 'c': [0, 1, 2]} =>
    [
        'a:b',
        'c.0:0',
        'c.1:1',
        'c.2:2'
    ]

    :param properties:
    :return:
    """
    flat = flatdict.FlatDict(properties, delimiter='.')
    result = []
    for k, v in six.iteritems(flat):
        result.append((k, v))
    return result
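# Illustrative sketch (not part of the original module): with a plain nested
# dict, the helper above yields (key, value) pairs whose keys are joined with
# the '.' delimiter. Whether list members are also flattened, as in the
# docstring example, depends on the installed flatdict version.
import flatdict

sample = {'a': 'b', 'c': {'x': 0, 'y': 1}}
pairs = [(k, v) for k, v in flatdict.FlatDict(sample, delimiter='.').items()]
# pairs contains ('a', 'b'), ('c.x', 0) and ('c.y', 1)
print(pairs)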
def locate():
    query_params = app.current_request.query_params
    if query_params is None or query_params.get('bucket_name') is None:
        return Response(
            body='Required "bucket_name" query parameter not specified',
            status_code=400,
            headers={'Content-Type': 'text/plain'})
    bucket_name = app.current_request.query_params.get('bucket_name', None)
    bucket_map = collapse_bucket_configuration(
        get_yaml_file(conf_bucket, bucket_map_file, s3_resource)['MAP'])
    search_map = flatdict.FlatDict(bucket_map, delimiter='/')
    matching_paths = [
        key for key, value in search_map.items() if value == bucket_name
    ]
    if (len(matching_paths) > 0):
        return Response(body=json.dumps(matching_paths),
                        status_code=200,
                        headers={'Content-Type': 'application/json'})
    return Response(body=f'No route defined for {bucket_name}',
                    status_code=404,
                    headers={'Content-Type': 'text/plain'})
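# Illustrative sketch (bucket map contents invented): flattening the bucket
# map with '/' as the delimiter turns nested configuration into path-like
# keys, so the reverse lookup by bucket name in locate() is a one-line
# comprehension.
import flatdict

bucket_map = {'data': {'raw': 'bucket-a', 'processed': 'bucket-b'}}
search_map = flatdict.FlatDict(bucket_map, delimiter='/')
print([key for key, value in search_map.items() if value == 'bucket-b'])
# ['data/processed']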
def add_record(self, record):
    """
    Adds a record to the DB - automatically splits foreign values, etc.

    To be run from an async function.
    """
    # Process item's values into relevant tables/formats
    # Lists are symbolic of foreign items.
    foreign = [item for item in record.items() if type(item[1]) == list]
    for name, items in foreign:
        # Add items to their relative table after flattening them
        # Has to be dicts with an id as added to a new table and referenced
        table_data = [
            flatdict.FlatDict(item, dict_class=OrderedDict) for item in items
        ]
        self.table_add(name, table_data)
        # Set key to name of foreign table for retrieve function to detect.
        record[name] = str([item['id'] for item in items])
    # Item is now the stripped down version (with FKs). Add to main table.
    self.table_add(self.main_tbl_name, record)
def create_log_data_frame(log_dict_list: List[Dict]) -> pd.DataFrame:
    """Creates a single pandas dataframe containing all the spins optimization
    output data.

    Uses flatdict.FlatDict to transform the hierarchical dictionary data from
    the SPINS output pickle files into a flat dictionary with colon-separated
    keys.

    Args:
        log_dict_list: List of dictionaries contained in the spins
            optimization pickle file outputs.

    Returns:
        Single pandas dataframe with all the data in the list of dictionaries
        sorted according to the order in which the log data was written.
    """
    log_df = pd.DataFrame()
    for log_dict in log_dict_list:
        # Flatten the log dictionary.
        flat_log = flatdict.FlatDict(log_dict)
        # Replace all entries in the flattened log dictionaries that are not
        # strings with a list containing the entry to allow array data to be
        # stored in the pandas dataframe cell.
        for key in flat_log.keys():
            if not isinstance(flat_log[key], str):
                flat_log[key] = [flat_log[key]]
        # Create a pandas dataframe from the flattened log dictionary and
        # concatenate it with the existing pandas dataframe which will
        # eventually store all the log information.
        single_log_df = pd.DataFrame(dict(flat_log), index=[0])
        log_df = pd.concat([log_df, single_log_df],
                           axis=0,
                           ignore_index=True,
                           sort=False)
    if LOG_COUNTER in log_df.columns:
        log_df = log_df.sort_values(by=[LOG_COUNTER])
    return log_df
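# Illustrative sketch (log entry invented; assumes a recent flatdict release,
# 4.x): dict(flatdict.FlatDict(...)) is what gives the dataframe its
# colon-separated column names in the function above.
import flatdict

log_entry = {'event': 'optimizing', 'power': {'total': 0.25}}
print(dict(flatdict.FlatDict(log_entry)))
# {'event': 'optimizing', 'power:total': 0.25}  (key order may differ)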
def extract_features(self, request):
    """Extract features from a given request.

    Parameters
    ----------
    request : dict(object)
        The request for the features to be extracted.
    """
    # All features needed will be stored in a dictionary. Our machine
    # learning algorithm will be able to learn from it.
    features = {}
    features['remote_host'] = request['remote_host']
    features['remote_user'] = request['remote_user']
    date_time = request['time_received_utc_datetimeobj']
    features['time'] = date_time.strftime('%I:%M:%S')
    features['date'] = date_time.strftime('%Y-%m-%d')
    features['request_method'] = request['request_method']
    features['status'] = request['status']
    user_agent = flatdict.FlatDict(
        httpagentparser.detect(request['request_header_user_agent']))
    features.update(user_agent)
    features['url_len'] = str(len(request['request_url']))
    features['referer_len'] = str(len(request['request_header_referer']))
    features['response_bytes'] = request['response_bytes']
    features = \
        self._process_url(features, request['request_header_referer'])
    features = self._process_url(features, request['request_url'])
    return features
        }
    },
    "d": [6, 7, 8]
}))
# {'a': 1, 'c.a': 2, 'c.b.x': 3, 'c.b.y': 4, 'c.b.z': 5, 'd': [6, 7, 8]}

# method - 4 -- using flatdict module
print(
    flatdict.FlatDict(
        {
            "a": 1,
            "c": {
                "a": 2,
                "b": {
                    "x": 3,
                    "y": 4,
                    "z": 5
                }
            },
            "d": [6, 7, 8]
        },
        delimiter=".",
    ))

# collections.abc
# ------------------
# 'AsyncGenerator', 'AsyncIterable', 'AsyncIterator', 'Awaitable',
# 'ByteString',
# 'Callable', 'Collection', 'Container', 'Coroutine',
# 'Generator',
# 'Hashable',
def emit_formatted(self, record):
    if self.flatten:
        results = flatdict.FlatDict(record)
        self.emit(self._dump_results(results))
    else:
        self.emit(self._dump_results(record))
def test_as_dict(self):
    flat_dict_value = flatdict.FlatDict(self.VALUES)
    self.assertDictEqual(flat_dict_value.as_dict(), self.VALUES)
def setUp(self):
    self.object = flatdict.FlatDict(self.VALUES)
    self.keys = sorted(self.KEYS)
def setUp(self):
    self.dict = flatdict.FlatDict(self.DOCUMENT, as_dict_list_awareness=True)
    self.keys = sorted(self.KEYS)
def setUp(self):
    self.object = flatdict.FlatDict(self.VALUES, '^')
    self.object.set_delimiter('-')
    self.keys = sorted([k.replace(':', '-') for k in self.KEYS])
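# Illustrative sketch (toy values): set_delimiter() changes the delimiter used
# to build the flattened keys, which is why the expected KEYS in the setUp()
# above are re-joined with '-' before sorting.
import flatdict

d = flatdict.FlatDict({'foo': {'bar': 1}}, '^')
print(list(d.keys()))   # ['foo^bar']
d.set_delimiter('-')
print(list(d.keys()))   # ['foo-bar']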
def main():
    client = InfluxDBClient(host=args.influxdb_host,
                            ssl=args.ssl,
                            verify_ssl=False,
                            port=8086,
                            database=args.database)
    logger = configure_logging('parse_operations')
    with open(args.input_file, 'r', encoding="latin-1") as f:
        line_count = 0
        for chunk in grouper(f, args.batch_size):
            json_points = []
            for line in chunk:
                # zip_longest will backfill any missing values with None, so we
                # need to handle this, otherwise we'll miss the last batch
                line_count += 1
                if line and line.strip().endswith("ms"):
                    values = {}
                    tags = {
                        'project': args.project,
                        'hostname': args.hostname,
                    }
                    try:
                        tags['operation'] = line.split("] ", 1)[1].split()[0]
                    except IndexError as e:
                        logger.error(
                            "Unable to get operation type - {} - {}".format(
                                e, line))
                        break
                    if tags['operation'] in [
                            'command', 'query', 'getmore', 'insert', 'update',
                            'remove', 'aggregate', 'mapreduce'
                    ]:
                        thread = line.split("[", 1)[1].split("]")[0]
                        # Alternately - print(split_line[3])
                        if tags['operation'] == 'command':
                            tags['command'] = line.split(
                                "command: ")[1].split()[0]
                        if "conn" in thread:
                            tags['connection_id'] = thread
                        split_line = line.split()
                        values['duration_in_milliseconds'] = int(
                            split_line[-1].rstrip('ms'))
                        # TODO 2.4.x timestamps have spaces
                        timestamp = parse(split_line[0])
                        if split_line[1].startswith("["):
                            # TODO - Parse locks from 2.6 style loglines
                            # 2.4 Logline:
                            tags['namespace'] = split_line[3]
                            for stat in reversed(split_line):
                                if "ms" in stat:
                                    pass
                                elif ":" in stat:
                                    key, value = stat.split(":", 1)
                                    values[key] = int(value)
                                elif stat == "locks(micros)":
                                    pass
                                else:
                                    break
                        else:
                            # 3.x logline:
                            tags['namespace'] = split_line[5]
                            # TODO - Should we be splitting on "locks:{" instead?
                            pre_locks, locks = line.rsplit("locks:", 1)
                            # Strip duration from locks
                            locks = locks.rsplit(" ", 1)[0]
                            # Add quotation marks around string, so that it is valid JSON
                            locks = re.sub(r"(\w+):", r"\"\g<1>\":", locks)
                            locks_document = flatdict.FlatDict(
                                json.loads(locks), delimiter="_")
                            for key, value in locks_document.iteritems():
                                values["locks_{}".format(key)] = int(value)

                            # We work backwards from the end, until we run out
                            # of key:value pairs
                            # TODO - Can we assume these are always integers?
                            for stat in reversed(pre_locks.split()):
                                if ":" in stat:
                                    key, value = stat.split(":", 1)
                                    values[key] = int(value)
                                else:
                                    break
                        # TODO - Parse the full query plan for IXSCAN
                        if 'planSummary: ' in line:
                            tags['plan_summary'] = (line.split(
                                'planSummary: ', 1)[1].split()[0])
                        json_points.append(
                            create_point(timestamp, "operations", values,
                                         tags))
                    else:
                        logger.info(
                            "'{}' is not a recognised operation type - not parsing this line ({})"
                            .format(tags['operation'], line))
            if json_points:
                # TODO - We shouldn't need to wrap this in try/except - should
                # be handled by retry decorator
                try:
                    # TODO - Have a dry-run mode
                    write_points(logger, client, json_points, line_count)
                    pass
                except Exception as e:
                    logger.error("Retries exceeded. Giving up on this point.")
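# Illustrative sketch (lock fragment invented, shaped like a MongoDB 3.x
# "locks:" document): flattening with delimiter="_" yields the
# "locks_<scope>_<mode>" style metric names built in the loop above. Note this
# uses .items(); the original relies on the Python 2-era .iteritems().
import json
import flatdict

locks_json = '{"Global": {"acquireCount": {"r": 2, "w": 1}}}'
locks_document = flatdict.FlatDict(json.loads(locks_json), delimiter="_")
print({"locks_{}".format(key): int(value)
       for key, value in locks_document.items()})
# {'locks_Global_acquireCount_r': 2, 'locks_Global_acquireCount_w': 1}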
def vardict(existing=None):
    vdict = flatdict.FlatDict(delimiter='__')
    if existing:
        for (k, v) in existing.items():
            vdict[k] = v
    return vdict
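# Illustrative usage of the vardict() helper defined above (settings dict is
# hypothetical): because it uses a '__' delimiter, nested values assigned
# through it become reachable via double-underscore keys.
settings = vardict({'db': {'host': 'localhost', 'port': 5432}})
print(settings['db__host'])   # 'localhost'
print(settings['db__port'])   # 5432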