def test_filter_dict():
    """filter_dict must keep only entries whose key passes the predicate."""
    data = {
        'name': 'eleme',
        '_name': 'e',
    }

    def keep_public(key, value):
        # drop "private" keys, i.e. those starting with an underscore
        return not key.startswith('_')

    assert utils.filter_dict(keep_public, data) == {'name': 'eleme'}
    # the predicate may equally be given as a lambda
    assert utils.filter_dict(lambda k, v: not k.startswith('_'), data) == {'name': 'eleme'}  # noqa
def mineApyori(gs, **kwargs):
    """Mine association rules from gSpan subgraphs using the apyori library.

    Recognised keyword arguments:
        min_support, min_confidence, min_lift, max_length -- forwarded
            verbatim to ``apyori.apriori``
        show_rules -- boolean flag; when True the mined rules are printed
        samples -- optional collection of subgraph IDs to keep; everything
            else is filtered out.  When None, all subgraphs are considered.

    Returns the generator produced by ``apyori.apriori``.
    """
    apyoriKwargs = {
        'min_support': kwargs.get('min_support', 1e-4),
        'min_confidence': kwargs.get('min_confidence', 0.0),
        'min_lift': kwargs.get('min_lift', 0.0),
        'max_length': kwargs.get('max_length', None),
    }
    show_rules = kwargs.get('show_rules', True)
    samples = kwargs.get('samples', None)

    # Gather where-support for every mined subgraph, then optionally
    # restrict the set to the sampled IDs.
    support_where = {sg.gid: gs.support_where[sg.gid]
                     for sg in gs.subgraphs.values()}
    if samples:
        support_where = utils.filter_dict(support_where, samples)

    records = getTransactions(support_where, len(gs.graphs))
    gen = apyori.apriori(records, **apyoriKwargs)
    if show_rules:
        print_rules(gen)
    return gen
def filter_model_fields(self):
    """Return ``self.data`` restricted to this model's declared fields.

    NOTE: nested models are not handled here — open question from the
    original author.
    """
    wanted = self._model_fields
    return utils.filter_dict(self.data, wanted)
def save_docs(idxs, docs, params, name):
    '''Save the idxs and docs with pickle under *name*.

    The tuple ``(idxs, docs, params)`` is pickled to the path returned by
    ``get_docs_path(name)``; it can be loaded again via ``get_docs`` with
    the same name.  Only parameters relevant to the preprocessor are kept,
    plus a freshly generated "docs_id".
    '''
    # np.int (an alias for the builtin int) was deprecated in NumPy 1.20
    # and removed in 1.24 — use the builtin dtype directly.
    docs = np.array(docs).astype(int)
    idxs = np.array(idxs).astype(int)
    # only save parameters relevant to the preproc
    params = filter_dict(params, prec.default_params.keys())
    params["docs_id"] = random_id()
    file_path = get_docs_path(name)
    # context manager guarantees the handle is closed even if pickling fails
    with open(file_path, "wb") as file:
        pickle.dump((idxs, docs, params), file)
def getSequences(gs, samples=None, days=1):
    """Return sequences of subgraph IDs encoded as strings of chars.

    Each sequence covers a window of *days* days (e.g. days=7 groups the
    subgraphs seen within one week, in order).  pymining operates on
    characters, so each subgraph ID is converted with ``chr``.

    gs is the gSpan object; *samples* optionally restricts which subgraph
    IDs are considered (the rest are filtered out).
    """
    support_where = {sg.gid: gs.support_where[sg.gid]
                     for sg in gs.subgraphs.values()}
    if samples:
        support_where = utils.filter_dict(support_where, samples)
    records = getTransactions(support_where, len(gs.graphs))

    # Need to modify following arguments for different months
    # jan : 0, feb : 895, mar : 1726, apr : 2578, may : 3533,
    # jun : 4549, jul : 5626, aug : 6575, sep : 7449,
    # oct : 8277, nov : 9292, dec : 10686
    # TODO: Embed date attribute to nodes in the database,
    # get them directly instead of needing these values
    dates = _getDates(0, len(gs.graphs))

    # bucket transaction indices by day-window
    bucket_count = math.ceil((max(dates) / days) + 1)
    buckets = {b: [] for b in range(bucket_count)}
    for trans_idx, day in enumerate(dates):
        buckets[math.floor(day / days)].append(trans_idx)

    sequences = []
    for members in buckets.values():
        # flatten the window's transactions and encode each ID as a char
        chars = [chr(sub_id) for tid in members for sub_id in records[tid]]
        sequences.append("".join(chars))
    return sequences
def get_user(sysNam, usrDict):
    """Return the ID of the user named in *usrDict*, creating it if absent.

    Looks the user up by username in system *sysNam*; on a KeyError the
    user is created from the dict (merged with its first identity) and the
    user cache is rebuilt.
    """
    try:
        usr = _usrs.by_name(sysNam, usrDict['username'])
    except KeyError:
        # add the 1st identity to a top level of the users' dict
        # ('cause POST API call to /users works with only one extern_uid)
        # NOTE: the original `usrDict.items() + ...` concatenation only
        # works on Python 2; build the merged dict explicitly instead
        # (identity keys override usrDict keys, as before).
        merged = dict(usrDict)
        if usrDict['identities']:
            merged.update(usrDict['identities'][0])
        dictWithUid = filter_dict(merged,
                                  'admin', 'bio', 'can_create_group',
                                  'extern_uid', 'linkedin', 'password',
                                  'projects_limit', 'provider', 'skype',
                                  'twitter', 'website_url')
        usr = _usrs.add(sysNam, usrDict['username'], usrDict['name'],
                        usrDict['email'], confirm = False, **dictWithUid)
        # rebuild the cache after adding a new user
        _usrs.clr_cache(sysNam)
    return usr['id']
def update_layout(self, child, properties):
    """Re-apply grid layout options for *child* from *properties*."""
    grid_opts = filter_dict(properties, Table.grid_rule_map)
    child.widget.grid(**grid_opts)
def post_setup(self, child):
    """Place *child* into the grid using its declared layout properties."""
    grid_opts = filter_dict(child.layout_properties, Table.grid_rule_map)
    child.widget.grid(**grid_opts)
def update_font(self):
    """Rebuild the widget's font from the mapped font-related properties."""
    raw = filter_dict(self.widget_properties, {"font_size": "size"})
    # discard falsy values so tkFont.Font only sees explicit settings
    options = {key: val for key, val in raw.items() if val}
    self.widget["font"] = tkFont.Font(**options)
# Cluster the subgraphs and sample them so that we get single representatives of similar subgraphs #gedObj = ged.GraphEditDistance(False, gs.subgraphs, node_subst_cost=2, node_del_cost=2, node_ins_cost=2, reduce_graphs=False) #clusters = gedObj.get_clusters(0.1) clusters = gohe.get_clusters(gs.subgraphs, 0.9) samples = utils.sample_clusters(clusters) # Uncomment lines to print the queries for sg in gs.subgraphs.values(): # print("") # print_graph(sg) support_where[sg.gid] = gs.support_where[sg.gid] # print(gSpan2query(sg)) # print("") # Filter support_where according to samples support_where = utils.filter_dict(support_where, samples) print("Reduced Subgraph Count :", len(samples)) print("Mining frequent sequences...") # Mine frequent sequences freq_seqs = rm.frequentSequences(gs, samples, 3, 7, 1, 1) # reID variables, needed because when we run this script for different months, we get same ID's (always starts from 0) # But we need them to be different, thus we need to change start_ID and reID them subgraphs, samples, freq_seqs, support_where = utils.reID(gs.subgraphs, samples, freq_seqs, support_where, start_ID=0) # Mine rules from frequent sequences
pass
# NOTE(review): fragment — both try blocks below continue past this chunk
# (their except clauses are not visible here).
try:
    # Create friendship edges
    userfeed = self._read_json_data("%s_feed" % fuid)
    logger.info("Now processing feed of user %s" % fuid)
    # For each activity
    for activity in userfeed:
        try:
            # Create Node
            node = DSLSerializable("Artifact", activity['id'])
            node.add_attr("time", activity['created_time'])
            # copy the remaining activity attributes, excluding the listed keys
            # (presumably filter_dict drops them — confirm its semantics)
            node.add_attrs(
                filter_dict(activity, ['likes', 'shares', 'to', 'from', 'created_time', 'comments'])
            )
            self.write_dsl(node)
            # sender: explicit "from" field, falling back to the feed owner
            if activity.get("from"):
                post_from = activity["from"]["id"]
                self.create_person_node_if_not_exists(activity["from"]["id"], activity["from"])
            else:
                post_from = fuid
            # recipients: listed "to" users, falling back to the feed owner
            if activity.get("to"):
                post_to = [i["id"] for i in activity["to"]["data"] ]
                for i in activity["to"]["data"]:
                    self.create_person_node_if_not_exists(i["id"], i)
            else:
                post_to = [fuid]
# NOTE(review): fragment — the leading usage() call likely belongs to an
# option-parsing except clause just above this chunk; confirm indentation
# against the full file.
usage()
# required command-line options: -s source system, -d destination system,
# -g group name; any missing option aborts via usage()
try:
    srcSys = opts['-s']
    dstSys = opts['-d']
    grpNam = opts['-g']
except KeyError:
    usage()
# create a group itself
grp = Groups()
try:
    srcGid = grp.by_name(srcSys, grpNam)['id']
except KeyError:
    usage("Group with name '%s' doesn't exist in the source system" % grpNam)
dstGid = grp.add(dstSys, grpNam)['id']
# add members to the group
add_members(grp, srcGid, dstGid)
# copy projects with their members from the source group to the destination one
prj = Projects()
for p in prj.by_namespace(srcSys, srcGid):
    # only the listed project settings are carried over to the new project
    add_members(prj, p['id'],
                prj.add(dstSys, p['name'], namespace_id = dstGid,
                        **filter_dict(p, 'description', 'issues_enabled',
                                      'merge_requests_enabled', 'wiki_enabled',
                                      'snippets_enabled', 'visibility_level'))['id'])
def Predeal_dicts(src_wc_path, tgt_wc_path, dict_path, threthold):
    """Filter the bilingual dictionary at *dict_path* by word counts.

    Loads source/target word-count tables, reports their sizes, and
    delegates the actual filtering (with the given threshold) to
    ``utils.filter_dict``.
    """
    src_wc = utils.load_word_count(src_wc_path)
    tgt_wc = utils.load_word_count(tgt_wc_path)
    print('Src_wc:{} tgt_wc:{}'.format(len(src_wc), len(tgt_wc)))
    utils.filter_dict(dict_path, src_wc, tgt_wc, threthold=threthold)
# NOTE(review): fragment — both try blocks below continue past this chunk
# (their except clauses are not visible here).
try:
    # Create friendship edges
    userfeed = self._read_json_data("%s_feed" % fuid)
    logger.info("Now processing feed of user %s" % fuid)
    # For each activity
    for activity in userfeed:
        try:
            # Create Node
            node = DSLSerializable("Artifact", activity['id'])
            node.add_attr("time", activity['created_time'])
            # copy the remaining activity attributes, excluding the listed keys
            # (presumably filter_dict drops them — confirm its semantics)
            node.add_attrs(
                filter_dict(activity, [
                    'likes', 'shares', 'to', 'from', 'created_time',
                    'comments'
                ]))
            self.write_dsl(node)
            # sender: explicit "from" field, falling back to the feed owner
            if activity.get("from"):
                post_from = activity["from"]["id"]
                self.create_person_node_if_not_exists(
                    activity["from"]["id"], activity["from"])
            else:
                post_from = fuid
            # recipients: listed "to" users, falling back to the feed owner
            if activity.get("to"):
                post_to = [i["id"] for i in activity["to"]["data"]]
                for i in activity["to"]["data"]:
                    self.create_person_node_if_not_exists(
                        i["id"], i)