def circles(self, adjlist=None):
    """Locate the strongly connected components ("circles") of a graph.

    The cycles are only found, not removed.
    TODO: remove the circles!!!

    :param adjlist: optional adjacency structure to analyse.  When it is
        ``None`` the instance's ``self.adjList`` is used instead; that
        attribute is built through ``self.convert2adjlist()`` if it is still
        ``None``.
    :return: the result of ``tarjan.tarjan`` for the selected graph.
    """
    if adjlist is None:
        if self.adjList is None:
            # Build the instance adjacency list lazily, on first use.
            self.convert2adjlist()
        adjlist = self.adjList
    return tarjan.tarjan(adjlist)
def get_smith_set(self, candidates_to_check):
    '''
    Return the Smith set among ``candidates_to_check`` as a ``set``.

    A directed graph is built with an edge ``i -> j`` whenever
    ``self._n_vote_i_to_j[i][j] >= self._n_vote_i_to_j[j][i]``.  Its strongly
    connected components are the candidate cycles; a cycle is discarded when
    its first member is a successor of another cycle's first member.  One of
    the remaining cycles is returned.

    If the returned set has a single member, that member is the Condorcet
    winner; otherwise there is no Condorcet winner.  A single candidate is
    returned as-is without consulting the vote table.
    '''
    if len(candidates_to_check) == 1:
        return set(candidates_to_check)

    graph = defaultdict(list)
    for winner in candidates_to_check:
        for loser in candidates_to_check:
            if winner == loser:
                continue
            # ``winner`` dominates (or ties with) ``loser``.
            if (self._n_vote_i_to_j[winner][loser]
                    >= self._n_vote_i_to_j[loser][winner]):
                graph[winner].append(loser)

    components = {tuple(scc) for scc in tr.tarjan(graph)}

    # If a member of cycle x is preferred to a member of cycle y, then all of
    # cycle x is preferred to y, so y cannot be the Smith set.
    dominated = set()
    for upper in components:
        for lower in components:
            if upper != lower and lower[0] in graph[upper[0]]:
                dominated.add(lower)

    return set((components - dominated).pop())
def two_sat(problem):
    """Decide a 2-SAT instance through the SCCs of its implication graph.

    :param problem: a triple ``(n, m, clauses)``.  ``clauses`` is an iterable
        of literal pairs ``(a, b)`` in which ``-x`` denotes the negation of
        ``x``; ``n`` is passed on to ``create_adj`` and ``m`` is not used.
    :return: ``(0, [])`` when some strongly connected component contains a
        literal together with its negation, otherwise ``(1, assignment)``
        where ``assignment`` lists one literal per variable, sorted by
        absolute value.
    """

    def create_assignment(condensation):
        # Walk the components in the order they were produced and keep the
        # first literal met for every variable.  ``chosen`` mirrors
        # ``assignment`` so membership tests are O(1) instead of a list scan.
        chosen = set()
        assignment = []
        for component in condensation:
            for v in component:
                if v not in chosen and -v not in chosen:
                    chosen.add(v)
                    assignment.append(v)
        return sorted(assignment, key=abs)

    n, _m, clauses = problem
    # Every clause (a or b) contributes the implications -a -> b and -b -> a.
    edges = [(-a, b) for a, b in clauses] + [(-b, a) for a, b in clauses]
    scc = tarjan(create_adj([[n, len(edges)]] + edges))
    for component in scc:
        members = set(component)
        # A literal sharing a component with its negation is a contradiction.
        # ``v != 0`` keeps a lone 0 from matching itself (0 == -0).
        if any(v != 0 and -v in members for v in component):
            return 0, []
    return 1, create_assignment(scc)
def get_sort_sccs(arcs):
    """Return the strongly connected components of the graph spanned by ``arcs``.

    :param arcs: iterable of 3-tuples; the first two items must expose a
        ``name`` attribute, the third item is ignored.  Each tuple adds an
        edge from the first item's name to the second item's name.
    :return: the result of ``tarjan`` on the name graph.
    """
    successors = defaultdict(set)
    for source, target, _unused in arcs:
        successors[source.name].add(target.name)
    return tarjan(successors)
def get_execution_order():
    """Rebuild the dependency graph and return an execution order.

    ``dep_graph`` and ``uncommitted`` are cleared, ``build_graph(instance)``
    repopulates them, and the strongly connected components of ``dep_graph``
    are flattened into one list; the members of each component are ordered
    with ``scc_comparator``.

    :return: ``(can_execute, order)`` where ``can_execute`` is the value
        returned by ``build_graph`` and ``order`` is the flattened list.
    """
    # Local import keeps this block self-contained.  ``sorted(cmp=...)`` only
    # exists on Python 2; ``cmp_to_key`` works on 2.7 and on Python 3.
    from functools import cmp_to_key

    dep_graph.clear()
    uncommitted.clear()
    can_execute = build_graph(instance)
    order_key = cmp_to_key(scc_comparator)
    execution_order = []
    for component in tarjan(dep_graph):
        execution_order.extend(sorted(component, key=order_key))
    return can_execute, execution_order
def do(filename_out, delimiter, mode, gephi_out):
    """Break the cycles of the module-level ``hyper_to_hypo`` graph and save it.

    As long as ``tarjan`` reports a component with more than one member, a
    random member of the first such component is picked as hypernym and the
    first cycle member found among its hyponyms is removed.  The resulting
    graph is written with ``methods.util.write_graph.hyper_to_hypo_graph``.

    :param filename_out: output file name forwarded to the writer.
    :param delimiter: delimiter forwarded to the writer.
    :param mode: not used by this function.
    :param gephi_out: forwarded to the writer as ``gephi_out``.
    :return: the number of edges removed.
    """
    cycles_removed = 0
    while True:
        # Pick the first component that really is a cycle, if there is one.
        cycle = None
        for component in tarjan(hyper_to_hypo):
            if len(component) > 1:
                print("Cycle detected: %s" % component)
                cycle = component
                break
        if cycle is None:
            break

        hypernym = cycle[random.randint(0, len(cycle) - 1)]
        for member in cycle:
            if member in hyper_to_hypo[hypernym]:
                print("Remove hyponym '%s' from hypernym '%s'." % (member, hypernym))
                hyper_to_hypo[hypernym].remove(member)
                cycles_removed += 1
                break

    print("Removed %s cycles." % cycles_removed)
    methods.util.write_graph.hyper_to_hypo_graph(filename_out, hyper_to_hypo, gephi_out=gephi_out, delimiter=delimiter)
    return cycles_removed
def reduct(self):
    """Merge equivalent items and return the reduced ``DataWithImplication``.

    Items that lie in the same strongly connected component of the
    implication graph (``self.parents``) are fused into one item.  The
    implications are re-indexed accordingly, self-implications are dropped
    and indirect implications are removed.

    :return: ``self`` when ``self.nb_implications`` is 0, otherwise a new
        ``DataWithImplication`` built from the fused data.
    """
    if self.nb_implications == 0:
        return self

    implications = list(self.parents)
    data = self.data

    # Each component is one class of mutually implied items.  Classes are
    # sorted internally and then ordered by their smallest member.
    groups = [sorted(component) for component in tarjan(dict(enumerate(implications)))]
    groups.sort(key=lambda group: group[0])

    old_to_new_indice = [None] * data.m
    for new_index, group in enumerate(groups):
        for old_index in group:
            old_to_new_indice[old_index] = new_index

    new_implications = []
    for new_index, group in enumerate(groups):
        merged = set()
        for old_index in group:
            merged.update(old_to_new_indice[child] for child in implications[old_index])
        # A fused item must not imply itself.
        new_implications.append(frozenset(merged) - frozenset([new_index]))

    semi_reduced_data = data.fusion_equivalent_itemsets(groups)
    parents = DataWithImplication._remove_indirect_implications(new_implications)
    childs = DataWithImplication.reverse(parents)
    return DataWithImplication(semi_reduced_data, childs, parents)
def trovaPrincipale(grafo):
    '''Return the main node of the graph (i.e. a node that can reach every
    other node) if it exists, otherwise return -1. Cost O(n + m).'''

    def claim_descendants(component, dag):
        # Every successor that is still its own parent gets adopted.
        for successor in dag[component]:
            if parents[successor] == successor:
                parents[successor] = component
                claim_descendants(successor, dag)

    # Work on the graph of the components: since it is a DAG the parent
    # vector can be computed without trouble.
    component_of = tarjan(grafo)
    component_graph = parti(grafo)

    # A 0 is put in first position because the components start at 1 while an
    # array starts at 0!
    parents = [0] + list(component_graph)
    for component in component_graph:
        # The graph is not connected.
        if parents[component] == component:
            claim_descendants(component, component_graph)

    # There can be several root components; in that case there is no main node.
    roots = [component for component in component_graph
             if parents[component] == component]
    if len(roots) != 1:
        return -1

    # Only the component name is known; any node of that component will do.
    for node in grafo:
        if component_of[node] == roots[0]:
            return node
def test_simple():
    """Check ``tarjan`` on a few small graphs whose components are known."""
    cases = (
        # two nodes pointing at each other form one component
        ({'A': ['B'], 'B': ['A']}, [{'A', 'B'}]),
        # isolated nodes are their own components
        ({'A': [], 'B': []}, [{'A'}, {'B'}]),
        (
            {
                1: [2, 3, 4, 5],
                2: [1, 3, 5],
                3: [1],
                4: [],
                5: [1, 2, 3, 4, 5],
            },
            [{1, 2, 3, 5}, {4}],
        ),
        (
            {
                1: [],
                2: [],
                3: [],
                4: [],
                5: [1, 2, 3, 4, 5],
            },
            [{1}, {2}, {3}, {4}, {5}],
        ),
    )
    pairings = [GRAPH_SCC_PAIRING(graph, expected) for graph, expected in cases]
    for pairing in pairings:
        assert freeze_then_compare(tarjan(pairing.graph), pairing.scc_set)
def eg(w, kripke_structure, expression, level):
    """Check whether ``w`` can reach a cycle of worlds satisfying ``expression``.

    NOTE(review): presumably this implements the CTL ``EG`` operator; confirm
    against the caller.

    The relation of ``kripke_structure`` is restricted to the worlds for which
    ``evaluate`` returns ``True``.  For every strongly connected component of
    that restricted relation that contains a cycle (more than one world, or a
    single world with a self-loop), ``exists_path`` is asked to fill
    ``result``; the first non-empty result yields ``True``.

    :param w: the world to check.
    :param kripke_structure: mapping with the keys ``"worlds"`` and
        ``"relations"``; the relations are deep-copied, not modified.
    :param expression: expression handed to ``evaluate``.
    :param level: current nesting level; ``evaluate`` is called with
        ``level + 1``.
    :return: ``True`` or ``False``.
    """
    if evaluate(w, kripke_structure, expression, level + 1) is False:
        return False

    worlds_that_satisfies_expression = [
        world for world in kripke_structure["worlds"]
        if evaluate(world, kripke_structure, expression, level + 1) is True
    ]

    relations_sat = deepcopy(kripke_structure["relations"])
    # Iterate over a snapshot of the keys: deleting entries while iterating
    # the live dict raises RuntimeError on Python 3.
    for world in list(relations_sat):
        if world not in worlds_that_satisfies_expression:
            del relations_sat[world]
            continue
        for neigh in list(relations_sat[world]):
            if neigh not in worlds_that_satisfies_expression:
                relations_sat[world].remove(neigh)

    for component in tj.tarjan(relations_sat):
        # A single world without a self-loop is not a cycle.
        if len(component) == 1 and component[0] not in relations_sat[component[0]]:
            continue
        result = []
        exists_path(w, relations_sat, component, [], result)
        if len(result) > 0:
            return True
    return False
def get_complex_complete_mccabe_complexity_angr(self, use_accurate=False):
    """Compute ``edges - nodes + 2 * components`` for the CFG.

    ``components`` is the number of strongly connected components that
    ``tarjan.tarjan`` reports for the sub-graph made of the nodes having at
    least two successors.

    :param use_accurate: forwarded to ``self.__get_cfg__``.
    :return: the resulting integer.
    """
    cfg = self.__get_cfg__(use_accurate)
    branching_nodes = {
        node: list(successors)
        for node, successors in cfg.graph.adjacency()
        if len(list(successors)) >= 2
    }
    edge_count = cfg.graph.number_of_edges()
    node_count = len(cfg.graph)
    component_count = len(tarjan.tarjan(branching_nodes))
    return edge_count - node_count + 2 * component_count
def has_cycle(self) -> bool:
    """Tell whether the traversal graph has a component with several nodes.

    The graph comes from ``self.get_traversal_dictionary`` with disabled
    connections excluded.  A self-loop on a single node is not reported.
    """
    components = tarjan(
        self.get_traversal_dictionary(exclude_disabled_connection=True))
    return any(len(component) > 1 for component in components)
def ordered_dependencies(schema_map):
    """Return the schemas of ``schema_map`` in the order given by ``tarjan``.

    :param schema_map: mapping from schema name to schema.
    :return: list of schemas, one per strongly connected component of the
        graph returned by ``resolve_dependencies(schema_map)``.
    :raises AssertionError: if a component has more than one member, i.e. the
        dependencies are cyclic.
    """
    ordered = []
    for component in tarjan.tarjan(resolve_dependencies(schema_map)):
        assert len(component) == 1, f'cyclic dependency detected: {component}'
        name = component[0]
        ordered.append(schema_map[name])
    return ordered
def compute_scc_from_graph(g):
    """Return the strongly connected components of ``g``.

    :param g: graph object exposing ``nodes`` and ``successors(node)``.
    :return: the result of ``tarjan`` on the successor lists of ``g``.
    """
    adjacency = {node: list(g.successors(node)) for node in g.nodes}
    return tarjan(adjacency)
def count_cycles(A):
    """Count the strongly connected components with more than one node.

    :param A: iterable of ``(source, target)`` edges.  Only sources become
        keys of the adjacency mapping handed to ``tarjan``.
    :return: the number of components whose length exceeds 1.
    """
    adjacency = {}
    for source, target in A:
        adjacency.setdefault(source, []).append(target)
    return sum(1 for component in tarjan(adjacency) if len(component) > 1)
def get_complex_complete_mccabe_complexity_angr(path_to_seralized):
    """Compute ``edges - nodes + 2 * components`` for a stored CFG.

    The CFG is obtained from ``get_loaded_cfg(path_to_seralized)``.
    ``components`` is the number of strongly connected components that
    ``tarjan.tarjan`` reports for the sub-graph made of the nodes having at
    least two successors.

    :param path_to_seralized: argument handed to ``get_loaded_cfg``.
    :return: the resulting integer.
    """
    cfg_to_analyze = get_loaded_cfg(path_to_seralized)
    branching_nodes = {
        node: list(successors)
        for node, successors in cfg_to_analyze.graph.adjacency()
        if len(list(successors)) >= 2
    }
    edge_count = cfg_to_analyze.graph.number_of_edges()
    node_count = len(cfg_to_analyze.graph)
    component_count = len(tarjan.tarjan(branching_nodes))
    return edge_count - node_count + 2 * component_count
def main():
    """For testing with iPython"""
    # Other inputs that can be swapped in: tarjan(draw_network(), "sum1") or
    # tarjan(draw_small(), "A").
    first_result = tarjan(draw_wiki(), "A")[0]

    # Draw every key/value pair of the first result as an undirected edge.
    drawing = nx.Graph()
    for endpoint_a, endpoint_b in first_result.items():
        drawing.add_edge(endpoint_a, endpoint_b)
    nx.draw_spring(drawing, with_labels=True)
def find_strongly_connected_component(self, graph, left, right):
    """
    Find the strongly connected components with Tarjan's algorithm.

    The directed graph is first built by ``self.construct_directed_graph``
    from ``graph``, ``left`` and ``right``.
    Time: O(n + m)
    """
    return tarjan(self.construct_directed_graph(graph, left, right))
def maximal_strongly_connected_component(processed_data):
    """Return the largest strongly connected component as a ``set``.

    :param processed_data: mapping from a cue to an iterable of responses;
        every cue becomes a node whose successors are its responses.
    :return: the members of the longest component found by ``tarjan``; on a
        tie the component listed first wins.
    """
    graph = {cue: list(processed_data[cue]) for cue in processed_data}
    by_size = sorted(tarjan(graph), key=len, reverse=True)
    return set(by_size[0])
def minimal_sink_sets(self, level=0):
    """Return the named strongly connected components at ``level``.

    Row ``x`` of ``self.adj_matrix`` gets an edge to every index whose entry
    is ``<= level``, except ``x`` itself.  The components found by ``tarjan``
    are converted with ``list_of_list_to_name`` and
    ``self.taxa_names.inverse``.

    :param level: threshold compared against the matrix entries.
    """
    self.logger.debug("Computing minimal sink sets.")
    adjacency = {
        row: set(np.where(self.adj_matrix[row] <= level)[0]) - {row}
        for row in range(len(self.adj_matrix))
    }
    components = tarjan(adjacency)
    return list_of_list_to_name(components, self.taxa_names.inverse)
def selfLoop(self):
    """Count the multi-node components of ``self.graph`` and average their size.

    :return: ``(count, average)`` where ``count`` is the number of strongly
        connected components with more than one member and ``average`` is
        their mean size as a float.  ``(0, 0.0)`` is returned when there is
        no such component (this used to raise ``ZeroDivisionError``).
    """
    sizes = [len(circle) for circle in tarjan.tarjan(self.graph) if len(circle) > 1]
    if not sizes:
        return 0, 0.0
    return len(sizes), float(sum(sizes)) / len(sizes)
def run_tarjan(mat):
    """Run Tarjan's algorithm on a square 0/1 adjacency matrix.

    :param mat: indexable matrix; ``mat[i][j] == 1`` means an edge ``i -> j``.
        Only the first ``len(mat)`` columns of every row are read.
    :return: the result of ``tarjan.tarjan`` on the adjacency lists.
    """
    size = len(mat)
    # ``range`` replaces the Python-2-only ``xrange``; it works on both.
    tarjan_dict = {
        i: [j for j in range(size) if mat[i][j] == 1]
        for i in range(size)
    }
    return tarjan.tarjan(tarjan_dict)
def get_graph(filename,tweet_file): # get everything given the user_follow backup file and the original tweet file raw_graph = get_graph_raw(filename) # verify_filter(raw_graph) filtered_graph = filter_useless_nodes(raw_graph) # verify_filter(filtered_graph) print 'number of nodes :',len(filtered_graph) saveAsJson(filtered_graph,'data/filtered_graph.json') # the graph with removed user ids # raw_graph = None old_id_to_good_id,good_id_to_old_id = get_graph_id_to_good_id_mapping(filtered_graph) # test_strongly_connected(new_graph,new_graph) saveAsJson(old_id_to_good_id,'data/old_id_to_good_id.json') # the mapping original id --> good id saveAsJson(good_id_to_old_id,'data/good_id_to_old_id.json') # the mapping good id --> original id new_graph = transform_graph(filtered_graph,old_id_to_good_id) saveAsJson(new_graph,'data/new_graph.json') # the filtered graph according to the new good id # filtered_graph = None scc_list = tarjan.tarjan(new_graph) # using tarjan's algorithm to do the scc partition scc_list.sort(key = lambda x:len(x),reverse = True) saveAsJson(scc_list,'data/scc_list.txt') # the scc list according the good id max_scc = max(scc_list,key = lambda x:len(x)) print ('max_scc size ',len(max_scc)) saveAsJson(max_scc,'data/max_scc.json') # max scc according to good id scc_graph = get_scc_graph(new_graph,max_scc) saveAsJson(scc_graph,'data/scc_graph.json') # the max extracted scc graph [good id] scc_graph_true_id = transform_graph(scc_graph,good_id_to_old_id) saveAsJson(scc_graph_true_id,'data/scc_graph_true_id.json') # the max scc graph with original id print 'size of max scc graph : ',len(scc_graph_true_id) save_node_list(scc_graph_true_id,'data/nodelist.txt') # nodelist of scc original id save_edge_list(scc_graph_true_id,'data/edgelist.txt') # edge list of scc original id filtered_graph = None scc_graph = None new_graph = None old_id_to_good_id = None good_id_to_old_id = None get_scc_tweets(tweet_file,scc_graph_true_id) # saving the extracted tweets in # 
'data/filtered_opinion.csv' test_strongly_connected(scc_graph_true_id,raw_graph) # the test code ... may be disabled if required
def find_strongly_connected_components(self):
    """Return the strongly connected components of ``self.adj_matrix``.

    Vertex ``a`` gets an edge to vertex ``b`` whenever
    ``self.has_edge(self.adj_matrix, a, b)`` is true; both run over
    ``range(len(self.adj_matrix))``.
    """
    vertex_count = len(self.adj_matrix)
    formatted_graph = {
        source: [
            target for target in range(vertex_count)
            if self.has_edge(self.adj_matrix, source, target)
        ]
        for source in range(vertex_count)
    }
    return tarjan(formatted_graph)
def sort(items):
    """Return ``items`` in the order produced by ``tarjan`` on their descriptions.

    Each item is serialized, the descriptions are resolved into a graph, and
    the names in the flattened component list are mapped back to the items.

    :param items: indexable sequence of objects accepted by ``serialize``;
        every description must have a ``'name'`` key.
    :return: list of items.
    :raises ValueError: if the sort yields a name that no item has.
    """
    # Real lists instead of ``map`` objects: on Python 3 a map is a one-shot
    # iterator without ``index``, and ``descriptions`` is consumed twice.
    descriptions = [serialize(item) for item in items]
    names = [description['name'] for description in descriptions]
    resolved_descriptions = resolve(descriptions)
    flattened_sort = [
        name
        for component in tarjan(resolved_descriptions)
        for name in component
    ]
    return [items[names.index(name)] for name in flattened_sort]
def test_strongly_connected(graph,actual_graph): # test code verify_filter(graph) old_id_to_good_id,good_id_to_old_id = get_graph_id_to_good_id_mapping(graph) new_graph = transform_graph(graph,old_id_to_good_id) scc_list = tarjan.tarjan(new_graph) print 'scclist size in graph :',len(scc_list) test_using_shortest_path(new_graph) is_subgraph_test(actual_graph,graph) return
def maxcover(self):
    """Report how removing each single node changes the feedback count.

    The feedback count of the full network is computed first, then once more
    for every network obtained by deleting row and column ``i`` of
    ``self.network.matrix``.  The report is printed and written to
    ``fvsfinder/result/<self.outputfile>``; ``self.outputfile`` is set to
    ``"maxcoverage.txt"`` when it is empty.
    """
    graph = self._graph_generator(self.network.matrix)
    original_numfeedbacks = self._numfeedbacks(tj.tarjan(graph))

    numfeedbacks = []
    for i in range(self.n):
        mod_matrix = np.delete(self.network.matrix, i, 0)
        mod_matrix = np.delete(mod_matrix, i, 1)
        graph = self._graph_generator(mod_matrix, n=self.n - 1)
        numfeedbacks.append((i, self._numfeedbacks(tj.tarjan(graph))))

    if not self.outputfile:
        self.outputfile = "maxcoverage.txt"

    # ``with`` closes the file even when a write fails.
    with open("fvsfinder/result/" + self.outputfile, "w") as out:
        # The trailing newline keeps the header from merging with the first
        # "Removal of Node" line.
        out.write("Original Network has " + str(original_numfeedbacks) + " feedbacks.\n")
        for node, num in numfeedbacks:
            line = "Removal of Node " + self.nodes[node] + " reduces number of SCCs into " + str(num) + ".\n"
            print(line)
            out.write(line)
def find_strongly_connected_components(self, edges):
    """Return the members of the largest strongly connected component.

    :param edges: graph handed to ``tarjan``.
    :return: list with the members of the longest component; on a tie the
        component listed first wins.  Empty list when nothing is found.
    """
    largest_strong_components = []
    for entry in tarjan(edges):
        try:
            # Replace (not extend) the current best: extending would mix the
            # members of several components into one list.
            if len(entry) > len(largest_strong_components):
                largest_strong_components = list(entry)
        except TypeError:
            print("Element", entry, "has no defined length")
    return largest_strong_components
def hash_func(cls, adjacency):
    """Compute a scalar value for a JSON-encoded adjacency dict.

    Each edge ``(s, d)`` is embedded as the position of ``s`` in the
    flattened tarjan order followed by the in/out degrees of ``s`` and ``d``.
    The embeddings are multiplied with ``cls.qs`` and the values
    ``1 / sqrt(...)`` are summed.

    :param adjacency: JSON text of a dict of lists with integer-like keys.
    """
    # JSON only allows strings as keys, so they are converted back to ints
    # before tarjan's algorithm runs on the adjacency dict.
    decoded = json.loads(adjacency)
    adjacency = {int(node): targets for node, targets in decoded.items()}
    graph = nx.convert.from_dict_of_lists(adjacency, create_using=nx.DiGraph)
    t_order = sum(tarjan(adjacency), [])

    rows = []
    for source, dest in graph.edges:
        rows.append((
            t_order.index(source),
            graph.in_degree[source],
            graph.out_degree[source],
            graph.in_degree[dest],
            graph.out_degree[dest],
        ))
    embs = np.array(rows)
    return (1 / np.sqrt(np.dot(embs, cls.qs))).sum()
def parse_successor_infos(input_file):
    """Parse part/successor records from ``input_file`` and print the cycles.

    The file is read line by line.  The first matching pattern of a line
    updates the corresponding field (part, variant, MRP area and the same
    three for the successor).  A line matching the successor MRP area
    completes a record and adds an edge ``part[/var]/mrp`` ->
    ``part_s[/var_s]/mrp_s``.  Fields are not reset between records.

    All edges are printed, then the direct self-references, then
    ``strip_result(tarjan(successors))``.  Nothing is returned.
    """
    # Raw strings so that ``\d`` is not treated as a string escape.
    field_patterns = (
        ('part', re.compile(r'Artikel".*"([^"]*)"')),
        ('var', re.compile(r'ArtVar".*"([^"]*)"')),
        ('mrp', re.compile(r'MRPArea"[^0-9]+(\d+)')),
        ('part_s', re.compile(r'ArtikelSuccessor".*"([^"]*)"')),
        ('var_s', re.compile(r'ArtikelVarSuccessor.*"([^"]*)"')),
    )
    rgx_mrparea_s = re.compile(r'MRPAreaSuccessor[^0-9]+(\d+)')

    successors = {}
    # ``NONE`` is not defined in this block; it is expected at module level.
    fields = dict.fromkeys([name for name, _ in field_patterns], NONE)

    # ``with`` closes the input file once parsing is done.
    with open(input_file) as lines:
        for line in lines:
            for name, pattern in field_patterns:
                hit = pattern.search(line)
                if hit:
                    fields[name] = hit.group(1)
                    break
            else:
                hit = rgx_mrparea_s.search(line)
                if hit:
                    mrp_s = hit.group(1)
                    v = "/" + fields['var'] if fields['var'] else ""
                    vs = "/" + fields['var_s'] if fields['var_s'] else ""
                    a = "%s%s/%s" % (fields['part'], v, fields['mrp'])
                    b = "%s%s/%s" % (fields['part_s'], vs, mrp_s)
                    successors.setdefault(a, []).append(b)

    for key in successors:
        print("%s -> %s" % (key, successors[key]))
    print()
    for key in successors:
        if key in successors[key]:
            print("direct circle %s <-> %s" % (key, key))
    print(strip_result(tarjan(successors)))
def include_tag(loader, node):
    """
    Provides simplistic include support for YAML.

    The included file is resolved relative to the directory of the file being
    loaded.  Every include is recorded in ``_include_edges`` so that circular
    includes can be detected.

    :raises yaml.YAMLError: if the included file does not exist or the
        includes form a cycle.
    """
    including = path(loader.stream.name)
    included = including.parent.join(node.value)
    if not included.exists:
        raise yaml.YAMLError("'%s' does not exist" % included)

    _include_edges[including.absolute].append(included.absolute)
    cycle = next(
        (group for group in tarjan.tarjan(_include_edges) if len(group) > 1),
        None)
    if cycle is not None:
        raise yaml.YAMLError("circular dependency detected between %r" % cycle)

    # NOTE(review): ``yaml.load`` is called without an explicit Loader and the
    # opened file is never closed explicitly -- confirm this is intended.
    return yaml.load(included.open())
def gen(ast):
    """Generate the C source and header text for ``ast``.

    An ``AstVisitor`` collects the name graph, the declarations, the
    definitions and the template fields.  The declarations and definitions
    are emitted component by component in the order given by ``tarjan``.

    :return: ``(c_filename, c_text, h_filename, h_text)``.
    """
    name_dag = {}
    decls = {}
    defs = {}
    d = {
        'assgns': '',
        'vdecls': '',
        'include_user_module_header': '',
        'post_init_hook': '',
    }
    visitor = AstVisitor({'module_settings': module_settings},
                         default=collect_node,
                         onleaf=collect_leaf,
                         kind='red')
    visitor.visit(ast, (name_dag, decls, defs, d))

    # Inside each component all typedefs are written first to allow circular
    # refs, then the definitions.  For now types are not recursive, so a
    # component with more than one member is not expected.
    header_parts = []
    for scc in tarjan(name_dag):
        for table in (decls, defs):
            for tname in scc:
                text = table.get(tname, None)
                if text:
                    header_parts.append(text)
    d['header_defsdecls'] = ''.join(header_parts)

    d['asttype'] = qn.c_typename(ast)
    d['astvalue'] = qn.c_varname(ast)
    d['astinit'] = qn.c_modinit(ast)
    d['astfun'] = qn.c_astfun(ast)

    includes = []
    inits = []
    for n in infos.loaded_modules:
        includes.append('\n#include "{}"'.format(qn.file_ast(n)))
        inits.append('\n {}();'.format(qn.c_modinit(n)))
    d['ast_includes'] = ''.join(includes)
    d['includes_init'] = ''.join(inits)

    d['ast_h_filename'] = qn.file_ast(ast, '.h')
    d['ast_c_filename'] = qn.file_ast(ast, '.c')
    return (d['ast_c_filename'], cpp_template.format(**d),
            d['ast_h_filename'], header_template.format(**d))
def load_components(self, components_by_name):
    """Return the strongly connected components of the missing dependencies.

    A ``LatexDependency`` is created for every reference of ``self`` whose
    name is not in ``components_by_name``; ``self`` is registered under
    ``self.file``.  Each of these dependencies points at those of its own
    references that are registered as well.

    :param components_by_name: container of already known component names.
    :return: the result of ``tarjan`` on that dependency graph.
    """
    missing_deps = {}
    for ref in self.references():
        if ref.name not in components_by_name:
            missing_deps[ref] = LatexDependency(ref)
    missing_deps[self.file] = self

    graph = {}
    for dep in missing_deps.values():
        graph[dep] = [
            missing_deps[ref] for ref in dep.references() if ref in missing_deps
        ]
    return tarjan(graph)
def compute_strongly_connected_component():
    """Print statistics on the components of the module-level ``graph``.

    Prints the number of strongly connected components, the number of those
    with more than one member, and every component whose size is 2799, 3926
    or 3568.  Nothing is returned.
    """
    adjacency = {node: list(graph.successors(node)) for node in graph.nodes}
    scc = tarjan(adjacency)
    print('# Connected Component : ', len(scc))

    multi_node = [component for component in scc if len(component) > 1]
    print('# Connected Component Filtered: ', len(multi_node))

    # Histogram of component sizes; built but not used further here.
    ct = Counter(len(component) for component in multi_node)
    for component in multi_node:
        if len(component) in (2799, 3926, 3568):
            print(component)
def tc(g):
    """ Given a graph @g, returns the transitive closure of @g.

    The result maps every vertex to a tuple of the vertices reachable from
    it.  Components are handled in the order produced by ``tarjan``; the
    assertion checks that every successor was handled before or belongs to
    the current component.
    """
    closure = {}
    for component in tarjan(g):
        direct = set()
        for vertex in component:
            direct.update(g[vertex])

        reachable = set()
        for successor in direct:
            assert successor in closure or successor in component
            reachable.add(successor)
            reachable.update(closure.get(successor, ()))

        # Inside a real cycle every member reaches every member.
        if len(component) > 1:
            reachable.update(component)

        shared = tuple(reachable)
        for vertex in component:
            closure[vertex] = shared
    return closure
def two_sat(problem):
    """Decide a 2-SAT instance through the SCCs of its implication graph.

    :param problem: a triple ``(n, m, clauses)``.  ``clauses`` is an iterable
        of literal pairs ``(a, b)`` in which ``-x`` denotes the negation of
        ``x``; ``n`` is passed on to ``create_adj`` and ``m`` is not used.
    :return: ``(0, [])`` when some strongly connected component contains a
        literal together with its negation, otherwise ``(1, assignment)``
        where ``assignment`` lists one literal per variable, sorted by
        absolute value.
    """

    def create_assignment(condensation):
        # Walk the components in the order they were produced and keep the
        # first literal met for every variable.  ``chosen`` mirrors
        # ``assignment`` so membership tests are O(1) instead of a list scan.
        chosen = set()
        assignment = []
        for component in condensation:
            for v in component:
                if v not in chosen and -v not in chosen:
                    chosen.add(v)
                    assignment.append(v)
        return sorted(assignment, key=abs)

    n, _m, clauses = problem
    # Every clause (a or b) contributes the implications -a -> b and -b -> a.
    edges = [(-a, b) for a, b in clauses] + [(-b, a) for a, b in clauses]
    scc = tarjan.tarjan(create_adj([[n, len(edges)]] + edges))
    for component in scc:
        members = set(component)
        # A literal sharing a component with its negation is a contradiction.
        # ``v != 0`` keeps a lone 0 from matching itself (0 == -0).
        if any(v != 0 and -v in members for v in component):
            return 0, []
    return 1, create_assignment(scc)
def compute_alpha_beta(scc_graphs):
    """Compute the ratios ``alpha`` and ``beta`` for a list of SCC graphs.

    ``alpha`` is the share of edges that have a reverse edge in the same
    graph (cycles of size two).  ``beta`` is the share of edges that still
    lie inside a multi-node strongly connected component once those
    two-cycle edges are removed.  Both are relative to the total edge count
    of ``scc_graphs``; the three counts are printed.

    :param scc_graphs: iterable of directed graphs.
    :return: ``(alpha, beta)``.
    """
    num_all_scc_edges = 0
    num_of_size_two_cycle_edges = 0
    resulting_graph = nx.DiGraph()  # what is left after removing two-cycles

    for component_graph in scc_graphs:
        resulting_graph.add_edges_from(
            component_graph.subgraph(component_graph).edges())
        num_all_scc_edges += component_graph.number_of_edges()

        mutual_edges = set()
        for (l, r) in component_graph.edges():
            if (r, l) in component_graph.edges():
                mutual_edges.update(((l, r), (r, l)))
        num_of_size_two_cycle_edges += len(mutual_edges)
        resulting_graph.remove_edges_from(list(mutual_edges))

    # Now compute the SCCs of the reduced graph.
    adjacency = {
        node: list(resulting_graph.successors(node))
        for node in resulting_graph.nodes
    }
    num_edges_left_in_new_SCC = 0
    for component in tarjan(adjacency):
        if len(component) > 1:
            num_edges_left_in_new_SCC += resulting_graph.subgraph(
                component).number_of_edges()

    print('num_all_edges = ', num_all_scc_edges)
    print('num_of_size_two_cycle_edges = ', num_of_size_two_cycle_edges)
    print('num_edges_left_in_new_SCC = ', num_edges_left_in_new_SCC)

    alpha = num_of_size_two_cycle_edges / num_all_scc_edges
    beta = num_edges_left_in_new_SCC / num_all_scc_edges
    return (alpha, beta)
def __init__(self, video1: Video, video2: Video, delta=5000):
    """Build the match graph between the points of two videos.

    Every point of both videos becomes a node keyed by its ``uuid``.  An
    edge ``a -> b`` is added when ``a.match(b, delta)`` is true for points of
    different videos, in both directions.  The strongly connected components
    are stored in ``self.tj`` and converted by ``self.tj_to_points()`` into
    ``self.tj_pts``.

    :param video1: first video; must provide ``points()``.
    :param video2: second video; must provide ``points()``.
    :param delta: tolerance forwarded to ``match``.
    """
    self.pts_1 = video1.points()
    self.pts_2 = video2.points()
    self.delta = delta

    self.graph = {}
    for point in self.pts_1:
        self.graph[point.uuid] = []
    for point in self.pts_2:
        self.graph[point.uuid] = []

    # First video -> second video, then the other way round.
    directions = ((self.pts_1, self.pts_2), (self.pts_2, self.pts_1))
    for sources, targets in directions:
        for source in sources:
            for target in targets:
                if source.match(target, self.delta):
                    self.graph[source.uuid].append(target.uuid)

    self.tj = tarjan(self.graph)
    self.tj_pts = self.tj_to_points()
def order_resources(self, resources):
    """Sort ``resources`` according to their ``after`` / ``before`` hints.

    ``res.after`` adds an edge from ``res.name`` to the named resource,
    ``res.before`` adds an edge from the named resource to ``res.name``;
    hints that name an unknown resource are ignored.  The resources are
    sorted by the position of their name in the flattened ``tarjan`` result.

    :param resources: sequence of objects with ``name``, ``after`` and
        ``before`` attributes.  An empty or ``None`` value is returned as is.
    :return: a new sorted list.
    :raises CyclicResourceOrder: if the hints form a cycle.
    """
    if not resources:
        return resources

    graph = OrderedDict((res.name, []) for res in resources)
    for res in resources:
        if res.after and res.after in graph:
            graph[res.name].append(res.after)
        if res.before and res.before in graph:
            graph[res.before].append(res.name)

    groups = list(tarjan(graph))
    # A real list: on Python 3 a ``filter`` object is always truthy, which
    # would report a cycle for every input.
    cyclic_order = [sorted(group) for group in groups if len(group) > 1]
    if cyclic_order:
        raise CyclicResourceOrder(cyclic_order, resources, graph)

    # Position of each name in the flattened order; the first occurrence
    # wins, like ``list.index`` did.
    position = {}
    for group in groups:
        for name in group:
            position.setdefault(name, len(position))
    return sorted(resources, key=lambda res: position[res.name])
def get_execution_order():
    """Rebuild the dependency graph and return an execution order.

    ``dep_graph`` and ``uncommitted`` are cleared, ``build_graph(instance)``
    repopulates them, and the strongly connected components of ``dep_graph``
    are flattened into one list; the members of each component are ordered
    with ``scc_comparator``.

    :return: ``(can_execute, order)`` where ``can_execute`` is the value
        returned by ``build_graph`` and ``order`` is the flattened list.
    """
    # Local import keeps this block self-contained.  ``sorted(cmp=...)`` only
    # exists on Python 2; ``cmp_to_key`` works on 2.7 and on Python 3.
    from functools import cmp_to_key

    dep_graph.clear()
    uncommitted.clear()
    can_execute = build_graph(instance)
    order_key = cmp_to_key(scc_comparator)
    execution_order = []
    for component in tarjan(dep_graph):
        execution_order.extend(sorted(component, key=order_key))
    return can_execute, execution_order
def main(data):
    """Rebuild the entity, relation and subscription collections from ``data``.

    This is Python 2 code.  It is destructive: ``Es.ecol``, ``Es.rcol``,
    ``Es.mcol``, ``subscr_Es.ecol`` and ``subscr_Es.scol`` are dropped first.

    ``data`` is a mapping whose keys "EduInstitute", "Study", "OldKnGroup",
    "OldKnUser", "OldSeat", "Event", "EventSubscription" and "Alias" each
    hold an iterable of dict records.  The steps, in order: tags, institutes,
    studies, groups and their hierarchy, users and memberships, brands,
    seats, merging of adjacent relations, events, event subscriptions and
    aliases.  Nothing is returned.
    """
    def str_to_date(s):
        # "YYYY-MM-DD" -> datetime at midnight; None is passed through.
        if s is None:
            return None
        return datetime(*strptime(s, "%Y-%m-%d")[:3])

    def year_to_dates(year):
        # Year ``year`` runs from 1 September (2003 + year) until
        # 31 August (2004 + year).
        return (datetime(2003 + year, 9, 1), datetime(2004 + year, 8, 31))

    # NOTE(review): ``tags=[]`` is a mutable default shared between calls; it
    # is only stored in the inserted document here, not modified.
    def create_tag(name, humanName, tags=[]):
        return Es.ecol.insert(
            {"types": ["tag"], "names": [name], "humanNames": [{"name": name, "human": humanName}], "tags": tags}
        )

    print "dropping"
    Es.ecol.drop()
    Es.rcol.drop()
    Es.mcol.drop()
    subscr_Es.ecol.drop()
    subscr_Es.scol.drop()
    print "creating indices"
    Es.ensure_indices()
    subscr_Es.ensure_indices()
    mod_Es.ensure_indices()
    # Lookup tables from the ids (or names) of ``data`` to the new documents.
    conv_inst = dict()
    conv_study = dict()
    conv_group = dict()
    conv_group_byname = dict()
    conv_event = dict()
    conv_seat = dict()
    conv_user = dict()
    # Groups that are skipped entirely ...
    ignore_groups = frozenset(["leden-oud"])
    # ... and groups that are created but whose memberships are skipped.
    ignore_groups_members = frozenset(["leden"])
    ignore_groups_ids = set()
    ignore_groups_members_ids = set()
    # Per-year groups ("leden1" .. "bestuur8") are folded into one group per
    # base name with dated relations.
    year_groups = frozenset(
        ["leden" + str(x) for x in range(1, 9)]
        + ["kasco" + str(x) for x in range(1, 9)]
        + ["bestuur" + str(x) for x in range(1, 9)]
    )
    year_groups_ids = dict()  # group id -> full name, e.g. "bestuur3"
    year_groups_lut = {}  # group id -> (base name, year)
    print "initial tags"
    system_tag = create_tag("!system", "Systeemstempels")
    year_overrides_tag = create_tag("!year-overrides", "Jaarlidmaatschapstempels", [system_tag])
    virtual_group_tag = create_tag("!virtual-group", "Virtuele groep", [system_tag])
    sofa_brand_tag = create_tag("!sofa-brand", "Sofa merk", [system_tag])
    year_group_tag = create_tag("!year-group", "Jaargroep", [system_tag])
    # One "is in year i" and one "is not in year i" override tag per year.
    for i in xrange(1, 9):
        Es.ecol.insert(
            {
                "types": ["tag"],
                "humanNames": [{"human": "Wel jaar %s" % i}],
                "year-override": {"year": i, "type": True},
                "tags": [year_overrides_tag],
            }
        )
        Es.ecol.insert(
            {
                "types": ["tag"],
                "humanNames": [{"human": "Niet jaar %s" % i}],
                "year-override": {"year": i, "type": False},
                "tags": [year_overrides_tag],
            }
        )
    print "institutes"
    for m in data["EduInstitute"]:
        n = {"types": ["institute"], "humanNames": [{"human": m["name"]}]}
        conv_inst[m["id"]] = Es.ecol.insert(n)
    print "studies"
    for m in data["Study"]:
        n = {"types": ["study"], "humanNames": [{"human": m["name"]}]}
        conv_study[m["id"]] = Es.ecol.insert(n)
    print "initial groups"
    # The two base groups that the per-year groups are folded into.
    conv_group_byname["bestuur"] = {
        "id": Es.ecol.insert(
            {
                "types": ["group"],
                "names": ["bestuur"],
                "tags": [year_group_tag],
                "humanNames": [{"name": "bestuur", "human": "Bestuur", "genitive_prefix": "van het"}],
                "description": "Het bestuur",
            }
        ),
        "name": "bestuur",
        "humanName": "Bestuur",
    }
    # NOTE(review): "humanName" is "Bestuur" here while the inserted document
    # says "Kascontrolecommissie" -- possibly a copy-paste slip; confirm.
    conv_group_byname["kasco"] = {
        "id": Es.ecol.insert(
            {
                "types": ["group"],
                "names": ["kasco"],
                "tags": [year_group_tag],
                "humanNames": [{"name": "kasco", "human": "Kascontrolecommissie", "genitive_prefix": "van de"}],
                "description": "De kascontrolecommissie",
            }
        ),
        "name": "kasco",
        "humanName": "Bestuur",
    }
    print "groups"
    for m in data["OldKnGroup"]:
        if m["name"] in ignore_groups:
            ignore_groups_ids.add(m["id"])
            continue
        if m["name"] in ignore_groups_members:
            ignore_groups_members_ids.add(m["id"])
        if m["name"] in year_groups:
            # Only remember the mapping: the last character of the name is
            # the year, the rest is the base group name.
            year_groups_ids[m["id"]] = m["name"]
            group = m["name"][:-1]
            year = int(m["name"][-1:])
            year_groups_lut[m["id"]] = (group, year)
            continue
        if m["name"] == "leden":
            m["isVirtual"] = False  # fix for leden
        # Virtual groups become tags, all others become groups.
        n = {
            "types": ["tag" if m["isVirtual"] else "group"],
            "names": [m["name"]],
            "humanNames": [{"name": m["name"], "human": m["humanName"], "genitive_prefix": m["genitive_prefix"]}],
            "description": m["description"],
            "temp": {"is_virtual": m["isVirtual"]},
        }
        conv_group[m["id"]] = {"id": Es.ecol.insert(n), "name": m["name"], "humanName": m["humanName"]}
        conv_group_byname[m["name"]] = conv_group[m["id"]]
    print "group hierarchy"
    # A parent group is recorded as a tag on the child.
    for m in data["OldKnGroup"]:
        if m["name"] in year_groups or m["name"] in ignore_groups:
            continue
        if m["parent"] is not None:
            if not m["parent"] in conv_group:
                print " %s was orphaned" % m["name"]
                continue
            Es.ecol.update({"_id": conv_group[m["id"]]["id"]}, {"$push": {"tags": conv_group[m["parent"]]["id"]}})
    print "users"
    for m in data["OldKnUser"]:
        # A password of the form "algorithm$salt$hash" is split into its
        # parts; anything else is stored as None.
        bits = m["password"].split("$")
        if len(bits) == 3:
            pwd = {"algorithm": bits[0], "salt": bits[1], "hash": bits[2]}
        else:
            pwd = None
        n = {
            "types": ["user"],
            "names": [m["username"]],
            "humanNames": [{"human": m["first_name"] + " " + m["last_name"]}],
            "person": {
                "titles": [],
                "nick": m["first_name"],
                "given": None,
                "family": m["last_name"],
                "gender": m["gender"],
                "dateOfBirth": str_to_date(m["dateOfBirth"]),
            },
            "emailAddresses": [{"email": m["email"], "from": DT_MIN, "until": DT_MAX}],
            "addresses": [
                {
                    "street": m["addr_street"],
                    "number": m["addr_number"],
                    "zip": m["addr_zipCode"],
                    "city": m["addr_city"],
                    "from": DT_MIN,
                    "until": DT_MAX,
                }
            ],
            "telephones": [{"number": m["telephone"], "from": DT_MIN, "until": DT_MAX}],
            "studies": [
                {
                    "institute": conv_inst.get(m["institute"]),
                    "study": conv_study.get(m["study"]),
                    "from": DT_MIN,
                    "until": DT_MAX,
                    "number": m["studentNumber"],
                }
            ],
            "temp": {
                "oud": m["in_oud"],
                "aan": m["in_aan"],
                "incasso": m["got_incasso"],
                "joined": m["dateJoined"],
                "remarks": m["remarks"],
            },
            "is_active": m["is_active"],
            "password": pwd,
        }
        conv_user[m["id"]] = Es.ecol.insert(n)
        for g in m["groups"]:
            if g in ignore_groups_ids or g in ignore_groups_members_ids:
                continue
            if g in year_groups_ids:
                # Membership of a per-year group becomes a relation with the
                # base group limited to that year.
                gname, year = year_groups_lut[g]
                f, u = year_to_dates(year)
                Es.rcol.insert(
                    {
                        "with": conv_group_byname[gname]["id"],
                        "who": conv_user[m["id"]],
                        "from": f,
                        "until": u,
                        "how": None,
                    }
                )
                continue
            Es.rcol.insert(
                {"with": conv_group[g]["id"], "who": conv_user[m["id"]], "from": DT_MIN, "until": DT_MAX, "how": None}
            )
    print "brands"
    # One brand per distinct seat name.
    for m in data["OldSeat"]:
        if m["name"] == "deelhoofd":
            m["name"] = "graficideelhoofd"
        if m["name"] in conv_seat:
            continue
        n = {
            "types": ["brand"],
            "names": [m["name"]],
            "tags": [sofa_brand_tag],
            "humanNames": [{"name": m["name"], "human": m["humanName"]}],
        }
        conv_seat[m["name"]] = {"id": Es.ecol.insert(n)}
    print "seats"
    for m in data["OldSeat"]:
        if m["group"] in year_groups_ids:
            gname = year_groups_ids[m["group"]]
            gdat = conv_group_byname[gname[:-1]]
            _from, until = year_to_dates(int(gname[-1:]))
        else:
            gdat = conv_group[m["group"]]
            _from, until = DT_MIN, DT_MAX
        # A virtual group "<group>-<seat>" plus the relation that puts the
        # user on that seat.
        n = {
            "types": ["group"],
            "names": [gdat["name"] + "-" + m["name"]],
            "description": [m["description"]],
            "tags": [virtual_group_tag],
            "virtual": {"type": "sofa", "with": gdat["id"], "how": conv_seat[m["name"]]["id"]},
            "humanNames": [{"name": gdat["name"] + "-" + m["name"], "human": m["humanName"] + " " + gdat["humanName"]}],
        }
        i = Es.ecol.insert(n)
        Es.rcol.insert(
            {
                "who": conv_user[m["user"]],
                "from": _from,
                "until": until,
                "how": conv_seat[m["name"]]["id"],
                "with": gdat["id"],
            }
        )
    print "merging relations"
    print " list until"
    # A relation that starts the day after an identical relation (same with,
    # how and who) ended is merged into that earlier relation.
    lut = dict()
    plan_changes = dict()  # id of the relation kept -> (new until, merged id)
    plan_remove = set()  # ids of the relations merged away
    for r in Es.rcol.find({"until": {"$lt": DT_MAX}}):
        lut[r["until"] + timedelta(1, 0), r["with"], r["how"], r["who"]] = r["_id"]
    print " crossreference from"
    for r in Es.rcol.find({"from": {"$gt": DT_MIN}}):
        n = (r["from"], r["with"], r["how"], r["who"])
        if n not in lut:
            continue
        plan_changes[lut[n]] = (r["until"], r["_id"])
        plan_remove.add(r["_id"])
    print " transitive closure of plan"
    print "small final tweaks to groups"
    Es.ecol.update({"names": "leden"}, {"$push": {"tags": year_group_tag}})
    # Collapse chains a -> b -> c into a -> c.  The dict is modified while
    # being iterated, hence the break and restart after every change.
    done = False
    while not done:
        done = True
        for k, v in plan_changes.iteritems():
            if v[1] in plan_changes:
                plan_changes[k] = plan_changes[v[1]]
                del plan_changes[v[1]]
                done = False
                break
    print " execute"
    for r in plan_remove:
        Es.rcol.remove({"_id": r})
    for k, v in plan_changes.iteritems():
        Es.rcol.update({"_id": k}, {"$set": {"until": v[0]}})
    print "event"
    for m in data["Event"]:
        # An owner that was a per-year group is replaced by its base group.
        if m["owner"] not in conv_group:
            gname = year_groups_lut[m["owner"]][0]
            gid = conv_group_byname[gname]["id"]
        else:
            gid = conv_group[m["owner"]]["id"]
        conv_event[m["id"]] = subscr_Es.ecol.insert(
            {
                "mailBody": m["mailBody"],
                "humanName": m["humanName"],
                "description": m["description"],
                "cost": m["cost"],
                "is_open": m["is_open"],
                "owner": gid,
                "name": m["name"],
            }
        )
    print "event subscriptions"
    for m in data["EventSubscription"]:
        subscr_Es.scol.insert(
            {
                "event": conv_event[m["event"]],
                "userNotes": m["userNotes"],
                "debit": m["debit"],
                "user": conv_user[m["user"]],
            }
        )
    print "giedo updatedb"
    update_db(None)
    print "alias"
    print " ids_by_names"
    name2id = Es.ids_by_names()
    print " to graph"
    # NOTE(review): each value is a single target, not a list of successors;
    # verify that ``tarjan`` accepts this shape.
    alias_graph = {}
    for m in data["Alias"]:
        alias_graph[m["source"]] = m["target"]
    print " tarjan"
    # Walk the aliases in tarjan order so that an alias whose target is
    # itself an alias can be resolved through ``name2id``.
    for scc in tarjan(alias_graph):
        assert len(scc) == 1
        src = scc[0]
        if src in name2id:
            continue
        if not src in alias_graph:
            continue
        if not alias_graph[src] in name2id:
            print " ? %s -> %s" % (src, alias_graph[src])
            continue
        name2id[src] = name2id[alias_graph[src]]
        Es.ecol.update({"names": alias_graph[src]}, {"$push": {"names": src}})
#!/usr/bin/python
"""Print the size of the largest strongly connected component of the graph
stored in ``combined.json``."""
from tarjan import tarjan
import yaml

# ``safe_load`` only builds plain data, which is all a JSON document needs,
# and unlike a bare ``yaml.load`` it does not require a Loader argument.
# ``with`` closes the file once it has been parsed.
with open('combined.json', 'r') as f:
    j = yaml.safe_load(f)
print('Loaded the graph')

t = tarjan(j)
sorted_t = sorted(t, key=len)
# Single-argument form: prints the same text on Python 2 and Python 3.
print('SCC size:  %d' % len(sorted_t[-1]))
def load(path):
    """ Loads the configuration file.

        A lot of the work is done by YAML.  We validate the easy bits with
        a JSON schema. The rest by hand.

        The returned dict has its 'keys' entries parsed into
        key/options/comment/keytype dicts and its 'servers' entries keyed by
        server name, with short-hands expanded, `like` inheritance resolved
        and defaults filled in.

        Raises ConfigurationError when the file is empty, when a key is
        listed both present/absent or allowed/absent on a server, when a
        server refers to an unknown key or an unknown `like` target, when
        two servers share a name, or when the `like` references are cyclic.
        """
    # TODO Cache schema and configuration file
    l.debug('loading configuration file ...')
    with open(path) as f:
        # NOTE(review): yaml.load is not meant for untrusted input; consider
        # yaml.safe_load if the configuration can come from elsewhere.
        cfg = yaml.load(f)

    if not isinstance(cfg, dict):
        raise ConfigurationError('Configuration file is empty')

    l.debug(' - checking schema')
    # First small fixes which the schema can't handle
    cfg.setdefault('servers', {})
    cfg['servers'].setdefault('$default', {})
    for key in cfg['servers']:
        if cfg['servers'][key] is None:
            cfg['servers'][key] = dict()

    # Now check the schema
    jsonschema.validate(cfg, get_schema())
    # TODO format into pretty error message

    l.debug(' - processing keys')
    new_keys = {}
    cfg.setdefault('keys', {})
    for key_name, key in six.iteritems(cfg['keys']):
        # TODO handle error
        entry = claviger.authorized_keys.Entry.parse(key)
        new_key = {'key': entry.key,
                   'options': entry.options,
                   'comment': entry.comment,
                   'keytype': entry.keytype}
        new_keys[key_name] = new_key
    cfg['keys'] = new_keys

    l.debug(' - processing server stanza short-hands')
    new_servers = {}
    for server_key, server in six.iteritems(cfg['servers']):
        parsed_server_key = parse_server_key(server_key)
        server.setdefault('name', server_key)
        server_name = server['name']
        server.setdefault('port', parsed_server_key.port)
        server.setdefault('user', parsed_server_key.user)
        server.setdefault('hostname', parsed_server_key.hostname)
        server.setdefault('ssh_user', server['user'])
        server.setdefault('present', [])
        server.setdefault('absent', [])
        server.setdefault('allow', [])
        server.setdefault('keepOtherKeys')
        server.setdefault('like', '$default' if server_key != '$default'
                          else None)
        server.setdefault('abstract', parsed_server_key.abstract)

        prabsent = frozenset(server['present']) & frozenset(server['absent'])
        if prabsent:
            raise ConfigurationError(
                "Keys {0} are required to be both present and absent on {1}"
                .format(tuple(prabsent), server_name))
        ablow = frozenset(server['allow']) & frozenset(server['absent'])
        if ablow:
            raise ConfigurationError(
                "Keys {0} are listed allowed and absent on {1}"
                .format(tuple(ablow), server_name))

        for key_name in itertools.chain(server['present'], server['absent'],
                                        server['allow']):
            if key_name not in cfg['keys']:
                # The message used to be built and silently discarded;
                # an unknown key is a configuration error.
                raise ConfigurationError(
                    "Key {0} (on {1}) does not exist".format(
                        key_name, server_name))

        if server_name in new_servers:
            raise ConfigurationError(
                "Duplicate server name {0}".format(server_name))
        new_servers[server_name] = server
    cfg['servers'] = new_servers

    l.debug(' - resolving server stanza inheritance')
    # Every `like` must name an existing server.  Check this before walking
    # the graph, so that a dangling reference is reported as a configuration
    # error instead of surfacing as a bare KeyError further down.
    for server_name, server in six.iteritems(cfg['servers']):
        if server['like'] and server['like'] not in cfg['servers']:
            raise ConfigurationError(
                "Server {0} is like {1}, which does not exist".format(
                    server_name, server['like']))

    # create dependency graph and use Tarjan's algorithm to find a possible
    # order to evaluate the server stanzas.
    server_dg = {server_name: [server['like']] if server['like'] else []
                 for server_name, server in six.iteritems(cfg['servers'])}
    for server_cycle_names in tarjan.tarjan(server_dg):
        if len(server_cycle_names) != 1:
            raise ConfigurationError(
                "There is a cyclic dependacy among the servers {0}".format(
                    server_cycle_names))
        target_server = cfg['servers'][server_cycle_names[0]]
        if not target_server['like']:
            continue
        source_server = cfg['servers'][target_server['like']]

        # First the simple attributes: inherit only what is still unset.
        for attr in ('port', 'user', 'hostname', 'ssh_user',
                     'keepOtherKeys'):
            if attr in source_server:
                if target_server[attr] is None:
                    target_server[attr] = source_server[attr]

        # Now, the present/absent/allow lists.  The target's own settings
        # win over conflicting inherited ones.
        for key in source_server['present']:
            if key in target_server['absent']:
                continue
            if key not in target_server['present']:
                target_server['present'].append(key)
        for key in source_server['absent']:
            if (key in target_server['present']
                    or key in target_server['allow']):
                continue
            if key not in target_server['absent']:
                target_server['absent'].append(key)
        for key in source_server['allow']:
            if key in target_server['absent']:
                continue
            if key not in target_server['allow']:
                target_server['allow'].append(key)

    l.debug(' - setting defaults on server stanzas')
    for server in six.itervalues(cfg['servers']):
        for attr, dflt in (('port', 22),
                           ('user', 'root'),
                           ('keepOtherKeys', True)):
            if server[attr] is None:
                server[attr] = dflt

    l.debug(' ... done')

    return cfg
def main(data): def str_to_date(s): if s is None: return None return datetime(*strptime(s,'%Y-%m-%d')[:3]) def year_to_dates(year): return (datetime(2003+year,9,1), datetime(2004+year,8,31)) def create_tag(name, humanName, tags=[]): return Es.ecol.insert({'types': ['tag'], 'names': [name], 'humanNames': [{ 'name': name, 'human': humanName}], 'tags': tags}) print 'dropping' Es.ecol.drop() Es.rcol.drop() Es.mcol.drop() subscr_Es.ecol.drop() subscr_Es.scol.drop() print 'creating indices' Es.ensure_indices() subscr_Es.ensure_indices() mod_Es.ensure_indices() conv_inst = dict() conv_study = dict() conv_group = dict() conv_group_byname = dict() conv_event = dict() conv_seat = dict() conv_user = dict() ignore_groups = frozenset(['leden-oud']) ignore_groups_members = frozenset(['leden']) ignore_groups_ids = set() ignore_groups_members_ids = set() year_groups = frozenset( ['leden'+str(x) for x in range(1,9)]+ ['kasco'+str(x) for x in range(1,9)]+ ['bestuur'+str(x) for x in range(1,9)]) year_groups_ids = dict() year_groups_lut = {} print 'initial tags' system_tag = create_tag('!system', 'Systeemstempels') year_overrides_tag = create_tag('!year-overrides', 'Jaarlidmaatschapstempels', [system_tag]) virtual_group_tag = create_tag("!virtual-group", 'Virtuele groep', [system_tag]) sofa_brand_tag = create_tag("!sofa-brand", 'Sofa merk', [system_tag]) year_group_tag = create_tag("!year-group", 'Jaargroep', [system_tag]) for i in xrange(1,9): Es.ecol.insert({'types': ['tag'], 'humanNames': [{'human': 'Wel jaar %s' % i}], 'year-override': {'year': i, 'type': True}, 'tags': [year_overrides_tag]}) Es.ecol.insert({'types': ['tag'], 'humanNames': [{'human': 'Niet jaar %s' % i}], 'year-override': {'year': i, 'type': False}, 'tags': [year_overrides_tag]}) print 'institutes' for m in data['EduInstitute']: n = { 'types': ['institute'], 'humanNames': [{'human': m['name']}]} conv_inst[m['id']] = Es.ecol.insert(n) print 'studies' for m in data['Study']: n = { 'types': ['study'], 'humanNames': 
[{'human': m['name']}]} conv_study[m['id']] = Es.ecol.insert(n) print 'initial groups' conv_group_byname['bestuur'] = { 'id': Es.ecol.insert({ 'types': ['group'], 'names': ['bestuur'], 'tags': [year_group_tag], 'humanNames': [{ 'name': 'bestuur', 'human': 'Bestuur', 'genitive_prefix': 'van het'}], 'description': "Het bestuur"}), 'name': 'bestuur', 'humanName': 'Bestuur'} conv_group_byname['kasco'] = { 'id': Es.ecol.insert({ 'types': ['group'], 'names': ['kasco'], 'tags': [year_group_tag], 'humanNames': [{ 'name': 'kasco', 'human': 'Kascontrolecommissie', 'genitive_prefix': 'van de'}], 'description': "De kascontrolecommissie"}), 'name': 'kasco', 'humanName': 'Bestuur'} print 'groups' for m in data['OldKnGroup']: if m['name'] in ignore_groups: ignore_groups_ids.add(m['id']) continue if m['name'] in ignore_groups_members: ignore_groups_members_ids.add(m['id']) if m['name'] in year_groups: year_groups_ids[m['id']] = m['name'] group = m['name'][:-1] year = int(m['name'][-1:]) year_groups_lut[m['id']] = (group, year) continue if m['name'] == 'leden': m['isVirtual'] = False # fix for leden n = { 'types': ['tag' if m['isVirtual'] else 'group'], 'names': [m['name']], 'humanNames': [{ 'name': m['name'], 'human': m['humanName'], 'genitive_prefix': m['genitive_prefix'] }], 'description': m['description'], 'temp':{ 'is_virtual': m['isVirtual'] } } conv_group[m['id']] = {'id': Es.ecol.insert(n), 'name': m['name'], 'humanName': m['humanName']} conv_group_byname[m['name']] = conv_group[m['id']] print 'group hierarchy' for m in data['OldKnGroup']: if m['name'] in year_groups or m['name'] in ignore_groups: continue if m['parent'] is not None: if not m['parent'] in conv_group: print " %s was orphaned" % m['name'] continue Es.ecol.update({'_id': conv_group[m['id']]['id']}, {'$push': {'tags': conv_group[ m['parent']]['id']}}) print 'users' for m in data['OldKnUser']: bits = m['password'].split('$') if len(bits) == 3: pwd = {'algorithm': bits[0], 'salt': bits[1], 'hash': bits[2]} else: 
pwd = None n = { 'types': ['user'], 'names': [m['username']], 'humanNames': [{'human': m['first_name'] + ' ' + m['last_name']}], 'person': { 'titles': [], 'nick': m['first_name'], 'given': None, 'family': m['last_name'], 'gender': m['gender'], 'dateOfBirth': str_to_date(m['dateOfBirth']) }, 'emailAddresses': [ {'email': m['email'], 'from': DT_MIN, 'until': DT_MAX }], 'addresses': [ {'street': m['addr_street'], 'number': m['addr_number'], 'zip': m['addr_zipCode'], 'city': m['addr_city'], 'from': DT_MIN, 'until': DT_MAX }], 'telephones': [ {'number': m['telephone'], 'from': DT_MIN, 'until': DT_MAX}], 'studies': [ {'institute': conv_inst.get(m['institute']), 'study': conv_study.get(m['study']), 'from': DT_MIN, 'until': DT_MAX, 'number': m['studentNumber']} ], 'temp': { 'oud': m['in_oud'], 'aan': m['in_aan'], 'incasso': m['got_incasso'], 'joined': m['dateJoined'], 'remarks': m['remarks'] }, 'is_active': m['is_active'], 'password': pwd } conv_user[m['id']] = Es.ecol.insert(n) for g in m['groups']: if g in ignore_groups_ids or \ g in ignore_groups_members_ids: continue if g in year_groups_ids: gname, year = year_groups_lut[g] f, u = year_to_dates(year) Es.rcol.insert({ 'with': conv_group_byname[gname]['id'], 'who': conv_user[m['id']], 'from': f, 'until': u, 'how': None}) continue Es.rcol.insert({ 'with': conv_group[g]['id'], 'who': conv_user[m['id']], 'from': DT_MIN, 'until': DT_MAX, 'how': None}) print 'brands' for m in data['OldSeat']: if m['name'] == 'deelhoofd': m['name'] = 'graficideelhoofd' if m['name'] in conv_seat: continue n = {'types': ['brand'], 'names': [m['name']], 'tags': [sofa_brand_tag], 'humanNames': [{'name': m['name'], 'human': m['humanName']}]} conv_seat[m['name']] = {'id': Es.ecol.insert(n)} print 'seats' for m in data['OldSeat']: if m['group'] in year_groups_ids: gname = year_groups_ids[m['group']] gdat = conv_group_byname[gname[:-1]] _from, until = year_to_dates(int(gname[-1:])) else: gdat = conv_group[m['group']] _from, until = DT_MIN, DT_MAX n = 
{'types': ['group'], 'names': [gdat['name'] + '-' + m['name']], 'description': [m['description']], 'tags': [virtual_group_tag], 'virtual': { 'type': 'sofa', 'with': gdat['id'], 'how': conv_seat[m['name']]['id']}, 'humanNames': [{ 'name': gdat['name'] + '-' + m['name'], 'human': m['humanName'] + ' ' + gdat['humanName']}]} i = Es.ecol.insert(n) Es.rcol.insert({'who': conv_user[m['user']], 'from': _from, 'until': until, 'how': conv_seat[m['name']]['id'], 'with': gdat['id']}) print 'merging relations' print ' list until' lut = dict() plan_changes = dict() plan_remove = set() for r in Es.rcol.find({'until': {'$lt': DT_MAX}}): lut[r['until'] + timedelta(1,0), r['with'], r['how'], r['who']] = r['_id'] print ' crossreference from' for r in Es.rcol.find({'from': {'$gt': DT_MIN}}): n = (r['from'], r['with'], r['how'], r['who']) if n not in lut: continue plan_changes[lut[n]] = (r['until'], r['_id']) plan_remove.add(r['_id']) print ' transitive closure of plan' print 'small final tweaks to groups' Es.ecol.update({'names': 'leden'}, {'$push': {'tags': year_group_tag}}) done = False while not done: done = True for k,v in plan_changes.iteritems(): if v[1] in plan_changes: plan_changes[k] = plan_changes[v[1]] del plan_changes[v[1]] done = False break print ' execute' for r in plan_remove: Es.rcol.remove({'_id': r}) for k,v in plan_changes.iteritems(): Es.rcol.update({'_id': k}, {'$set': {'until': v[0]}}) print 'event' for m in data['Event']: if m['owner'] not in conv_group: gname = year_groups_lut[m['owner']][0] gid = conv_group_byname[gname]['id'] else: gid = conv_group[m['owner']]['id'] conv_event[m['id']] = subscr_Es.ecol.insert({ 'mailBody': m['mailBody'], 'humanName': m['humanName'], 'description': m['description'], 'cost': m['cost'], 'is_open': m['is_open'], 'owner': gid, 'name': m['name']}) print 'event subscriptions' for m in data['EventSubscription']: subscr_Es.scol.insert({ 'event': conv_event[m['event']], 'userNotes': m['userNotes'], 'debit': m['debit'], 'user': 
conv_user[m['user']]}) print 'giedo updatedb' update_db(None) print 'alias' print ' ids_by_names' name2id = Es.ids_by_names() print ' to graph' alias_graph = {} for m in data['Alias']: alias_graph[m['source']] = m['target'] print ' tarjan' for scc in tarjan(alias_graph): assert len(scc) == 1 src = scc[0] if src in name2id: continue if not src in alias_graph: continue if not alias_graph[src] in name2id: print ' ? %s -> %s' % (src, alias_graph[src]) continue name2id[src] = name2id[alias_graph[src]] Es.ecol.update({'names': alias_graph[src]}, {'$push': {'names': src}})
graph = { 'controller': ['handlebars', 'messagefactory', 'jquery'], 'messagefactory': ['jquery', 'notify'], 'storage': ['jquery', 'messagefactory'], 'meetingstorage': ['jquery', 'storage'], 'synchronizer': ['jquery'], 'livesearch': ['jquery'], 'quickupload': ['jquery'], 'notify': ['jquery'], 'qtip': ['jquery'], 'autocomplete': ['jquery'], 'tooltip': ['jquery', 'qtip'], 'trixcustom': ['trix'], 'trix': [], 'base': ['jquery'], 'datetimepicker_base': ['jquery'], 'datetimepicker_meeting': ['jquery', 'datetimepicker_base'], 'handlebars': [], 'editbox': ['jquery', 'controller'], 'editor': ['jquery', 'handlebars', 'controller', 'meetingstorage', 'synchronizer'], 'meeting': ['jquery', 'controller', 'editbox', 'pin'], 'protocol': ['jquery', 'meetingstorage', 'pin', 'synchronizer', 'controller', 'scrollspy', 'autocomplete'], 'scrollspy': ['jquery'], 'pin': ['jquery'], 'prepoverlay': ['jquery'], 'ajax-prefilter': ['jquery'], 'breadcrumbs': ['jquery'], } print tarjan(graph)
def find_clusters_from_peaks2(self, peaks, near_adj_matr, allowed_elecs):
    """
    More advanced clustering algo from Honghui.

    Repeatedly picks the frequency window holding the most remaining peaks,
    seeds a cluster with the largest group of mutually near electrodes that
    have a peak in that window, and then grows the cluster with neighbouring
    electrodes whose peaks lie close to the median peak of their clustered
    neighbours.

    Parameters
    ----------
    peaks: numpy.ndarray
        boolean array indexed as [frequency, electrode]. NOTE: it is modified
        in place; disallowed electrodes are cleared and every peak assigned
        to a cluster is removed.
    near_adj_matr: numpy.ndarray
        square boolean array indicating whether any two electrodes are
        considered to be near each other
    allowed_elecs: numpy.array
        boolean array, one entry per electrode, indicating whether the
        electrode may be included

    Returns
    -------
    pandas.DataFrame with a row for each electrode and a column for each
    cluster with more than three electrodes (cluster1, cluster2, ...), plus
    x, y, z and label columns. Cluster values are taken from self.freqs; NaN
    means the electrode is not in that cluster. Returns None when no cluster
    qualifies.
    """
    steplength = 5
    windowLength = 15
    # Number of frequency bins; this used to be hard-coded as 200.
    n_freqs = peaks.shape[0]
    CLUSTERS = []

    peaks[:, ~allowed_elecs] = False
    peakid = peaks  # alias, not a copy: assigned peaks are cleared in `peaks`

    # Sliding [start, stop) windows over the frequency axis.
    windows = np.stack([[start, windowLength + start]
                        for start in range(0, n_freqs + 1 - steplength, steplength)])

    for ire in range(10):
        # Find the window with the most remaining peaks.
        peak_counts = np.sum(peakid, axis=1)
        window_counts = np.array([peak_counts[w[0]:w[1]].sum() for w in windows])
        peak_window = np.argmax(window_counts)
        win_start, win_stop = windows[peak_window]

        # Keep adjacency only between electrodes with a peak in that window.
        near_this_ev = near_adj_matr.copy()
        peak_within_window = np.any(peakid[win_start:win_stop], axis=0)
        near_this_ev[~peak_within_window, :] = False
        near_this_ev[:, ~peak_within_window] = False

        # use tarjan algorithm to find the clusters
        graph = {}
        for elec, row in enumerate(near_this_ev):
            graph[elec] = np.where(row)[0]
        groups = tarjan(graph)

        # choose the connected component with most electrodes as seed
        clusterSeed = sorted(max(groups, key=len))

        window_true = np.zeros(n_freqs, dtype=bool)
        window_true[win_start:win_stop] = True

        # cluster maps electrode index -> frequency index of its peak
        cluster = {}
        if len(clusterSeed) > 1:
            for elec in clusterSeed:
                # Lowest-index peak of this electrode inside the window.
                peak_freq = np.squeeze(np.where(np.logical_and(peakid[:, elec], window_true))[0][0])
                cluster[elec] = peak_freq
                peakid[peak_freq, elec] = False

        if len(cluster) > 1:
            # Grow the cluster over a fixed number of passes.
            for ire2 in range(10):
                for elec in range(len(near_adj_matr)):
                    if elec in cluster:
                        continue
                    # Peaks of the clustered electrodes that are near this one.
                    near_freqS = np.squeeze(list(cluster.values()))[near_adj_matr[elec, list(cluster.keys())]]
                    if len(near_freqS) > 1:
                        near_freq = int(np.median(near_freqS))
                        electrode_frequency = np.where(peakid[:, elec])[0]
                        if np.any(np.abs(electrode_frequency - near_freq) < 15):
                            # Take this electrode's peak closest to the neighbours' median.
                            peak_freq = np.array(min(electrode_frequency, key=lambda x: abs(x - near_freq)))
                            cluster[elec] = peak_freq
                            peakid[peak_freq, elec] = False
        CLUSTERS.append(cluster)

    # Convert frequency indices into the values stored in self.freqs.
    for cluster in CLUSTERS:
        for elec in cluster:
            cluster[elec] = self.freqs[cluster[elec]]

    # One single-column DataFrame per cluster with more than three electrodes.
    df_list = []
    kept_clusters = [cluster for cluster in CLUSTERS if len(cluster) > 3]
    for cluster_count, cluster in enumerate(kept_clusters, 1):
        col_name = 'cluster{}'.format(cluster_count)
        cluster_df = pd.DataFrame(data=np.full(shape=(peaks.shape[1]), fill_value=np.nan), columns=[col_name])
        for elec in cluster:
            cluster_df.iloc[elec] = cluster[elec]
        df_list.append(cluster_df)

    df = None
    if df_list:
        df = pd.concat(df_list, axis='columns')
        x, y, z = self._get_elec_xyz().T
        df['x'] = x
        df['y'] = y
        df['z'] = z
        df['label'] = self.elec_info['label']
    return df
def find_clusters_from_peaks(self, peaks, near_adj_matr, allowed_elecs):
    """
    Given a frequency by channel array, use the tarjan algorithm to identify clusters of electrodes.

    Parameters
    ----------
    peaks: numpy.ndarray
        frequency x channel boolean array. NOTE: columns of electrodes that are not allowed are cleared in place.
    near_adj_matr: numpy.ndarray
        square boolean array indicating whether any two electrodes are considered to be near each other
    allowed_elecs: numpy.array
        boolean array the same length as the number of electrodes, indicating whether an electrode can be included or
        should be automatically excluded

    Returns
    -------
    pandas.DataFrame with a row for each electrode and a column for each cluster, named cluster1, cluster2, ...
    The value indicates the frequency of the peak for that electrode. NaN means no peak/not in cluster.
    The x, y, z and label columns are appended. None is returned when no cluster was found.
    """

    # frequency bins: one window of width cluster_freq_range around each center
    half_width = self.cluster_freq_range / 2.
    centers = np.arange(self.freqs[0], self.freqs[-1] + .001, 1)
    window_bins = np.stack([(self.freqs >= center - half_width) & (self.freqs <= center + half_width)
                            for center in centers], axis=0)

    # make sure only electrodes of allowed types are included
    peaks[:, ~allowed_elecs] = False

    # per window and electrode: is there any peak? Then find the windows where the peak count has a local maximum
    binned_peaks = np.stack([np.any(peaks[in_bin], axis=0) for in_bin in window_bins], axis=0)
    peak_freqs = my_local_max(binned_peaks.sum(axis=1))

    n_elecs = peaks.shape[1]
    cluster_count = 0
    df_list = []

    # for each peak frequency, identify clusters
    for this_peak_freq in peak_freqs:
        has_peak = binned_peaks[this_peak_freq]
        in_window = window_bins[this_peak_freq]

        # keep adjacency only between electrodes that have a peak in this window
        adjacency = near_adj_matr.copy()
        adjacency[~has_peak] = False
        adjacency[:, ~has_peak] = False

        # use tarjan algorithm to find the clusters
        graph = {elec: np.where(row)[0] for elec, row in enumerate(adjacency)}
        clusters = tarjan(graph)

        for members in clusters:
            # only keep clusters with enough electrodes
            if len(members) < self.min_num_elecs:
                continue
            cluster_count += 1

            # one column holding the peak frequency of every electrode in the cluster
            col_name = 'cluster{}'.format(cluster_count)
            cluster_df = pd.DataFrame(data=np.full(shape=(n_elecs), fill_value=np.nan), columns=[col_name])

            # for each member electrode, the mean of its peak frequencies inside this window
            window_freqs = self.freqs[in_window]
            mean_freqs = [np.mean(window_freqs[elec_peaks]) for elec_peaks in peaks[in_window][:, members].T]
            cluster_df.iloc[members, 0] = mean_freqs
            df_list.append(cluster_df)

    # nothing found: there is no table to build
    if not df_list:
        return None

    # one column per cluster, plus x, y, z and the electrode label
    df = pd.concat(df_list, axis='columns')
    x, y, z = self._get_elec_xyz().T
    df['x'] = x
    df['y'] = y
    df['z'] = z
    df['label'] = self.elec_info['label']
    return df
dep_instance = instances.get(dep) if dep_instance is not None: dep_instance._add_in_dep(instance.iid) with open('{}/{}:{}.execution.json'.format(PATH, KEY, REPLICA)) as f: for data in json.load(f): execution_order.append(data['InstanceID']) last_executed = instances.get(execution_order[-1]) # work out the expected execution order dep_graph = {} for instance in instances.values(): dep_graph[instance.iid] = instance._deps tsorted = tarjan(dep_graph) def _component_cmp(x, y): x = instances[x] y = instances[y] xID = UUID(x.iid) yID = UUID(y.iid) if x.seq != y.seq: return int(x.seq - y.seq) elif xID.time != yID.time: return int(xID.time - yID.time) else: return -1 if xID.bytes < yID.bytes else 1 strong_map = {}
b_int8__129 = Bad('int8_-129')
b_int8_128 = Bad('int8_128')
b_uint8__1 = Bad('uint8_-1')
# NOTE(review): the variable name says 257 but the test is named
# 'uint8_256' -- confirm which one is intended.
b_uint8_257 = Bad('uint8_256')

#-------------------------
# End of test declarations
#-------------------------

# Collect every Atest declared above, keyed to its dependencies, and let
# tarjan order them; the result is a list of cliques.
tests = dict((t, t.deps) for t in locals().values() if isinstance(t, Atest))
tj_tests = tarjan(tests)

# Flatten the cliques. A clique with more than one element is a circular
# dependency between tests, which is an error.
tests = []
for clique in tj_tests:
    if len(clique) == 1:
        tests.append(clique[0])
    else:
        print("ERROR, the tests {} have a circular dependency."
              "".format(str(clique)))
        exit(-1)

# Now tests is correctly sorted and ready to be used.
nb_tests = len(tests)
def test_tarjan(graph, node, expected):
    """Check the element at index 2 of tarjan(graph, node) against
    `expected`."""
    result = tarjan(graph, node)
    assert result[2] == expected