def quantifier_alternation_graph(prog: Program, exprs: List[Expr]) -> DiGraph:
    """Build the quantifier-alternation graph for a collection of expressions.

    Each expression in ``exprs`` contributes the edges produced by
    ``qa_edges_expr(prog, expr)``; all of them are accumulated into one
    directed graph, which is returned.
    """
    graph = DiGraph()
    for expression in exprs:
        expression_edges = qa_edges_expr(prog, expression)
        graph.add_edges_from(expression_edges)
    return graph
def graph_fixture():
    r"""
    Sets up our fixture class hierarchy graph for the metrics unit-tests.

    This class hierarchy looks like this (directed graph):

            R
           / \
          0   1
             / \
            2   3
           / \
          4   5
              |
              6

    The docstring is a raw string so that the backslashes in the diagram
    are kept literally instead of being read as escape sequences or line
    continuations.
    """
    G = DiGraph()
    G.add_edges_from([
        (ROOT, 0),
        (ROOT, 1),
        (1, 2),
        (1, 3),
        (2, 4),
        (2, 5),
        (5, 6),
    ])
    return G
def cache_langs():
    """Read in all language config files and save the result as pickles.

    Every ``config.y*ml`` found one directory below ``LANGS_DIR`` is parsed
    with ``yaml.safe_load`` and its mapping(s) are loaded through
    ``load_mapping_from_path``.  Two artefacts are written:

    * ``LANGS_NWORK_PATH`` -- a directed graph whose edges are the
      ``(in_lang, out_lang)`` pairs declared under a ``mappings`` key;
    * ``LANGS_PKL`` -- the dictionary of loaded data keyed by language code.

    Returns:
        dict: loaded data keyed by language code (the name of the directory
        containing the config file).
    """
    langs = {}
    mappings_legal_pairs = []
    # Sort by language code
    paths = sorted(Path(LANGS_DIR).glob('./*/config.y*ml'),
                   key=lambda x: x.parent.stem)
    for path in paths:
        code = path.parent.stem
        with open(path, encoding='utf8') as f:
            data = yaml.safe_load(f)
        # If there is a mappings key, there is more than one mapping
        # TODO: should put in some measure to prioritize non-generated
        # mappings and warn when they override
        if 'mappings' in data:
            for index, mapping in enumerate(data['mappings']):
                # Record the pair before the raw entry is replaced by the
                # loaded mapping.
                mappings_legal_pairs.append(
                    (mapping['in_lang'], mapping['out_lang']))
                data['mappings'][index] = load_mapping_from_path(path, index)
        else:
            data = load_mapping_from_path(path)
        # Plain assignment instead of rebuilding the whole dict per language.
        langs[code] = data

    # Save as a Directional Graph
    lang_network = DiGraph()
    lang_network.add_edges_from(mappings_legal_pairs)
    with open(LANGS_NWORK_PATH, 'wb') as f:
        write_gpickle(lang_network, f, protocol=4)

    with open(LANGS_PKL, 'wb') as f:
        pickle.dump(langs, f, protocol=4)

    return langs
def transform(transformer_graph: nx.DiGraph, field_graph: nx.DiGraph,
              actives: Set[Tuple[Field, Transformer]]) -> dict:
    """Apply the most powerful active (field, transformer) pair.

    The pair whose field has the greatest ``power`` is removed from
    ``actives`` and its transformer is applied to its field.  The resulting
    field graph is merged into ``field_graph``, and every transformer that
    follows the applied one in ``transformer_graph`` is activated for the new
    fields whose ``key`` matches the connecting edge's ``key``.

    Both ``field_graph`` and ``actives`` are modified in place.

    Raises:
        KeyError: if ``actives`` is empty (nothing can be removed).

    NOTE(review): the body has no ``return`` statement, so the visible code
    returns ``None`` despite the ``dict`` annotation -- confirm against
    callers.
    """
    # Pick next active pair.  Pairs are unpacked rather than accessed through
    # a ``.field`` attribute, because the pairs added at the end of this
    # function are plain tuples.  Ties keep the first pair encountered.
    current_pair = None
    current_power = None
    for pair in actives:
        candidate_field, _ = pair
        if current_pair is None or candidate_field.power > current_power:
            current_pair = pair
            current_power = candidate_field.power
    actives.remove(current_pair)

    current_field, current_transformer = current_pair
    assert current_field in field_graph
    new_field_graph = current_transformer.apply(current_field)

    # Sort fields by key.
    new_fields = defaultdict(list)
    for field in new_field_graph.nodes:
        key = new_field_graph.nodes[field]['key']
        new_fields[key].append(field)

    # Merge the freshly produced graph into the accumulated field graph.
    for node, data in new_field_graph.nodes(data=True):
        if node in field_graph.nodes:
            # A node that is already known must be an input of the transformer.
            assert data['key'] == 'initial'
        else:
            field_graph.add_node(node, **data)
    field_graph.add_edges_from(new_field_graph.edges(data=True))

    # Activate the successor transformers for the matching new fields.
    for _, transformer, data in transformer_graph.out_edges(
            current_transformer, data=True):
        key = data['key']
        for field in new_fields[key]:
            actives.add((field, transformer))
def common_edge_ratio(ref_user_connections, eval_user_connections, is_directed=False):
    """
    Calculate the fraction of common edges out of the union of two graphs.

    Parameters:
    ==========
    ref_user_connections: a list of edges
    eval_user_connections: a list of edges
    is_directed: boolean,
        False (default): edges form an undirected graph
        True: edges form a directed graph

    Returns:
    =======
    The number of edges present in both graphs divided by the number of
    edges present in at least one of them.  When both graphs have no edges
    the union is empty and 0.0 is returned instead of dividing by zero.
    """
    ref_user_connections = _normalize_connections(ref_user_connections, is_directed)
    eval_user_connections = _normalize_connections(eval_user_connections, is_directed)

    graph_type = DiGraph if is_directed else Graph
    ref_graph, eval_graph = graph_type(), graph_type()
    ref_graph.add_edges_from(ref_user_connections)
    eval_graph.add_edges_from(eval_user_connections)

    ref_edges, eval_edges = ref_graph.edges(), eval_graph.edges()
    tot_common = sum(1 for edge in eval_edges if edge in ref_edges)
    union_size = len(ref_edges) + len(eval_edges) - tot_common
    if union_size == 0:
        # Two empty graphs share nothing; avoid ZeroDivisionError.
        return 0.0
    return tot_common / union_size
def test_keep_one_level():
    """_keep_one_level on hierarchy 'A' leaves the expected edges and nodes."""
    # Not mocked: _find_bottom_of_hierarchy,
    # _remove_level_from_hierarchy, _summate_connections.
    branch_i = {'A-F': {'A-K': {}, 'A-L': {}}, 'A-H': {}}
    branch_b = {'A-I': branch_i, 'A-D': {'A-E': {}}}
    hierarchy = {'A-J': {'A-A': {}}, 'A-B': branch_b}

    connections = DiGraph()
    connections.add_edges_from([('A-D', 'A-Z'),
                                ('A-Y', 'A-F'),
                                ('A-J', 'A-X')])

    map_graph = MapGraph()
    map_graph.add_nodes_from(['A-A', 'A-B', 'A-D', 'A-E', 'A-F',
                              'A-H', 'A-I', 'A-J', 'A-K', 'A-L'])
    map_graph.cong = connections

    map_graph._keep_one_level(hierarchy, 'A')

    expected_edges = [('A-J', 'A-X'), ('A-D', 'A-Z'), ('A-Y', 'A-F')]
    expected_nodes = ['A-H', 'A-J', 'A-D', 'A-F']
    nt.assert_equal(map_graph.cong.edges(), expected_edges)
    nt.assert_equal(map_graph.nodes(), expected_nodes)
def test_merge_identical_nodes():
    """Merging 'A-1' into 'A-2' re-points connections and map edges to 'A-2'."""
    connections = DiGraph()
    connections.add_edges_from([('A-1', 'A-5'), ('A-4', 'A-1')])

    map_graph = MapGraph()
    # Here we aren't adding the reciprocals, because add_edges_from
    # has been mocked. And _merge_identical_nodes is designed only to
    # get a node's neighbors (i.e., its successors), assuming that
    # these are the same as its predecessors.
    outgoing = [
        ('A-1', 'A-3', {'RC': 'S', 'PDC': 5}),
        ('A-1', 'B-1', {'RC': 'I', 'PDC': 7}),
        ('A-1', 'C-1', {'RC': 'L', 'PDC': 10}),
        ('A-1', 'A-2', {'RC': 'I', 'PDC': 12}),
    ]
    map_graph.add_edges_from(outgoing)
    map_graph.cong = connections

    map_graph._merge_identical_nodes('A-2', 'A-1')

    nt.assert_equal(map_graph.cong.edges(),
                    [('A-2', 'A-5'), ('A-4', 'A-2')])
    nt.assert_equal(map_graph.edges(),
                    [('A-2', 'B-1'), ('A-2', 'C-1')])
class BigQueryGrapher(BaseGrapher):
    """Grapher that builds the friend graph from rows served by BigQuery."""

    def __init__(self, bq_service=None, gcs_service=None):
        """Store the BigQuery service, creating a default one when none is given."""
        super().__init__(gcs_service=gcs_service)
        if not bq_service:
            bq_service = BigQueryService()
        self.bq_service = bq_service

    @property
    def metadata(self):
        """Parent metadata merged with (and overridden by) the service metadata."""
        combined = dict(super().metadata)
        combined.update(self.bq_service.metadata)
        return combined

    @profile
    def perform(self):
        """Build ``self.graph`` from the user-friends rows, logging every batch.

        One edge ``(screen_name, friend)`` is added per friend unless
        ``dry_run`` is set.  After every ``batch_size`` rows a progress record
        is printed and appended to ``self.running_results``.
        """
        self.graph = DiGraph()
        self.running_results = []

        for row in self.bq_service.fetch_user_friends_in_batches():
            self.counter += 1

            if not self.dry_run:
                friendships = [(row["screen_name"], friend)
                               for friend in row["friend_names"]]
                self.graph.add_edges_from(friendships)

            if self.counter % self.batch_size != 0:
                continue

            rr = {
                "ts": logstamp(),
                "counter": self.counter,
                "nodes": len(self.graph.nodes),
                "edges": len(self.graph.edges),
            }
            print(rr["ts"], "|", fmt_n(rr["counter"]), "|",
                  fmt_n(rr["nodes"]), "|", fmt_n(rr["edges"]))
            self.running_results.append(rr)
def test():
    """Run Pearl's inference on a small binary Bayesian network.

    The network has edges A->C, B->C, C->D, C->E, D->F and D->G.  Evidence
    C=1 and then A=1 is added, and the conditional probability table is
    pretty-printed at the end.
    """
    bayesian_network = DiGraph()
    edges = [('A', 'C'), ('B', 'C'), ('C', 'D'),
             ('C', 'E'), ('D', 'F'), ('D', 'G')]
    bayesian_network.add_edges_from(edges)
    for node in bayesian_network.nodes():
        node_object = bayesian_network.node[node]
        # All the variables are binary
        node_object['values'] = ['0', '1']

    conditional_probabilities = {
        'A1': 0.7, 'A0': 0.3,
        'B1': 0.4, 'B0': 0.6,
        'C1|A0,B0': 0.1, 'C1|A1,B0': 0.3,
        'C1|A0,B1': 0.5, 'C1|A1,B1': 0.9,
        'C0|A0,B0': 0.9, 'C0|A1,B0': 0.7,
        'C0|A0,B1': 0.5, 'C0|A1,B1': 0.1,
        'D1|C0': 0.8, 'D1|C1': 0.3,
        'D0|C0': 0.2, 'D0|C1': 0.7,
        'E1|C0': 0.2, 'E1|C1': 0.6,
        'E0|C0': 0.8, 'E0|C1': 0.4,
        'F1|D0': 0.1, 'F1|D1': 0.7,
        'F0|D0': 0.9, 'F0|D1': 0.3,
        'G1|D0': 0.9, 'G1|D1': 0.4,
        'G0|D0': 0.1, 'G0|D1': 0.6
    }

    inference = PearlsInference(bayesian_network, conditional_probabilities)
    # Single-argument print calls parse and behave the same on Python 2 and 3.
    print('-------------------------------')
    inference.add_evidence(['C', '1'])
    print('----------------------------------')
    inference.add_evidence(['A', '1'])
    pprint(conditional_probabilities)
def to_graph(self) -> DiGraph:
    """Return a directed graph of node ids connected by this object's links."""
    node_ids = [vertex.id for vertex in self.nodes]
    link_pairs = [(link.from_node_id, link.to_node_id) for link in self.links]

    result = DiGraph()
    result.add_nodes_from(node_ids)
    result.add_edges_from(link_pairs)
    return result
def get_graph_with_specified_relation(graph: MultiDiGraph, relation="hiperonimia") -> DiGraph:
    """Return a DiGraph holding the edges of ``graph`` whose key ends with ``relation``.

    Only the endpoints are kept; parallel edges between the same pair of
    nodes collapse into a single edge.
    """
    matching_pairs = (
        (edge[0], edge[1])
        for edge in graph.edges
        if edge[2].endswith(relation)
    )
    filtered_graph = DiGraph()
    filtered_graph.add_edges_from(matching_pairs)
    return filtered_graph
def load(
    cls: Type["RelationDAG"],
    database: Database,
    extend_relations: List[Relation],
    ignore_relations: List[Relation],
    ignore_tables: List[Table],
) -> "RelationDAG":
    """
    Create a RelationDAG.

    The graph is assembled from the tables of ``database`` and the relations
    derived from them, extended with ``extend_relations`` from the user
    config.  The edges of ``ignore_relations`` and the nodes in
    ``ignore_tables`` are then removed before the instance is built.
    """
    # Actual table instances and every relation that should be present.
    tables = database.tables.__dict__.values()
    relations = Relation.from_tables(tables) + extend_relations

    dag = DiGraph(name="RelationDAG")
    dag.add_nodes_from(tables)
    dag.add_edges_from([relation.edge for relation in relations])

    # Drop the excluded entities (relations first, then tables).
    dag.remove_edges_from([relation.edge for relation in ignore_relations])
    dag.remove_nodes_from(ignore_tables)

    return cls.from_graph(dag)
def test_get_rc_chain():
    """The RC codes along A -> B -> C -> D -> E are concatenated as 'ISLO'."""
    chain_graph = DiGraph()
    chain_graph.add_edges_from([
        ('A', 'B', {'RC': 'I'}),
        ('B', 'C', {'RC': 'S'}),
        ('C', 'D', {'RC': 'L'}),
        ('D', 'E', {'RC': 'O'}),
    ])
    transit_points = ['B', 'C', 'D']
    get_rc_chain = MapGraph._get_rc_chain.im_func
    nt.assert_equal(get_rc_chain(chain_graph, 'A', transit_points, 'E'),
                    'ISLO')
def test_remove_node():
    """Removing 'X' also drops the edge whose TP list contains 'X'."""
    graph = DiGraph()
    graph.add_node('X')
    graph.add_edges_from([
        ('A', 'B', {'TP': ['X']}),
        ('B', 'C', {'TP': ['Y']}),
    ])
    remove_node = MapGraph.remove_node.im_func
    remove_node(graph, 'X')
    nt.assert_equal(graph.edges(), [('B', 'C')])
def _build_dom_sets(self, graph: nx.DiGraph, gname=''):
    """Compute the dominator tree, dominance sets and frontiers of ``graph``.

    A temporary ``'virtual_entry'`` node is connected to every node with
    in-degree 0 so that the graph has a single entry.  It is removed again
    before returning, also when the computation raises.

    Args:
        graph: the graph to analyse; temporarily modified in place.
        gname: name used in the error message only.

    Returns:
        A tuple ``(dom_tree, dominances, frontier)``: the dominator tree as
        a DiGraph, a dict mapping each node to the set made of itself and
        its descendants in that tree, and the dominance frontiers (virtual
        entry removed from all three).

    Raises:
        NoEntryForDomTreeError: if no node has in-degree 0.
    """
    v_entry = 'virtual_entry'
    # Snapshot the degrees first: edges are added while consuming them.
    graph.add_edges_from(
        (v_entry, node) for node, degree in tuple(graph.in_degree)
        if degree == 0)
    if v_entry not in graph:
        err_msg = f'Failed to find an entry to build dominance tree for {gname}'
        logging.error(err_msg)
        raise NoEntryForDomTreeError(err_msg)

    try:
        dom_tree = nx.DiGraph()
        dom_tree.add_nodes_from(graph.nodes)
        for node, dominator in nx.immediate_dominators(graph, v_entry).items():
            dom_tree.add_edge(dominator, node)
        dom_tree.remove_node(v_entry)

        dominances = {
            node: nx.descendants(dom_tree, node) | {node}
            for node in dom_tree.nodes
        }

        frontier = nx.dominance_frontiers(graph, v_entry)
        frontier.pop(v_entry)
    finally:
        # Always hand the caller's graph back without the helper node.
        graph.remove_node(v_entry)
    return dom_tree, dominances, frontier
def graph(self) -> DiGraph:
    """
    Get a graph of the job indicating the inputs to the job.

    Returns
    -------
    DiGraph
        The graph showing the connectivity of the jobs.
    """
    from networkx import DiGraph

    def _strip(attribute):
        # Remove brackets and dots from a formatted attribute.
        return attribute.replace("[", "").replace("]", "").replace(".", "")

    incoming = []
    for uuid, refs in self.input_references_grouped.items():
        names = [
            _strip(ref.attributes_formatted[-1])
            for ref in refs
            if ref.attributes
        ]
        # A single name is used as a bare string; nothing at all means
        # "output".
        label = names[0] if len(names) == 1 else names
        if len(label) == 0:
            label = "output"
        incoming.append((uuid, self.uuid, {"properties": label}))

    job_graph = DiGraph()
    job_graph.add_node(self.uuid, job=self, label=self.name)
    job_graph.add_edges_from(incoming)
    return job_graph
def ad_graph(f):
    """Return the asynchronous dynamics of ``f`` as a directed graph.

    The nodes are the keys of ``f``; each key of ``sd_to_ad(f)`` is linked
    to every element of its value.
    """
    async_dynamics = sd_to_ad(f)
    transitions = [
        (source, target)
        for source, targets in async_dynamics.items()
        for target in targets
    ]
    result = DiGraph()
    result.add_nodes_from(f.keys())
    result.add_edges_from(transitions)
    return result
def check_pipeline_structure(cls, values):
    """Validate the pipeline's node/link structure and return ``values``.

    A directed graph is built from the ids of ``values['nodes']`` and the
    ``(from_, to)`` pairs of ``values['links']`` and handed to
    ``validate_pipeline_structure``.
    """
    node_ids = [node.node_id for node in values.get('nodes')]
    link_pairs = [(link.from_, link.to) for link in values.get('links')]

    pipeline = DiGraph()
    pipeline.add_nodes_from(node_ids)
    pipeline.add_edges_from(link_pairs)
    validate_pipeline_structure(pipeline)
    return values
def _transitive_closure(def_: Definition, graph: networkx.DiGraph,
                        result: networkx.DiGraph,
                        visited: Optional[Set[Definition]] = None):
    """
    Returns a joint graph that comprises the transitive closure of all defs
    that `def_` depends on and the current graph `result`. `result` is
    updated in place and returned.

    Closures are memoised in `self._transitive_closures`.  On a cache hit
    the cached closure is merged into `result` (nodes and edges, with their
    data) instead of replacing it, so that what has already been
    accumulated in `result` is not lost.
    """
    if def_ in self._transitive_closures:
        closure = self._transitive_closures[def_]
        # The cached graph may be `result` itself; merging is then a no-op.
        if closure is not result:
            result.add_nodes_from(closure.nodes(data=True))
            result.add_edges_from(closure.edges(data=True))
        return result

    predecessors = list(graph.predecessors(def_))
    result.add_node(def_)
    # Copy the incoming edges of `def_` together with their edge data.
    result.add_edges_from(
        [(pred, def_, graph.get_edge_data(pred, def_))
         for pred in predecessors])

    # `is None` rather than `or`: an empty set passed in must stay shared.
    if visited is None:
        visited = set()
    visited.add(def_)
    predecessors_to_visit = set(predecessors) - set(visited)

    closure = result
    for predecessor in predecessors_to_visit:
        closure = _transitive_closure(predecessor, graph, closure, visited)

    self._transitive_closures[def_] = closure
    return closure
def _transitive_closure(def_: Definition, graph: networkx.DiGraph,
                        result: networkx.DiGraph,
                        visited: Optional[Set[Definition]] = None):
    """
    Returns a joint graph that comprises the transitive closure of all defs
    that `def_` depends on and the current graph `result`. `result` is
    updated.

    Closures are memoised in `self._transitive_closures`.  On a cache hit
    the cached closure is merged into `result` including node and edge data,
    so definitions without predecessors and edge attributes are kept.
    """
    if def_ in self._transitive_closures:
        closure = self._transitive_closures[def_]
        # merge closure into result (skipped when both are the same object)
        if closure is not result:
            result.add_nodes_from(closure.nodes(data=True))
            result.add_edges_from(closure.edges(data=True))
        return result

    predecessors = list(graph.predecessors(def_))
    result.add_node(def_)
    # Copy the incoming edges of `def_` together with their edge data.
    result.add_edges_from(
        [(pred, def_, graph.get_edge_data(pred, def_))
         for pred in predecessors])

    # `is None` rather than `or`: an empty set passed in must stay shared.
    if visited is None:
        visited = set()
    visited.add(def_)
    predecessors_to_visit = set(predecessors) - set(visited)

    closure = result
    for def0 in predecessors_to_visit:
        closure = _transitive_closure(def0, graph, closure, visited)

    self._transitive_closures[def_] = closure
    return closure
def test_get_worst_pdc_in_tp():
    """The largest PDC along A -> B -> C -> D -> E is 17."""
    chain_graph = DiGraph()
    chain_graph.add_edges_from([
        ('A', 'B', {'PDC': 0}),
        ('B', 'C', {'PDC': 5}),
        ('C', 'D', {'PDC': 7}),
        ('D', 'E', {'PDC': 17}),
    ])
    transit_points = ['B', 'C', 'D']
    worst_pdc = MapGraph._get_worst_pdc_in_tp.im_func(
        chain_graph, 'A', transit_points, 'E')
    nt.assert_equal(worst_pdc, 17)
def test_missing_edges(self):
    """A tournament must not have any pair of nodes without at least
    one edge joining the pair.

    """
    # Nodes 0 and 2 are not joined by any edge.
    edges = [(0, 1), (1, 2), (2, 3), (3, 0), (1, 3)]
    digraph = DiGraph()
    digraph.add_edges_from(edges)
    assert not is_tournament(digraph)
def test_get_best_is():
    """Of the candidates of 'A-1', ('B-1', 5) is reported as the best."""
    graph = DiGraph()
    graph.add_edges_from([
        ('A-1', 'B-1', {'RC': 'I', 'PDC': 5}),
        ('A-1', 'B-2', {'RC': 'I', 'PDC': 7}),
        ('A-1', 'B-3', {'RC': 'S', 'PDC': 5}),
    ])
    candidates = ['B-1', 'B-2', 'B-3']
    best = MapGraph._get_best_is.im_func(graph, 'A-1', candidates)
    nt.assert_equal(best, ('B-1', 5))
def test_missing_edges(self):
    """A tournament must not have any pair of nodes without at least
    one edge joining the pair.

    """
    # Nodes 0 and 2 are not joined by any edge.
    edges = [(0, 1), (1, 2), (2, 3), (3, 0), (1, 3)]
    digraph = DiGraph()
    digraph.add_edges_from(edges)
    assert_false(is_tournament(digraph))
def test_get_worst_pdc():
    """The worst PDC among the listed neighbours of 'A-1' is 15."""
    graph = DiGraph()
    graph.add_edges_from([
        ('A-1', 'B-1', {'RC': 'L', 'PDC': 5}),
        ('A-1', 'B-2', {'RC': 'O', 'PDC': 7}),
        ('A-1', 'B-3', {'RC': 'L', 'PDC': 15}),
    ])
    neighbours = ['B-1', 'B-2', 'B-3']
    worst = MapGraph._get_worst_pdc.im_func(graph, 'A-1', neighbours)
    nt.assert_equal(worst, 15)
def mock_graph(mock_user_friends):
    """Build a directed friend graph from rows of user/friend names.

    Every row contributes its ``screen_name`` as a node, each entry of
    ``friend_names`` as a node, and one edge from the user to each friend.
    """
    network = DiGraph()
    for record in mock_user_friends:
        screen_name = record["screen_name"]
        friend_names = record["friend_names"]
        network.add_node(screen_name)
        network.add_nodes_from(friend_names)
        friendships = [(screen_name, name) for name in friend_names]
        network.add_edges_from(friendships)
    return network
def digraph_from_graph_list(graph_list):
    """Build a DiGraph from lines such as ``'a -> b -> c'``.

    Each line is split on ``' -> '`` and every pair of consecutive names
    becomes a directed edge.
    """
    links = []
    for path_line in graph_list:
        names = path_line.split(' -> ')
        # Pair each name with its successor on the same line.
        links.extend(zip(names, names[1:]))

    result = DiGraph()
    result.add_edges_from(links)
    return result
def add_edges_from(self, ebunch):
    """Add the edges in ``ebunch``, keeping the graph acyclic and connected.

    The edges are first applied to a copy of the graph; they are added to
    ``self`` only if the copy is still a connected directed acyclic graph.

    Raises:
        ValueError: if the edges would create a cycle or leave the graph
            disconnected.  ``self`` is left untouched in that case.
    """
    # The edges are consumed twice (trial copy, then self), so a one-shot
    # iterable such as a generator must be materialised first.
    edges = list(ebunch)
    temp = self.copy()
    DiGraph.add_edges_from(temp, edges)
    if not temp._is_directed_acyclic_graph():
        raise ValueError("Edges %s create a cycle" % (edges, ))
    elif not temp._is_connected():
        raise ValueError("Edges %s create disconnected graph" % (edges, ))
    else:
        DiGraph.add_edges_from(self, edges)
def __init__(self, conec=(), **kwargs):
    """
    Calls DiGraph constructor and checks if the graph is connected and
    acyclic.

    Args:
        conec: iterable of edges to start with (none by default).  The
            default is an immutable empty tuple instead of a shared list.
        **kwargs: forwarded to ``DiGraph.__init__``.

    Raises:
        ValueError: if the resulting graph is not connected or not acyclic.
    """
    DiGraph.__init__(self, **kwargs)
    # The base-class method is called directly: the overriding
    # add_edges_from copies the graph and would recurse here (per the
    # original author's note).
    DiGraph.add_edges_from(self, conec)
    if not self._is_connected():
        raise ValueError("Not connected graph")
    if not self._is_directed_acyclic_graph():
        raise ValueError("Not acyclic graph")
def graph() -> DiGraph:
    """Return a 13-node directed graph made of three separate islands."""
    island_1 = [(1, 2), (1, 3), (2, 3)]
    island_2 = [(4, 5), (4, 6), (6, 7)]
    island_3 = [(8, 9), (9, 10), (9, 11), (8, 12), (12, 11), (12, 13)]

    g = DiGraph()
    g.add_nodes_from(range(1, 14))
    for island in (island_1, island_2, island_3):
        g.add_edges_from(island)
    return g
def add_edges_from(self, ebunch):
    """Add the edges in ``ebunch``, keeping the graph acyclic and connected.

    The edges are first applied to a copy of the graph; they are added to
    ``self`` only if the copy is still a connected directed acyclic graph.

    Raises:
        ValueError: if the edges would create a cycle or leave the graph
            disconnected.  ``self`` is left untouched in that case.
    """
    # The edges are consumed twice (trial copy, then self), so a one-shot
    # iterable such as a generator must be materialised first.
    edges = list(ebunch)
    temp = self.copy()
    DiGraph.add_edges_from(temp, edges)
    if not temp._is_directed_acyclic_graph():
        raise ValueError("Edges %s create a cycle" % (edges, ))
    elif not temp._is_connected():
        raise ValueError("Edges %s create disconnected graph" % (edges, ))
    else:
        DiGraph.add_edges_from(self, edges)
def update_from_db(self, session):
    # type: (Session) -> None
    """Rebuild the in-memory graph from the database if the checkpoint changed.

    Nothing happens when the checkpoint read from ``session`` equals
    ``self.checkpoint``.  Otherwise all metadata, nodes and edges are loaded,
    the graphs are built outside ``self.lock``, and the new state is then
    swapped in while holding ``self.lock``.  The duration of the update is
    reported through ``stats.log_rate``.
    """
    # Only allow one thread at a time to construct a fresh graph.
    with self._update_lock:
        checkpoint, checkpoint_time = self._get_checkpoint(session)
        if checkpoint == self.checkpoint:
            self._logger.debug("Checkpoint hasn't changed. Not Updating.")
            return
        self._logger.debug("Checkpoint changed; updating!")

        start_time = datetime.utcnow()

        # Load everything needed for the new state before touching self.
        user_metadata = self._get_user_metadata(session)
        groups, disabled_groups = self._get_groups(session, user_metadata)
        permissions = self._get_permissions(session)
        group_grants = self._get_group_grants(session)
        group_service_accounts = self._get_group_service_accounts(session)
        service_account_grants = all_service_account_permissions(session)

        nodes = self._get_nodes(groups, user_metadata)
        edges = self._get_edges(session)
        # Same edges minus those whose role maps to "np-owner"; the edge
        # data is dropped here.
        edges_without_np_owner = [
            (n1, n2) for n1, n2, r in edges if GROUP_EDGE_ROLES[r["role"]] != "np-owner"
        ]

        graph = DiGraph()
        graph.add_nodes_from(nodes)
        graph.add_edges_from(edges)
        rgraph = graph.reverse()

        # We need a separate graph without np-owner edges to construct the mapping of
        # permissions to users with that grant.
        permission_graph = DiGraph()
        permission_graph.add_nodes_from(nodes)
        permission_graph.add_edges_from(edges_without_np_owner)
        grants_by_permission = self._get_grants_by_permission(
            permission_graph, group_grants, service_account_grants, user_metadata)

        # Publish the complete new state in one step under the lock.
        with self.lock:
            self._graph = graph
            self._rgraph = rgraph
            self.checkpoint = checkpoint
            self.checkpoint_time = checkpoint_time
            self.user_metadata = user_metadata
            self._groups = groups
            self._disabled_groups = disabled_groups
            self._permissions = permissions
            self._group_grants = group_grants
            self._group_service_accounts = group_service_accounts
            self._service_account_grants = service_account_grants
            self._grants_by_permission = grants_by_permission

        duration = datetime.utcnow() - start_time
        stats.log_rate("graph_update_ms", int(duration.total_seconds() * 1000))
def test_bidirectional_edges(self):
    """A tournament must not have any pair of nodes with greater
    than one edge joining the pair.

    """
    edges = [(0, 1), (1, 2), (2, 3), (3, 0), (1, 3), (0, 2)]
    digraph = DiGraph()
    digraph.add_edges_from(edges)
    # Nodes 0 and 1 are now joined in both directions.
    digraph.add_edge(1, 0)
    assert not is_tournament(digraph)
def test_bidirectional_edges(self):
    """A tournament must not have any pair of nodes with greater
    than one edge joining the pair.

    """
    edges = [(0, 1), (1, 2), (2, 3), (3, 0), (1, 3), (0, 2)]
    digraph = DiGraph()
    digraph.add_edges_from(edges)
    # Nodes 0 and 1 are now joined in both directions.
    digraph.add_edge(1, 0)
    assert_false(is_tournament(digraph))
def test_organize_by_rc():
    """Neighbours of 'A-1' are grouped under 'IS' or 'LO' by their RC."""
    graph = DiGraph()
    graph.add_edges_from([
        ('A-1', 'B-1', {'RC': 'I'}),
        ('A-1', 'B-2', {'RC': 'L'}),
        ('A-1', 'C-3', {'RC': 'O'}),
    ])
    organize = MapGraph._organize_by_rc.im_func

    grouped = organize(graph, 'A-1', ['B-1', 'B-2'])
    nt.assert_equal(grouped, {'IS': ['B-1'], 'LO': ['B-2']})

    grouped = organize(graph, 'A-1', ['C-3'])
    nt.assert_equal(grouped, {'IS': [], 'LO': ['C-3']})
def __init__(self, transitions):
    """Build the internal directed graph from a transition mapping.

    Args:
        transitions: mapping of each node to an iterable of the nodes it
            can move to.  Every key becomes a node; a target that is not a
            key is added as a node as well.
    """
    graph = DiGraph()
    graph.add_nodes_from(transitions.keys())
    # Iterate the mapping, not graph.nodes: adding an edge to a target that
    # is not yet a node would otherwise change the node view mid-iteration.
    for node, targets in transitions.items():
        graph.add_edges_from((node, target) for target in targets)
    self._graph = graph
class BigQueryTopicGrapher(BigQueryGrapher):
    """Grapher restricted to random users who discussed a topic in a date range."""

    def __init__(self, users_limit=USERS_LIMIT, topic=TOPIC, convo_start_at=START_AT,
                 convo_end_at=END_AT, bq_service=None, gcs_service=None):
        """Store the conversation filters and print them."""
        super().__init__(bq_service=bq_service, gcs_service=gcs_service)
        self.users_limit = users_limit
        self.topic = topic
        self.convo_start_at = convo_start_at
        self.convo_end_at = convo_end_at

        print("---------------------------------------")
        print("CONVERSATION FILTERS...")
        print(f" USERS LIMIT: {self.users_limit}")
        print(f" TOPIC: '{self.topic.upper()}' ")
        print(f" BETWEEN: '{self.convo_start_at}' AND '{self.convo_end_at}'")

    @property
    def metadata(self):
        """Parent metadata plus a ``conversation`` entry with the filters."""
        combined = dict(super().metadata)
        combined["conversation"] = {
            "users_limit": self.users_limit,
            "topic": self.topic,
            "start_at": self.convo_start_at,
            "end_at": self.convo_end_at,
        }
        return combined

    @profile
    def perform(self):
        """Fetch the selected users, graph their friends, then save and upload.

        One edge ``(screen_name, friend)`` is added per friend unless
        ``dry_run`` is set.  After every ``batch_size`` rows a progress record
        is printed and appended to ``self.running_results``.
        """
        self.write_metadata_to_file()
        self.upload_metadata()

        self.start()
        self.graph = DiGraph()
        self.running_results = []

        users = list(self.bq_service.fetch_random_users(
            limit=self.users_limit,
            topic=self.topic,
            start_at=self.convo_start_at,
            end_at=self.convo_end_at,
        ))
        print("FETCHED", len(users), "USERS")
        screen_names = sorted([user["user_screen_name"] for user in users])

        for row in self.bq_service.fetch_specific_user_friends(screen_names=screen_names):
            self.counter += 1

            if not self.dry_run:
                friendships = [(row["screen_name"], friend)
                               for friend in row["friend_names"]]
                self.graph.add_edges_from(friendships)

            if self.counter % self.batch_size != 0:
                continue

            rr = {
                "ts": logstamp(),
                "counter": self.counter,
                "nodes": len(self.graph.nodes),
                "edges": len(self.graph.edges),
            }
            print(rr["ts"], "|", fmt_n(rr["counter"]), "|",
                  fmt_n(rr["nodes"]), "|", fmt_n(rr["edges"]))
            self.running_results.append(rr)

        self.end()
        self.report()
        self.write_results_to_file()
        self.upload_results()
        self.write_graph_to_file()
        self.upload_graph()
def test_remove_node():
    """Removing 'X' also drops the edge whose TP list contains 'X'."""
    graph = DiGraph()
    graph.add_node('X')
    edges_with_tp = [
        ('A', 'B', {'TP': ['X']}),
        ('B', 'C', {'TP': ['Y']}),
    ]
    graph.add_edges_from(edges_with_tp)
    MapGraph.remove_node.im_func(graph, 'X')
    remaining = graph.edges()
    nt.assert_equal(remaining, [('B', 'C')])
def example_tree():
    """creates a tree/networkx.DiGraph of a syntactic parse tree"""
    labels = ['S', 'NP-1', 'N-1', 'Jeff', 'VP', 'V', 'ate',
              'NP-2', 'D', 'the', 'N-2', 'apple']
    branches = [
        ('S', 'NP-1'), ('NP-1', 'N-1'), ('N-1', 'Jeff'),
        ('S', 'VP'), ('VP', 'V'), ('V', 'ate'),
        ('VP', 'NP-2'), ('NP-2', 'D'), ('D', 'the'),
        ('NP-2', 'N-2'), ('N-2', 'apple'),
    ]
    parse_tree = DiGraph()
    parse_tree.add_nodes_from(labels)
    parse_tree.add_edges_from(branches)
    return parse_tree
def _convert_bfs(bfs):
    """Recursively replace the edge list under ``NONE`` with a DiGraph.

    ``bfs`` is modified in place and returned; every value stored under a
    key other than ``NONE`` is converted the same way.
    """
    edge_graph = DiGraph()
    edge_graph.add_edges_from(bfs[NONE])
    bfs[NONE] = edge_graph

    for key, subtree in bfs.items():
        if key is NONE:
            continue
        _convert_bfs(subtree)
    return bfs
class PsycopgGrapher(BaseGrapher):
    """Grapher that streams user-friend rows from PostgreSQL via psycopg2."""

    def __init__(self, dry_run=DRY_RUN, batch_size=BATCH_SIZE, users_limit=USERS_LIMIT,
                 database_url=DATABASE_URL, table_name=USER_FRIENDS_TABLE_NAME):
        """Connect to the database and open the cursor used by ``perform``."""
        super().__init__(dry_run=dry_run, batch_size=batch_size, users_limit=users_limit)
        self.database_url = database_url
        self.table_name = table_name
        self.connection = psycopg2.connect(self.database_url)
        # A NAMED CURSOR PREVENTS MEMORY ISSUES!!!!
        self.cursor = self.connection.cursor(
            name="network_grapher", cursor_factory=psycopg2.extras.DictCursor)

    @property
    def metadata(self):
        """Parent metadata plus the database URL and table name."""
        return {**super().metadata,
                **{"database_url": self.database_url, "table_name": self.table_name}}

    @property
    def sql(self):
        """The SELECT statement to run, limited when ``users_limit`` is set.

        NOTE(review): the table name and limit are interpolated into the
        statement; they must come from trusted configuration.
        """
        query = (f"SELECT id, user_id, screen_name, friend_count, friend_names "
                 f"FROM {self.table_name} ")
        if self.users_limit:
            query += f"LIMIT {self.users_limit};"
        return query

    @profile
    def perform(self):
        """Build ``self.graph`` batch by batch, then report, save and upload.

        The cursor and connection are closed when fetching ends, also if an
        error interrupts it.
        """
        self.start()
        self.write_metadata_to_file()
        self.upload_metadata()

        print(logstamp(), "CONSTRUCTING GRAPH OBJECT...")
        self.graph = DiGraph()
        self.running_results = []
        try:
            self.cursor.execute(self.sql)
            while True:
                batch = self.cursor.fetchmany(size=self.batch_size)
                if not batch:
                    break
                self.counter += len(batch)

                if not self.dry_run:
                    for row in batch:
                        self.graph.add_edges_from(
                            [(row["screen_name"], friend) for friend in row["friend_names"]])

                # One progress record per fetched batch.
                rr = {"ts": logstamp(), "counter": self.counter,
                      "nodes": len(self.graph.nodes), "edges": len(self.graph.edges)}
                print(rr["ts"], "|", fmt_n(rr["counter"]), "|",
                      fmt_n(rr["nodes"]), "|", fmt_n(rr["edges"]))
                self.running_results.append(rr)
        finally:
            # Release the database resources even if fetching failed.
            try:
                self.cursor.close()
            finally:
                self.connection.close()

        print(logstamp(), "GRAPH CONSTRUCTED!")
        self.report()
        self.write_results_to_file()
        self.upload_results()
        self.write_graph_to_file()
        self.upload_graph()
        self.end()
def _convert_bfs(bfs):
    """Recursively replace the edge list under ``NONE`` with a DiGraph.

    ``bfs`` is modified in place and returned; every value stored under a
    key other than ``NONE`` is converted the same way.
    """
    from networkx import DiGraph

    edge_graph = DiGraph()
    edge_graph.add_edges_from(bfs[NONE])
    bfs[NONE] = edge_graph

    for key, subtree in bfs.items():
        if key is NONE:
            continue
        _convert_bfs(subtree)
    return bfs
def test_relate_node_to_others():
    """_relate_node_to_others reports the related node(s) and their RC."""
    graph = DiGraph()
    relate = MapGraph._relate_node_to_others.im_func
    others = ['A-2', 'A-3', 'A-4']

    # No edges yet.
    nt.assert_equal(relate(graph, 'A-1', others), ([], 'D'))

    # A single 'I' relation.
    graph.add_edge('A-1', 'A-3', RC='I')
    nt.assert_equal(relate(graph, 'A-1', others), ('A-3', 'I'))

    # Two 'L' relations (the first overwrites the 'I' edge).
    graph.add_edges_from([
        ('A-1', 'A-3', {'RC': 'L'}),
        ('A-1', 'A-4', {'RC': 'L'}),
    ])
    nt.assert_equal(relate(graph, 'A-1', others), (['A-3', 'A-4'], 'L'))
def read_csv_graph(filename, delimiter=',', encoding=None,
                   reverse_edges=False, suppress_warnings=False,
                   ignore_header=True):
    """Read an edgelist style graph from a CSV file.

    Rows that do not have exactly two fields (including blank lines), rows
    with an empty field, and duplicate rows are skipped with a logged
    warning instead of being added.

    Options:
    filename -- path to the csv file
    delimiter -- CSV delimiter (e.g. ',', '\\t')
    encoding -- text encoding passed to open()
    reverse_edges -- flag if edges aren't in (source, dest) order
    suppress_warnings -- flag to ignore "bad line" warnings
    ignore_header -- flag to ignore the first line in CSV file

    Returns the loaded DiGraph.
    """
    logger = logging.getLogger(__name__)

    def warn(message, line_num):
        """Log a warning for the given line."""
        if suppress_warnings:
            return
        logger.warning('Error in %s on line %s: %s',
                       filename, line_num, message)

    edgeset = set()
    with open(filename, encoding=encoding, newline='') as file_obj:
        reader = csv.reader(file_obj, delimiter=delimiter)

        # Skip header line (if specified); an empty file has none to skip.
        if ignore_header:
            next(reader, None)

        for row in reader:
            pair = tuple(reversed(row)) if reverse_edges else tuple(row)
            if len(pair) != 2:
                # Not a (source, dest) pair; it cannot be added as an edge.
                warn('Malformed edge: {}'.format(pair), reader.line_num)
            elif not all(pair):
                warn('Incomplete edge: {}'.format(pair), reader.line_num)
            elif pair in edgeset:
                warn('Duplicate edge: {}'.format(pair), reader.line_num)
            else:
                # If no errors, add the edge!
                edgeset.add(pair)

    graph = DiGraph()
    graph.add_edges_from(edgeset)
    logger.info('Loaded graph from {}'.format(filename))
    return graph
def to_directed(self):
    """Return a directed copy of the graph with deep-copied data.

    Every adjacency entry (u, v, data) becomes a directed edge carrying a
    deep copy of its data; the graph and node attribute dictionaries are
    deep-copied as well.
    """
    from networkx import DiGraph

    def _copied_edges():
        for u, nbrs in self.adjacency_iter():
            for v, data in nbrs.iteritems():
                yield (u, v, deepcopy(data))

    G = DiGraph()
    G.name = self.name
    G.add_nodes_from(self)
    G.add_edges_from(_copied_edges())
    G.graph = deepcopy(self.graph)
    G.node = deepcopy(self.node)
    return G
def render_trees(parsed_dir_path: Path, target_dir_path: Path,
                 work_distribution: list):
    """Render the XML parse trees of the given ECLIs to JSON files.

    For every ``ecli`` in ``work_distribution`` each ``*.xml`` file below
    ``parsed_dir_path/ecli`` is turned into a tree of (lemma, pos) nodes and
    written as ``<name>.json`` below ``target_dir_path/ecli``, together with
    the file URI and the sentence text.
    """
    for ecli in work_distribution:
        parsed_files_glob = parsed_dir_path.joinpath(ecli).glob('*.xml')

        for parsed_file_path in parsed_files_glob:
            # Only non-empty parse files are rendered.
            if parsed_file_path.is_file() and parsed_file_path.stat(
            ).st_size != 0:
                json_dir_path = target_dir_path.joinpath(ecli)
                if not json_dir_path.is_dir():
                    mkdir(str(json_dir_path))
                json_file_name = parsed_file_path.name + '.json'
                json_file_path = json_dir_path.joinpath(json_file_name)
                # draw_spring(tree)
                # NOTE(review): this renders when the JSON file is missing
                # OR non-empty, so an existing empty file is skipped --
                # confirm that `!= 0` (rather than `== 0`) is intended.
                if not json_file_path.is_file() or json_file_path.stat(
                ).st_size != 0:
                    tree = DiGraph()
                    xml_tree = parse(str(parsed_file_path))
                    sentence = SENTENCE_XPATH(xml_tree)[0].text

                    nodes = []
                    edges = []
                    for xml_node in XML_NODES(xml_tree):
                        lemma = xml_node.get('lemma')
                        pos = xml_node.get('pos')
                        # Nodes without a lemma get a placeholder name.
                        if lemma is None:
                            lemma = '...'
                        node_id = int(xml_node.attrib['id'])
                        node_attributes = {'name': lemma, 'pos': pos}
                        node = (node_id, node_attributes)
                        nodes.append(node)
                        # assumes every selected node has a parent carrying
                        # an 'id' attribute -- TODO confirm for the root
                        parent_node_id = int(xml_node.getparent().get('id'))
                        edges.append((parent_node_id, node_id))

                    tree.add_nodes_from(nodes)
                    tree.add_edges_from(edges)
                    # The node with id 0 is used as the root of the tree.
                    tree_json = json_graph.tree_data(tree, root=0)

                    wrapper_json = {'origin': parsed_file_path.as_uri(),
                                    'sentence': sentence,
                                    'tree': tree_json}

                    with json_file_path.open(mode='wt') as json_file:
                        dump(wrapper_json, json_file, indent=True)

                    info("Rendered parse tree to '{json_file_path}'. ".format(
                        json_file_path=json_file_path))
            else:
                error("Empty or non-existent XML parse tree file at "
                      "'{parsed_file_path}'. ".format(
                          parsed_file_path=parsed_file_path))
def test_hamiltonian_cycle(self):
    """Tests that :func:`networkx.tournament.hamiltonian_path`
    returns a Hamiltonian cycle when provided a strongly connected
    tournament.

    """
    edges = [(0, 1), (1, 2), (2, 3), (3, 0), (1, 3), (0, 2)]
    tournament = DiGraph()
    tournament.add_edges_from(edges)

    path = hamiltonian_path(tournament)

    assert_equal(len(path), 4)
    # Consecutive nodes on the path are joined by an edge ...
    consecutive = zip(path, path[1:])
    assert_true(all(v in tournament[u] for u, v in consecutive))
    # ... and the last node links back to the first one.
    assert_true(path[0] in tournament[path[-1]])
def to_directed(self):
    """Return a directed copy of the graph with deep-copied data.

    Every adjacency entry (u, v, data) of ``self.a`` becomes a directed
    edge carrying a deep copy of its data; the graph data and node data
    are deep-copied as well.
    """
    from networkx import DiGraph

    def _copied_edges():
        for u, nbrs in self.a:
            for v, data in nbrs.items():
                yield (u, v, deepcopy(data))

    G = DiGraph()
    G.name = self.name
    G.add_nodes_from(self.n)
    G.add_edges_from(_copied_edges())
    G.graph = deepcopy(self.data)
    G._nodedata = deepcopy(self._nodedata)
    G.node = G._nodedata  # hack to pass test
    return G
def test_summate_connections():
    """The connections of 'A-1' and 'A-2' are summated to 4."""
    connections = DiGraph()
    connections.add_edges_from([
        ('A-1', 'B-3'),
        ('B-2', 'A-1'),
        ('A-2', 'B-2'),
        ('C-3', 'B-1'),
        ('D-1', 'D-2'),
    ])

    map_graph = DiGraph()
    map_graph.add_edges_from([
        ('A-1', 'B-1', {'RC': 'I'}),
        ('A-1', 'C-1', {'RC': 'S'}),
        ('A-2', 'D-1', {'RC': 'L'}),
        ('A-2', 'E-1', {'RC': 'O'}),
    ])
    map_graph.cong = connections

    total = MapGraph._summate_connections.im_func(map_graph, ['A-1', 'A-2'])
    nt.assert_equal(total, 4)
def update_from_db(self, session):
    """Rebuild the in-memory graph from the database if the checkpoint changed.

    Nothing happens when the checkpoint read from ``session`` equals
    ``self.checkpoint``.  Otherwise the graph and all metadata are loaded
    outside ``self.lock`` and then swapped in while holding ``self.lock``.
    """
    # Only allow one thread at a time to construct a fresh graph.
    with self.update_lock:
        checkpoint, checkpoint_time = self._get_checkpoint(session)
        if checkpoint == self.checkpoint:
            self.logger.debug("Checkpoint hasn't changed. Not Updating.")
            return
        self.logger.debug("Checkpoint changed; updating!")

        new_graph = DiGraph()
        new_graph.add_nodes_from(self._get_nodes_from_db(session))
        new_graph.add_edges_from(self._get_edges_from_db(session))
        rgraph = new_graph.reverse()

        # Nodes are (node_type, node_name) pairs; collect the names by type.
        users = set()
        groups = set()
        for (node_type, node_name) in new_graph.nodes():
            if node_type == "User":
                users.add(node_name)
            elif node_type == "Group":
                groups.add(node_name)

        user_metadata = self._get_user_metadata(session)
        permission_metadata = self._get_permission_metadata(session)
        service_account_permissions = all_service_account_permissions(session)
        group_metadata = self._get_group_metadata(session, permission_metadata)
        group_service_accounts = self._get_group_service_accounts(session)
        permission_tuples = self._get_permission_tuples(session)
        group_tuples = self._get_group_tuples(session)
        disabled_group_tuples = self._get_group_tuples(session, enabled=False)

        # Publish the complete new state in one step under the lock.
        with self.lock:
            self._graph = new_graph
            self._rgraph = rgraph
            self.checkpoint = checkpoint
            self.checkpoint_time = checkpoint_time
            self.users = users
            self.groups = groups
            # Set of the `permission` attribute of every entry in the
            # permission metadata lists.
            self.permissions = {perm.permission
                                for perm_list in permission_metadata.values()
                                for perm in perm_list}
            self.user_metadata = user_metadata
            self.group_metadata = group_metadata
            self.group_service_accounts = group_service_accounts
            self.permission_metadata = permission_metadata
            self.service_account_permissions = service_account_permissions
            self.permission_tuples = permission_tuples
            self.group_tuples = group_tuples
            self.disabled_group_tuples = disabled_group_tuples
def add_nodes_from(self, nodes, **attr):
    """Add ``nodes`` by name and re-index all nodes in sorted name order.

    The current structure is first saved by name in a temporary graph ``H``
    (edges and the per-node ``causes``), the graph is cleared, and it is
    then rebuilt with integer node ids that are positions in the sorted
    list of all names.  Nodes without causes get a fresh ``Pmf`` with
    probability ``self.p`` for 1 and ``1 - self.p`` for 0 and are recorded
    in ``self.indep_vars``; the others go to ``self.dep_vars``.
    ``self.SetProbs()`` is called at the end.
    """
    # Snapshot of the current graph, keyed by name instead of by index.
    H=DiGraph()
    H.add_nodes_from(self.names)
    h_names=sorted(H.nodes())
    H.add_edges_from([(h_names[e[0]], h_names[e[1]], self.edge[e[0]][e[1]]) for e in self.edges()])
    causes={h_names[v]: {h_names[item]: self.node[v]['causes'][item] for item in self.node[v]['causes']} for v in self.dep_vars}
    self.clear()
    self.indep_vars=[]
    self.dep_vars=[]
    # No previous nodes: every new node is an independent variable.
    if not H.nodes():
        DiGraph.add_nodes_from(self, nodes, **attr)
        self.names=names=sorted(nodes)
        for i, n in enumerate(self.names):
            self.node[i]={'name': n, 'pmf': Pmf()}
            self.node[i]['pmf'].Set(1,self.p)
            self.node[i]['pmf'].Set(0, 1-self.p)
            # Drop the name-keyed node just added; only index i is kept.
            self.remove_node(n)
            self.edge[i]={}
            self.indep_vars+=[i]
        self.SetProbs()
        return
    #DiGraph.add_nodes_from(self, nodes, **attr)
    #ind_vars=[var for var in H.indep_vars]
    #DiGraph.add_nodes_from(self, ind_vars)
    # Merge old and new names; the index of a name is its sorted position.
    self.names=names=sorted(set(H.nodes() + nodes))
    for i, n in enumerate(names):
        if n in H.nodes():
            # Existing node: restore its data and edges under the new indices.
            self.node[i], self.edge[i]=H.node[n], {names.index(item): H.edge[n][item] for item in H.edge[n]}
            self.node[i]['causes']={names.index(item): causes[n][item] for item in causes[n]} if n in causes else {}
            self.node[i]['name']=n
            self.node[i]['pmf']=Pmf()
            if not self.node[i]['causes']:
                self.node[i]['pmf'].Set(1,self.p)
                self.node[i]['pmf'].Set(0, 1-self.p)
                self.indep_vars+=[i]
            else:
                self.dep_vars+=[i]
        else:
            # New node: independent variable with the default probabilities.
            self.node[i]={'name': n, 'pmf': Pmf()}
            self.node[i]['pmf'].Set(1,self.p)
            self.node[i]['pmf'].Set(0, 1-self.p)
            #self.remove_node(n)
            self.edge[i]={}
            self.indep_vars+=[i]
    self.SetProbs()
def to_directed(self):
    """Return a directed representation of the graph.

    A new directed graph is returned with the same nodes, and with each
    edge (u,v,data) replaced by two directed edges (u,v,data) and
    (v,u,data).  The edge data is passed on as-is, not deep-copied.
    """
    from networkx import DiGraph

    def _adjacency_edges():
        for u, nbrs in self.adjacency_iter():
            for v, data in nbrs.iteritems():
                yield (u, v, data)

    directed = DiGraph()
    directed.add_nodes_from(self)
    directed.add_edges_from(_adjacency_edges())
    return directed
def __extract_graph(self, post_ids, meme_ids, graph_fname):
    """
    Extract graph from given meme id's.

    An edge ``(ref_user_id, user_id)`` is added for every reshare between
    two different users whose source and destination posts share at least
    one of the given memes.

    :param post_ids:    list of posts id's related to memes
    :param meme_ids:    list of memes id's
    :param graph_fname: the file name to save graph data
    :return:            directed graph of all reshares
    """
    t0 = time.time()
    logger.info('\tquerying reshares ...')
    reshares = mongodb.reshares.find(
        {'post_id': {'$in': post_ids}, 'reshared_post_id': {'$in': post_ids}},
        {'_id': 0, 'post_id': 1, 'reshared_post_id': 1, 'user_id': 1,
         'ref_user_id': 1}).sort('datetime')
    resh_count = reshares.count()
    reshares.rewind()
    logger.info('\ttime: %.2f min' % ((time.time() - t0) / 60.0))

    logger.info('\textracting graph from %d posts and %s reshares ...' % (len(post_ids), resh_count))
    edges = []
    meme_ids = set(meme_ids)
    i = 0
    # Report roughly every 10%.  The step is at least 1 so that fewer than
    # 10 reshares cannot cause a modulo by zero.
    progress_step = max(resh_count // 10, 1)

    # Iterate on reshares to extract graph edges.
    for resh in reshares:
        user_id = resh['user_id']
        ref_user_id = resh['ref_user_id']
        if user_id != ref_user_id:
            src_meme_ids = {pm['meme_id'] for pm in mongodb.postmemes.find(
                {'post_id': resh['reshared_post_id']}, {'_id': 0, 'meme_id': 1})}
            dest_meme_ids = {pm['meme_id'] for pm in mongodb.postmemes.find(
                {'post_id': resh['post_id']}, {'_id': 0, 'meme_id': 1})}
            common_memes = meme_ids & src_meme_ids & dest_meme_ids
            if common_memes:
                edges.append((ref_user_id, user_id))
        i += 1
        if i % progress_step == 0:
            logger.info('\t%d%% reshares done' % (i * 100 // resh_count))

    graph = DiGraph()
    graph.add_edges_from(edges)

    logger.info('\tsaving graph ...')
    self.save_param(graph, graph_fname, ParamTypes.GRAPH)

    logger.info('\tgraph extraction time: %.2f min' % ((time.time() - t0) / 60.0))
    return graph
def test_merge_identical_nodes():
    # Connectivity graph that will stand in for the MapGraph's ``cong``.
    conn_edges = [('A-1', 'A-5'), ('A-4', 'A-1')]
    conn_graph = DiGraph()
    conn_graph.add_edges_from(conn_edges)

    map_graph = MapGraph()
    # Reciprocal edges are deliberately left out here, because
    # add_edges_from has been mocked. _merge_identical_nodes is designed
    # only to look at a node's neighbors (i.e., its successors), on the
    # assumption that these are the same as its predecessors.
    mapping_edges = [
        ('A-1', 'A-3', {'RC': 'S', 'PDC': 5}),
        ('A-1', 'B-1', {'RC': 'I', 'PDC': 7}),
        ('A-1', 'C-1', {'RC': 'L', 'PDC': 10}),
        ('A-1', 'A-2', {'RC': 'I', 'PDC': 12}),
    ]
    map_graph.add_edges_from(mapping_edges)
    map_graph.cong = conn_graph

    map_graph._merge_identical_nodes('A-2', 'A-1')

    # Connectivity edges that touched 'A-1' are expected on 'A-2' instead.
    expected_conn_edges = [('A-2', 'A-5'), ('A-4', 'A-2')]
    nt.assert_equal(map_graph.cong.edges(), expected_conn_edges)
    # Only the 'B-1' and 'C-1' edges are expected to survive, now on 'A-2'.
    expected_map_edges = [('A-2', 'B-1'), ('A-2', 'C-1')]
    nt.assert_equal(map_graph.edges(), expected_map_edges)
def ts_from_expr(symtab, exprtab):
    """Build an annotated transition system by enumerating every state.

    symtab and exprtab are as produced by form.util.gen_expr().

    The environment transition map and the system transition edges are
    found by evaluating the ENVTRANS / SYSTRANS formulas of exprtab on
    each (state, next state) combination produced by stategen().  Each
    node of the resulting graph gets a 'sat' list naming the init and
    goal subformulas that hold in that state.

    Returns an AnnTransitionSystem built from symtab, the system
    transition graph, the environment transition map, and the numbers
    of environment and system goals.
    """
    envtrans = dict()
    systrans = list()

    identifiers = [v['name'] for v in symtab]
    next_identifiers = [v['name']+'_next' for v in symtab]
    # Loop-invariant: the uncontrolled subset of the symbol table.
    uncontrolled = [v for v in symtab if v['uncontrolled']]

    # SECURITY NOTE: the formulas are run through eval().  Builtins are
    # removed from the evaluation globals, but that is not a sandbox;
    # exprtab must come from a trusted source.
    evalglobals = {'__builtins__': None, 'True': True, 'False': False}

    # NOTE(review): if exprtab['ENVTRANS'] (or 'SYSTRANS') is an empty
    # list, the joined formula is '()', which evaluates to an empty tuple
    # (falsy), so no transition is admitted -- confirm the producer never
    # emits an empty list.
    envtrans_formula = '(' + ') and ('.join(exprtab['ENVTRANS']) + ')'
    for state in stategen(symtab):
        stated = dict(zip(identifiers, state))
        envtrans[state] = []
        for next_state in stategen(uncontrolled):
            # NOTE(review): zip pairs next_state with the *first*
            # len(next_state) entries of next_identifiers; this presumably
            # relies on uncontrolled variables leading symtab -- confirm.
            stated.update(dict(zip(next_identifiers, next_state)))
            if eval(envtrans_formula, evalglobals, stated):
                envtrans[state].append(next_state)

    systrans_formula = '(' + ') and ('.join(exprtab['SYSTRANS']) + ')'
    for state in stategen(symtab):
        stated = dict(zip(identifiers, state))
        for next_state in stategen(symtab):
            stated.update(dict(zip(next_identifiers, next_state)))
            if eval(systrans_formula, evalglobals, stated):
                systrans.append((state, next_state))

    G = DiGraph()
    G.add_edges_from(systrans)
    # Annotate each node with the subformulas it satisfies.
    for nd in G.nodes_iter():
        G.node[nd]['sat'] = list()
        stated = dict(zip(identifiers, nd))
        for subformula in ['ENVINIT', 'SYSINIT']:
            if eval(exprtab[subformula], evalglobals, stated):
                G.node[nd]['sat'].append(subformula)
        for subformula in ['ENVGOAL', 'SYSGOAL']:
            # Goals are lists; label each one with its index, e.g. 'SYSGOAL0'.
            for (i, goalexpr) in enumerate(exprtab[subformula]):
                if eval(goalexpr, evalglobals, stated):
                    G.node[nd]['sat'].append(subformula+str(i))

    return AnnTransitionSystem(symtab, G, envtrans,
                               num_egoals=len(exprtab['ENVGOAL']),
                               num_sgoals=len(exprtab['SYSGOAL']))
def eigenvector_similarity(ref_user_connections, eval_user_connections, is_directed=False):
    """
    Compute the transformed eigenvector similarity, s = 1 / (1 + d), of two
    graphs, where d is the sum of squared differences between the leading
    k entries of their Laplacian spectra.

    Technique mentioned in paper:
        https://www.cs.cmu.edu/~jingx/docs/DBreport.pdf
    code resource: http://goo.gl/XauaWB

    Parameters:
    ==========
    ref_user_connections: a list of edges
    eval_user_connections: a list of edges
    is_directed: boolean,
        False (default): edges form an undirected graph
        True: edges form a directed graph

    Returns:
    =======
    A similarity score; identical spectra give d = 0 and hence 1.0, and
    larger spectral differences give smaller scores.
    """
    graph_type = DiGraph if is_directed else Graph
    ref_graph = graph_type()
    eval_graph = graph_type()
    ref_graph.add_edges_from(ref_user_connections)
    eval_graph.add_edges_from(eval_user_connections)

    def select_k(spectrum, minimum_energy=0.9):
        # Number of leading spectrum entries whose cumulative sum first
        # reaches ``minimum_energy`` of the total; the whole spectrum if the
        # total is zero or the threshold is never reached.
        total = sum(spectrum)
        if total == 0.0:
            return len(spectrum)
        running_total = 0.0
        for count, value in enumerate(spectrum, 1):
            running_total += value
            if running_total / total >= minimum_energy:
                return count
        return len(spectrum)

    ref_laplacian = nx.spectrum.laplacian_spectrum(ref_graph)
    eval_laplacian = nx.spectrum.laplacian_spectrum(eval_graph)

    # Compare over the smaller of the two selected prefix lengths.
    k = min(select_k(ref_laplacian), select_k(eval_laplacian))
    squared_gaps = (ref_laplacian[:k] - eval_laplacian[:k])**2
    distance = sum(squared_gaps)

    # The raw distance is unbounded: 0 for identical spectra, growing with
    # the difference.  Map it from distance-style to similarity-style.
    return 1.0 / (1.0 + distance)
def compute_personalized_transition_matrix(G, alpha=0.85,
                                           restart_set=[SUPER_NODE]):
    """Return the transition matrix of the random walk with restarts.

    The result is ``alpha * P + (1 - alpha) * P_restart``, where ``P`` is
    the transition matrix of ``G`` and ``P_restart`` is the transition
    matrix of an auxiliary graph in which every node of ``G`` points to
    each (canonically mapped) restart node.

    Parameters
    ----------
    G : graph
        Must carry a ``'canonical_map'`` entry in ``G.graph``.
    alpha : float, optional
        The probability of the random surfer to continue their walk
        (default is 0.85).
    restart_set : list, optional
        The set of nodes to restart from. If not supplied, the restarts
        lead to the supernode (default is [SUPER_NODE]).

    Returns
    -------
    P : matrix
        The probability matrix for the random walk with restarts
        (documented by the original author as a scipy.sparse matrix).

    Raises
    ------
    CanonicalizationError
        If ``G`` has no supernode but SUPER_NODE is in ``restart_set``.
    """
    if not has_supernode(G) and SUPER_NODE in restart_set:
        raise CanonicalizationError('Cannot restart the random walks at the '
                                    'supernode')

    canonical_map = G.graph['canonical_map']
    restart_targets = [canonical_map[node] for node in restart_set]

    # Every node of G gets an edge to every restart target.
    restart_edges = []
    for node in G.nodes():
        for target in restart_targets:
            restart_edges.append((node, target))

    # A self-loop at the supernode (if present) avoids division by zero
    # when the transition matrix is computed.
    if has_supernode(G):
        # TODO Why is this edge added -- and then removed?
        canonical_super = canonical_map[SUPER_NODE]
        restart_edges.append((canonical_super, canonical_super))

    restart_graph = DiGraph()
    restart_graph.add_edges_from(restart_edges)

    walk_matrix = compute_transition_matrix(G)
    restart_matrix = compute_transition_matrix(restart_graph)

    if has_supernode(G):
        # Zero the bottom right corner again (the added self-loop).
        restart_matrix[-1, -1] = 0

    # TODO do the transition probabilities from SUPER_NODE sum up to 1?
    #      Does it matter?
    return alpha * walk_matrix + (1 - alpha) * restart_matrix
def test_keep_one_level():
    # Exercised for real (not mocked): _find_bottom_of_hierarchy,
    # _remove_level_from_hierarchy, _summate_connections.
    hierarchy = {
        'A-J': {'A-A': {}},
        'A-B': {
            'A-I': {
                'A-F': {'A-K': {}, 'A-L': {}},
                'A-H': {},
            },
            'A-D': {'A-E': {}},
        },
    }

    # Connectivity graph that will stand in for the MapGraph's ``cong``.
    conn_edges = [('A-D', 'A-Z'), ('A-Y', 'A-F'), ('A-J', 'A-X')]
    conn_graph = DiGraph()
    conn_graph.add_edges_from(conn_edges)

    map_graph = MapGraph()
    hierarchy_nodes = ['A-A', 'A-B', 'A-D', 'A-E', 'A-F', 'A-H', 'A-I',
                       'A-J', 'A-K', 'A-L']
    map_graph.add_nodes_from(hierarchy_nodes)
    map_graph.cong = conn_graph

    map_graph._keep_one_level(hierarchy, 'A')

    # The three connectivity edges are expected to remain.
    expected_conn_edges = [('A-J', 'A-X'), ('A-D', 'A-Z'), ('A-Y', 'A-F')]
    nt.assert_equal(map_graph.cong.edges(), expected_conn_edges)
    # Only these four nodes are expected to be kept in the map graph.
    expected_nodes = ['A-H', 'A-J', 'A-D', 'A-F']
    nt.assert_equal(map_graph.nodes(), expected_nodes)