def graph_equals(
        g1: nx.DiGraph,
        g2: nx.DiGraph,
        weight_column_name: Text = 'weight') -> bool:
    """Tell whether two directed graphs are equal.

    The node views and the edge views of both graphs must compare equal.
    When ``weight_column_name`` is truthy, the attribute stored under that
    key must additionally match on every edge. Pass ``None`` (or any falsy
    value) to skip the attribute comparison.

    Args:
      g1: First graph to be compared.
      g2: Second graph to be compared.
      weight_column_name: Edge-attribute key that holds the weight.

    Returns:
      True if the graphs are considered equal, False otherwise.

    Raises:
      KeyError: If a compared edge has no ``weight_column_name`` attribute.
    """
    same_structure = (g1.nodes() == g2.nodes()
                      and g1.edges() == g2.edges())
    if not same_structure:
        return False
    if not weight_column_name:
        return True
    # Edge sets are equal at this point, so every edge of g1 exists in g2.
    return not any(
        g1.get_edge_data(u, v)[weight_column_name]
        != g2.get_edge_data(u, v)[weight_column_name]
        for u, v in g1.edges())
def postprocessing(self, graph: nx.DiGraph):
    """Resolve the arrows that leave the nodes listed in ``self.non_doable``.

    Every edge leaving a node of ``self.non_doable`` is treated as
    "undecidable". Two cases are handled, mutating ``graph`` in place:

    * both directions between two nodes are undecidable: both directed
      edges are removed and the pair is recorded once as an undirected edge;
    * only one direction is undecidable and the opposite edge exists in the
      graph: the undecidable direction is removed.

    Args:
        graph: Directed graph to clean up (modified in place).

    Returns:
        ``graph`` itself when no undirected edge was recorded; otherwise the
        result of ``PDAG(...).to_dag(required_edges=...)`` built from the
        remaining directed edges plus the recorded undirected ones (``PDAG``
        is defined outside this block).
    """
    # The list fixes the processing order; the set gives O(1) membership.
    non_decidable_arrow = [
        (src, dst)
        for src in self.non_doable
        for dst in list(graph.neighbors(src))
    ]
    non_decidable = set(non_decidable_arrow)

    undirected_edge = set()
    for src, dst in non_decidable_arrow:
        reverse = (dst, src)
        if reverse in non_decidable:
            # Two undecidable directions: record the pair only once, i.e.
            # skip it when the reverse orientation is already recorded.
            if reverse not in undirected_edge:
                undirected_edge.add((src, dst))
                for u, v in ((src, dst), reverse):
                    if graph.has_edge(u, v):
                        graph.remove_edge(u, v)
        elif graph.has_edge(dst, src):
            # One decidable direction against one undecidable direction:
            # keep the decidable one.
            graph.remove_edge(src, dst)

    print('undirected', undirected_edge)
    if len(undirected_edge) == 0:
        return graph
    pdag = PDAG(directed_ebunch=list(graph.edges()),
                undirected_ebunch=list(undirected_edge))
    return pdag.to_dag(required_edges=list(graph.edges()))
def merge_graphs(process_model_graph: nx.DiGraph,
                 check_point_graph: nx.DiGraph,
                 decay: float = 0.95) -> nx.DiGraph:
    """
    Merge the check point (CP) graph into the process model graph (PMG).

    Before merging, every PMG edge weight is multiplied by ``decay`` (the
    default keeps 95% of the weight, i.e. a 5% decay). Edges present in
    both graphs get their ``weight`` and ``time`` summed; edges only present
    in the CP graph are added to the PMG. The PMG is modified in place and
    then passed through ``normalize_graph``.

    Parameters
    --------------------------------------
    process_model_graph: nx.DiGraph,
        PMG graph (modified in place)
    check_point_graph: nx.DiGraph,
        CP graph; every edge must carry ``weight`` and ``time``
    decay: float,
        Multiplier applied to each PMG edge weight before the merge
    Returns
    --------------------------------------
    process_model_graph: nx.DiGraph,
        PMG after merge, as returned by ``normalize_graph``
    """
    for _, _, data in process_model_graph.edges(data=True):
        data['weight'] *= decay

    for node1, node2, data in check_point_graph.edges(data=True):
        if process_model_graph.has_edge(node1, node2):
            merged = process_model_graph[node1][node2]
            merged['weight'] += data['weight']
            merged['time'] += data['time']
        else:
            process_model_graph.add_edge(node1, node2,
                                         weight=data['weight'],
                                         time=data['time'])

    return normalize_graph(process_model_graph)
def draw_graph(graph: nx.DiGraph):
    """Draw ``graph`` with its GOOD and BAD edges highlighted.

    Edges are classified with ``_get_edge_class`` on their data dict. Edges
    classified ``EdgeClass.GOOD`` (and their target nodes) are drawn in
    green, those classified ``EdgeClass.BAD`` (and their target nodes) in
    red, on top of a gray rendering of the whole graph laid out with
    ``nx.spring_layout``.
    """

    def edges_of_class(edge_class):
        # (u, v) pairs whose data dict maps to the requested class.
        return set((u, v) for u, v, data in graph.edges(data=True)
                   if _get_edge_class(data) == edge_class)

    good_edges = edges_of_class(EdgeClass.GOOD)
    good_nodes = set(target for (_, target) in good_edges)
    bad_edges = edges_of_class(EdgeClass.BAD)
    bad_nodes = set(target for (_, target) in bad_edges)

    nodes_pos = nx.spring_layout(graph)

    # Nodes: base layer first, highlighted targets on top.
    nx.draw_networkx_nodes(graph, pos=nodes_pos, alpha=0.6, node_color="gray")
    for highlighted, color in ((good_nodes, "g"), (bad_nodes, "r")):
        nx.draw_networkx_nodes(graph, pos=nodes_pos, alpha=0.8,
                               nodelist=highlighted, node_color=color)
    nx.draw_networkx_labels(graph, pos=nodes_pos, font_size=8)

    # Edges: faint base layer first, thick highlighted edges on top.
    nx.draw_networkx_edges(graph, pos=nodes_pos, alpha=0.25)
    for highlighted, color in ((good_edges, "g"), (bad_edges, "r")):
        nx.draw_networkx_edges(graph, edgelist=highlighted, pos=nodes_pos,
                               alpha=0.5, width=4, edge_color=color)
def __dfs(self, v: int, cur_graph: nx.DiGraph, cur_graph_inv: nx.DiGraph,
          valid_graphs: List[nx.DiGraph], max_graphs: int):
    """Depth-first enumeration of edge subsets, one vertex at a time.

    Vertices are visited in increasing order (``v``, ``v + 1``, ...). For a
    vertex that is reachable so far (or is a start vertex), every non-empty
    subset of its outgoing edges in ``self.g`` is tried in turn; the chosen
    edges are added to ``cur_graph`` (and mirrored, reversed, in
    ``cur_graph_inv``), the search recurses, and the edges are removed again
    (backtracking). When ``v`` reaches ``max(self.ends)`` a copy of the
    current edge set is appended to ``valid_graphs``.

    Args:
        v: Vertex currently being processed.
        cur_graph: Graph under construction (mutated, restored on return).
        cur_graph_inv: Same edges as ``cur_graph`` but reversed, so that
            ``cur_graph_inv.edges([v])`` lists the edges entering ``v``.
        valid_graphs: Output list; generated graphs are appended to it.
        max_graphs: The search stops once ``valid_graphs`` holds more than
            this many graphs.
    """
    if len(valid_graphs) > max_graphs:
        return
    # v is one of the end vertices but nothing points to it.
    if v in self.ends and len(cur_graph_inv.edges([v])) == 0:
        return
    # Last vertex.
    if v == max(self.ends):
        # Build a graph from the edges only, so unused vertices are dropped.
        g_generated = nx.DiGraph()
        g_generated.add_edges_from(cur_graph.edges)
        valid_graphs.append(g_generated)
        return
    # No edge enters v and v is not a start vertex: skip it.
    if len(cur_graph_inv.edges([v])) == 0 and (not v in self.starts):
        self.__dfs(v + 1, cur_graph, cur_graph_inv, valid_graphs, max_graphs)
        return
    # At least one edge enters v, or v is a start vertex.
    edges = self.g.edges([v])
    # Each bit of edge_selection picks one outgoing edge of v; masks are
    # tried from the largest (all edges) down to 1.
    for edge_selection in reversed(list(range(1, 1 << len(edges)))):
        for i, (_, to) in enumerate(edges):
            if (1 << i) & edge_selection:
                cur_graph.add_edge(v, to)
                cur_graph_inv.add_edge(to, v)
        self.__dfs(v + 1, cur_graph, cur_graph_inv, valid_graphs, max_graphs)
        # Undo the selection before trying the next mask.
        for i, (_, to) in enumerate(edges):
            if (1 << i) & edge_selection:
                cur_graph.remove_edge(v, to)
                cur_graph_inv.remove_edge(to, v)
def get_sort_edge(matrix, train_graph: nx.DiGraph, test_graph: nx.DiGraph,
                  max_k, processes: int = 50):
    """Mask known edges in ``matrix`` and sort every row in parallel.

    Entries for edges of ``train_graph`` and the diagonal entries of its
    nodes are overwritten, in place, with ``np.min(matrix) - 1`` so that
    edges already seen in training get the smallest value. Each row is then
    handed to ``multi_sort`` (defined outside this block) as the tuple
    ``(row_index, max_k, row)`` through a process pool.

    Args:
        matrix: Indexable 2-D score container; modified in place. Node
            identifiers of ``train_graph`` are used directly as indices.
        train_graph: Graph whose edges and self-pairs are masked.
        test_graph: Unused; kept so existing callers keep working.
        max_k: Forwarded unchanged to ``multi_sort``.
        processes: Number of worker processes (default 50).

    Returns:
        ``np.ndarray`` built from the list of ``multi_sort`` results.
    """
    min_value = np.min(matrix) - 1
    # Make sure edges that appear in the training set hold the minimum.
    for sender, receiver in train_graph.edges():
        matrix[sender][receiver] = min_value
    for node in train_graph.nodes:
        matrix[node][node] = min_value

    print('start multi', datetime.now())
    data = [(i, max_k, matrix[i]) for i in range(len(matrix))]
    print('end finish prepare data', datetime.now())

    pool = Pool(processes)
    try:
        print('start map', datetime.now())
        result = pool.map(multi_sort, data)
    finally:
        # Release the workers even if multi_sort raised.
        pool.close()
        pool.join()
    return np.array(list(result))
def solveBiobjectiveSP(G: nx.DiGraph, source: int, sink: int, objVal) -> dict:
    """
    Computes a (weakly) non-dominated point of the biobjective shortest
    paths problem.

    Minimizes the total ``length1`` of a source-sink path subject to the
    total ``length2`` being at most ``objVal - 1``.

    :param G: directed graph with ``length1`` and ``length2`` on every edge
    :param source: Source node in G
    :param sink: Sink node in G
    :param objVal: Bound on second objective
    :return: Dict with ``objVal`` (pair of objective values) and ``path``
        (list of selected edges); empty dict if the model is not solved to
        optimality
    """
    BiobjSP = Model('BiobjSP')

    # Variables: one binary per edge, kept by edge so the solution can be
    # read back from the handles instead of through name lookups.
    X = dict()
    for u, v in G.edges():
        X[u, v] = BiobjSP.addVar(vtype=GRB.BINARY, name=f'X_{u}_{v}')

    # Objective function
    BiobjSP.setObjective(
        quicksum(X[u, v] * G[u][v]['length1'] for u, v in G.edges()),
        sense=GRB.MINIMIZE)

    # Flow conservation: +1 unit leaves the source, 1 unit enters the sink.
    for v in G.nodes():
        if v == source:
            balance = 1
        elif v == sink:
            balance = -1
        else:
            balance = 0
        BiobjSP.addConstr(
            quicksum(X[a] for a in G.out_edges(v))
            - quicksum(X[a] for a in G.in_edges(v)), GRB.EQUAL, balance)

    # Bound on the second objective
    BiobjSP.addConstr(
        quicksum(X[u, v] * G[u][v]['length2'] for u, v in G.edges()),
        GRB.LESS_EQUAL, objVal - 1)

    # Solve model
    BiobjSP.update()
    BiobjSP.optimize()

    if BiobjSP.status != GRB.OPTIMAL:
        return dict()

    SP = dict()
    SP['objVal'] = (BiobjSP.objVal,
                    sum(X[u, v].x * G[u][v]['length2'] for u, v in G.edges()))
    # round() guards against solver values such as 0.9999999.
    SP['path'] = [(u, v) for u, v in G.edges() if round(X[u, v].x, 0) == 1]
    return SP
def same_edges(g1: nx.DiGraph, g2: nx.DiGraph):
    """
    Check if 2 digraphs have the same set of edges (edge data is ignored)

    :param g1: first digraph
    :param g2: second digraph
    :return: True when both edge sets are equal, False otherwise
    """
    # Equal sets have an empty symmetric difference.
    return not set(g1.edges()).symmetric_difference(g2.edges())
def equivalent_singlegraphs(g1_single: nx.DiGraph,
                            g2_single: nx.DiGraph) -> bool:
    """Return True when both graphs have equal nodes and equal edge data.

    Edges of both graphs are checked: for an edge missing from one graph
    ``get_edge_data`` yields ``None`` there, so the comparison fails.
    """
    edges_match = all(
        g1_single.get_edge_data(*edge) == g2_single.get_edge_data(*edge)
        for graph in (g1_single, g2_single)
        for edge in graph.edges())
    return edges_match and g1_single.nodes() == g2_single.nodes()
class TestsIssue20(unittest.TestCase):
    """
    Tests for issue #20
    https://github.com/torressa/cspy/issues/20
    """

    def setUp(self):
        # Simple digraph with a weight and a 2-component resource cost on
        # every edge; insertion order is kept as listed.
        graph = DiGraph(directed=True, n_res=2)
        edge_specs = (
            ("Source", 1, 10, [1, 1]),
            ("Source", 2, 10, [1, 1]),
            ("Source", 3, 10, [1, 1]),
            (1, "Sink", -10, [1, 0]),
            (2, "Sink", -10, [1, 0]),
            (3, "Sink", -10, [1, 0]),
            (3, 2, -5, [1, 1]),
            (2, 1, -10, [1, 1]),
        )
        for tail, head, weight, res_cost in edge_specs:
            graph.add_edge(tail, head, weight=weight,
                           res_cost=array(res_cost))
        self.G = graph
        self.max_res, self.min_res = [len(self.G.edges()), 2], [0, 0]

    def _path_edges_exist(self, path):
        # True when every consecutive node pair of ``path`` is a graph edge.
        return all(e in self.G.edges() for e in zip(path, path[1:]))

    @parameterized.expand(zip(range(100), range(100)))
    def testBiDirectional(self, _, seed):
        """
        Find shortest path of simple test digraph using BiDirectional,
        once per seed.
        """
        bidirec = BiDirectional(self.G, self.max_res, self.min_res, seed=seed)
        bidirec.run()
        # Check path
        self.assertEqual(bidirec.path, ['Source', 2, 1, 'Sink'])
        # Check attributes
        self.assertEqual(bidirec.total_cost, -10)
        self.assertTrue(all(bidirec.consumed_resources == [3, 2]))
        self.assertTrue(self._path_edges_exist(bidirec.path))

    def testTabu(self):
        """
        Find shortest path of simple test digraph using Tabu
        """
        tabu = Tabu(self.G, self.max_res, self.min_res)
        tabu.run()
        # Check attributes
        self.assertEqual(tabu.total_cost, -5)
        self.assertTrue(all(tabu.consumed_resources == [3, 2]))
        self.assertEqual(tabu.path, ['Source', 3, 2, 'Sink'])
        # Check path
        self.assertTrue(self._path_edges_exist(tabu.path))
def solveMaxFlow(G: nx.DiGraph, source: int, sink: int) -> dict:
    """
    Solves the maximum flow problem.

    :param G: directed graph with a ``capacity`` attribute on every edge
    :param source: Source node in G
    :param sink: Sink node in G
    :return: Dict mapping each edge with strictly positive flow to its flow
        value; empty dict if the model is not solved to optimality
    """
    maxFlow = Model('MaxFlow')

    # Variables: one flow variable per edge, bounded by the edge capacity.
    # The handles are kept so the solution is read from them directly
    # rather than through lookups by formatted name.
    X = dict()
    for a in G.edges():
        X[a] = maxFlow.addVar(vtype=GRB.CONTINUOUS, lb=0,
                              ub=G.get_edge_data(*a)['capacity'],
                              name=f'X_{a}')
    # B is the total flow sent from source to sink.
    B = maxFlow.addVar(vtype=GRB.CONTINUOUS, lb=0, name='B')

    # Objective function
    maxFlow.setObjective(B, sense=GRB.MAXIMIZE)

    # Flow conservation: net outflow is B at the source, -B at the sink and
    # 0 everywhere else.
    for v in G.nodes():
        if v == source:
            net_outflow = B
        elif v == sink:
            net_outflow = -B
        else:
            net_outflow = 0
        maxFlow.addConstr(
            quicksum(X[a] for a in G.out_edges(v))
            - quicksum(X[a] for a in G.in_edges(v)), GRB.EQUAL, net_outflow)

    # Solve model
    maxFlow.update()
    maxFlow.optimize()

    if maxFlow.status != GRB.OPTIMAL:
        return dict()

    flows = dict()
    for a in G.edges():
        flow = X[a].x
        if flow > 0:
            flows[a] = flow
    return flows
def maximal_non_branching_paths(graph: nx.DiGraph) -> list:
    """Return the maximal non-branching paths of ``graph`` as node lists.

    A path starts at every outgoing edge of a node for which
    ``is_in_1_out_1`` is false, and is extended while the current end node
    satisfies ``is_in_1_out_1``. The result of ``isolated_cycles(graph)`` is
    appended to the paths found this way (both helpers are defined outside
    this block).
    """
    paths = []
    for node in graph:
        if is_in_1_out_1(graph, node) or graph.out_degree(node) == 0:
            continue
        for w in list(graph.successors(node)):
            non_branching_path = [node, w]
            while is_in_1_out_1(graph, w):
                # w has exactly one successor; take it. Edge views cannot
                # be subscripted on networkx >= 2, hence the iterator.
                w = next(iter(graph.successors(w)))
                non_branching_path.append(w)
            paths.append(non_branching_path)
    return paths + isolated_cycles(graph)
class TestsIssue20(unittest.TestCase):
    """Tests for issue #20
    https://github.com/torressa/cspy/issues/20
    """

    def setUp(self):
        # Simple digraph with a weight and a 2-component resource cost on
        # every edge; insertion order is kept as listed.
        graph = DiGraph(directed=True, n_res=2)
        edge_specs = (
            ("Source", 1, 10, [1, 1]),
            ("Source", 2, 10, [1, 1]),
            ("Source", 3, 10, [1, 1]),
            (1, "Sink", -10, [1, 0]),
            (2, "Sink", -10, [1, 0]),
            (3, "Sink", -10, [1, 0]),
            (3, 2, -5, [1, 1]),
            (2, 1, -10, [1, 1]),
        )
        for tail, head, weight, res_cost in edge_specs:
            graph.add_edge(tail, head, weight=weight,
                           res_cost=array(res_cost))
        self.G = graph

        # Maximum and minimum resource arrays
        self.max_res, self.min_res = [len(self.G.edges()), 2], [0, 0]

        # Expected results
        self.result_path = ['Source', 2, 1, 'Sink']
        self.total_cost = -10
        self.consumed_resources = [3, 2]

    def _path_edges_exist(self, path):
        # True when every consecutive node pair of ``path`` is a graph edge.
        return all(e in self.G.edges() for e in zip(path, path[1:]))

    def test_bidirectional(self):
        """
        Find the shortest path using BiDirectional with ``elementary=True``.
        """
        alg = BiDirectional(self.G, self.max_res, self.min_res,
                            elementary=True)
        alg.run()
        self.assertEqual(alg.path, self.result_path)
        self.assertEqual(alg.total_cost, self.total_cost)
        self.assertEqual(alg.consumed_resources, self.consumed_resources)
        self.assertTrue(self._path_edges_exist(alg.path))

    def test_tabu(self):
        """
        Find the shortest path using Tabu
        """
        alg = Tabu(self.G, self.max_res, self.min_res)
        alg.run()
        self.assertEqual(alg.path, self.result_path)
        self.assertEqual(alg.total_cost, self.total_cost)
        self.assertTrue(all(alg.consumed_resources == self.consumed_resources))
        self.assertTrue(self._path_edges_exist(alg.path))
def kruskal_min_spanning_tree(G: nx.DiGraph):
    """Build a minimum spanning tree of ``G`` with Kruskal's algorithm.

    Edges are taken in increasing ``weight`` order (ties broken by the edge
    tuple) and kept when their endpoints are not yet connected according to
    a ``disjoint_set.DisjointSet``. Edge direction is ignored for the
    connectivity test but preserved in the returned ``nx.DiGraph``.
    """
    candidates = [(G.edges()[u, v]["weight"], (u, v)) for u, v in G.edges()]
    heapq.heapify(candidates)

    components = disjoint_set.DisjointSet()
    tree = nx.DiGraph()
    while candidates:
        weight, (u, v) = heapq.heappop(candidates)
        if not components.connected(u, v):
            components.union(u, v)
            tree.add_edge(u, v, weight=weight)
    return tree
def normalize_graph(graph: nx.DiGraph) -> nx.DiGraph:
    """
    Time and weight normalization for each edge in the graph.

    For every edge, ``weight_normalized`` is the edge weight divided by the
    largest edge weight of the graph, and ``time_normalized`` is the edge
    ``time`` divided by the edge ``weight`` (the mean time of the edge).
    Both values are written into the edge data in place. A graph without
    edges is returned unchanged.

    Parameters
    --------------------------------------
    graph: nx.DiGraph,
        Graph to be normalized; every edge must carry ``weight`` and ``time``
    Returns
    --------------------------------------
    graph: nx.DiGraph,
        The same graph object, normalized
    Raises
    --------------------------------------
    ZeroDivisionError
        If the largest weight, or the weight of any edge, is zero
    """
    attributes = [data for _, _, data in graph.edges(data=True)]
    if not attributes:
        # max() of an empty sequence would raise; nothing to normalize.
        return graph

    max_weight = max(data.get("weight") for data in attributes)
    for data in attributes:
        edge_weight = data.get("weight")
        edge_time = data.get("time")
        data["weight_normalized"] = edge_weight / max_weight
        data["time_normalized"] = edge_time / edge_weight
    return graph
def common_edge_ratio(ref_user_connections, eval_user_connections,
                      is_directed=False):
    """
    Calculate the fraction of common edges out of the union of the edges of
    two graphs.

    Parameters:
    ==========
    ref_user_connections: a list of edges
    eval_user_connections: a list of edges
    is_directed: boolean,
        False (default): edges form an undirected graph
        True: edges form a directed graph
    """
    graph_type = DiGraph if is_directed else Graph

    ref_graph = graph_type()
    eval_graph = graph_type()
    ref_graph.add_edges_from(
        _normalize_connections(ref_user_connections, is_directed))
    eval_graph.add_edges_from(
        _normalize_connections(eval_user_connections, is_directed))

    ref_edges = ref_graph.edges()
    eval_edges = eval_graph.edges()
    tot_common = sum(1 for edge in eval_edges if edge in ref_edges)
    # |A ∪ B| = |A| + |B| - |A ∩ B|
    union_size = len(ref_edges) + len(eval_edges) - tot_common
    return tot_common / union_size
def sequential_subgraph_nodes(g: nx.DiGraph,
                              size: int) -> List[List[Union[str, int]]]:
    """Split ``g`` into a sequence of node groups.

    While more than one node remains, a branch is selected with
    ``find_leafy_branch_larger_than_size`` (defined outside this block), its
    nodes are recorded in lexicographical topological order, and the branch
    nodes that have at least one outgoing edge inside the branch are dropped
    from the working graph.

    Raises:
        nx.NetworkXUnfeasible: If ``g`` is not weakly connected or
            ``size`` is 1 or less.
    """
    if not nx.is_weakly_connected(g):
        raise nx.NetworkXUnfeasible(
            "sequential solutions are not possible for disconnected graphs.")
    if size <= 1:
        raise nx.NetworkXUnfeasible(
            "the minimum directed subgraph length is 2 nodes.")

    # Work on a structural copy: the working graph shrinks as we go.
    remaining = nx.DiGraph(g.edges())
    node_groups = []
    while len(remaining.nodes()) > 1:
        branch = find_leafy_branch_larger_than_size(remaining, size)
        node_groups.append(list(nx.lexicographical_topological_sort(branch)))

        # Trim the nodes of the branch that still feed other branch nodes.
        upstream = {n for n, deg in branch.out_degree if deg > 0}
        remaining = remaining.subgraph(
            [n for n in remaining.nodes() if n not in upstream])
    return node_groups
def build_dict_graph(nodes: nx.DiGraph) -> dict:
    """Convert the edges of ``nodes`` into an adjacency dict.

    Returns a dict that maps every node with at least one outgoing edge to
    the list of its successors, in edge iteration order. Nodes without
    outgoing edges do not appear as keys.
    """
    adjacency = {}
    for tail, head in nodes.edges():
        adjacency.setdefault(tail, []).append(head)
    return adjacency
def prune_graph_simple(self, graph: nx.DiGraph,
                       flow: Tuple[int, int]) -> nx.DiGraph:
    """
    Remove cycles between flow source and sink. Uses distances to give a
    partial topological order then removes edges that take us in the wrong
    direction. Simple but removes more paths than necessary.

    The input graph is left untouched; a pruned copy is returned. Distances
    are weighted by the ``route_weight`` edge attribute; nodes without a
    computed distance to the sink count as distance 0.

    Args:
      graph: graph to DAGify
      flow: source and sink of the flow
    Returns:
      A DAG with source at the start and sink at the end
    """
    pruned = graph.copy()

    # Distance from every node to the sink (flow[1]).
    distance_to_sink = collections.defaultdict(int)
    distance_to_sink.update(
        nx.shortest_path_length(pruned, source=None, target=flow[1],
                                weight='route_weight'))

    # An edge is dropped when it does not bring us strictly closer to the
    # sink, which leaves no cycle behind.
    wrong_way = [(src, dst) for (src, dst) in pruned.edges()
                 if distance_to_sink[dst] >= distance_to_sink[src]]
    pruned.remove_edges_from(wrong_way)
    return pruned
def load_dependency_graph(self):
    """Load the model dependency graph named by the configuration.

    Reads the JSON file whose path is ``Config.get("dependency_graph")`` and
    stores two graphs in ``self.model_dependency_graph``: the dependency
    graph under ``False`` and its reversed copy under ``True`` (used for
    deletion). Self-dependencies are skipped.

    Raises:
        Exception: Any error is logged with ``self.log.exception`` and
            re-raised unchanged.
    """
    dep_path = Config.get("dependency_graph")
    self.log.info('Loading model dependency graph', path = dep_path)

    try:
        # Context manager so the file is closed even if reading fails.
        with open(dep_path) as dep_file:
            dep_graph_str = dep_file.read()

        # joint_dependencies is of the form { Model1 -> [(Model2, src_port, dst_port), ...] }
        # src_port is the field that accesses Model2 from Model1
        # dst_port is the field that accesses Model1 from Model2
        joint_dependencies = json.loads(dep_graph_str)

        model_dependency_graph = DiGraph()
        for src_model, deps in joint_dependencies.items():
            for dep in deps:
                dst_model, src_accessor, dst_accessor = dep
                if src_model != dst_model:
                    edge_label = {'src_accessor': src_accessor,
                                  'dst_accessor': dst_accessor}
                    # Attributes go in as keywords: a positional attribute
                    # dict is rejected by networkx >= 2.
                    model_dependency_graph.add_edge(
                        src_model, dst_model, **edge_label)

        model_dependency_graph_rev = model_dependency_graph.reverse(
            copy=True)
        self.model_dependency_graph = {
            # deletion
            True: model_dependency_graph_rev,
            False: model_dependency_graph
        }
        self.log.info("Loaded dependencies", edges = model_dependency_graph.edges())
    except Exception as e:
        self.log.exception("Error loading dependency graph", e = e)
        raise
def mean_weight(self):
    """Return the arithmetic mean of the ``weight`` attribute over all edges.

    Raises ``ZeroDivisionError`` when the graph has no edges.
    """
    weighted_edges = DiGraph.edges(self, data='weight')
    total = 0.0
    for _, _, weight in weighted_edges:
        total = total + weight
    return total / float(len(weighted_edges))
def gravity_demand(graph: nx.DiGraph) -> Demand:
    """
    Generates gravity demand (deterministic, based on bandwidth) for one
    time step.

    For every ordered pair of distinct nodes ``(i, j)`` the demand is
    ``out_flow[i] * in_flow[j] / (total_flow * 10)``, where the flows are
    sums of the ``weight`` attribute of the outgoing / incoming edges. Node
    identifiers are used as array indices ``0 .. num_nodes - 1``.

    Args:
      graph: Networkx DiGraph with 'weight' on edges to generate demand from
    Returns:
      A demand array
    """
    num_nodes = graph.number_of_nodes()
    sorted_edges = sorted(graph.edges(data=True))
    edge_weights = [data['weight'] for _, _, data in sorted_edges]
    total_flow = sum(edge_weights)

    node_in_flow = np.zeros(num_nodes, np.float32)
    node_out_flow = np.zeros(num_nodes, np.float32)
    for (src, dst, _), weight in zip(sorted_edges, edge_weights):
        node_in_flow[dst] += weight
        node_out_flow[src] += weight

    # Row-major order over all (i, j) pairs with i != j.
    pair_products = [
        node_out_flow[i] * node_in_flow[j]
        for i in range(num_nodes)
        for j in range(num_nodes)
        if i != j
    ]
    return np.divide(np.array(pair_products), total_flow * 10)
def create_triples(
    graph: nx.DiGraph,
    node2int: Optional[Mapping] = None,
    relation: Any = 0,
    create_int_ids: bool = False
) -> Tuple[List[Tuple[Any, Any, Any]], Mapping]:
    """Turn every edge of ``graph`` into a ``(head, tail, relation)`` triple.

    * With a ``node2int`` mapping, heads and tails are translated through it.
    * Without one and with ``create_int_ids=True``, the mapping comes from
      ``create_unique_int_ids(graph)`` (defined outside this block).
    * Without one and with ``create_int_ids=False``, nodes are used as they
      are and an identity mapping of the nodes seen on edges is built.

    Returns:
        The list of triples and the mapping that was used.
    """
    if node2int is None and create_int_ids:
        node2int = create_unique_int_ids(graph)

    identity_ids = node2int is None
    if identity_ids:
        node2int = {}

    samples = []
    for head, tail in graph.edges():
        if identity_ids:
            # Record each endpoint as its own identifier.
            node2int[head] = head
            node2int[tail] = tail
            samples.append((head, tail, relation))
        else:
            samples.append((node2int[head], node2int[tail], relation))
    return samples, node2int
def _get_single_path(  # pylint: disable=too-many-arguments, too-many-locals
    self,
    graph: DiGraph,
    source: Address,
    target: Address,
    value: PaymentAmount,
    address_to_reachability: Dict[Address, AddressReachability],
    visited: Dict[ChannelID, float],
    disallowed_paths: List[List[Address]],
    fee_penalty: float,
) -> Optional[Path]:
    """Return the cheapest acceptable path from ``source`` to ``target``.

    Every edge weight of ``graph`` is first recomputed with
    ``self.edge_weight`` from the edge's ``view`` and the ``view`` of the
    opposite edge. Simple paths are then examined in increasing weight
    order; the first one whose ``Path`` is valid and whose nodes are not in
    ``disallowed_paths`` is returned, or ``None`` when no path qualifies.

    NOTE(review): ``Path`` objects are built from ``self.G`` while the
    search runs on the ``graph`` argument - confirm this is intended.
    """
    # Refresh the weight of every edge; the reverse edge must exist.
    for sender, receiver in graph.edges():
        forward = graph[sender][receiver]
        backward = graph[receiver][sender]
        forward["weight"] = self.edge_weight(
            visited=visited,
            view=forward["view"],
            view_from_partner=backward["view"],
            amount=value,
            fee_penalty=fee_penalty,
        )

    # Candidate node lists, cheapest first.
    candidates = nx.shortest_simple_paths(
        G=graph, source=source, target=target, weight="weight")

    # Skip duplicates and invalid paths.
    for nodes in candidates:
        path = Path(self.G, nodes, value, address_to_reachability)
        if path.is_valid and path.nodes not in disallowed_paths:
            return path
    return None
def simplify_debt_graph(debt_graph: nx.DiGraph) -> nx.DiGraph:
    """Cancel circular debts until ``debt_graph`` contains no cycle.

    For each cycle found, the smallest ``weight`` along the cycle is
    subtracted from every edge of the cycle, then every edge of the graph
    whose weight is exactly 0 is removed. The graph is modified in place
    and also returned.

    The loop is iterative: one pass per cycle, without growing the call
    stack.
    """
    while True:
        # Only one cycle is examined per pass because edges get deleted.
        try:
            cycle = next(nx.simple_cycles(debt_graph))
        except StopIteration:
            return debt_graph

        # Edge data of the cycle, including the closing edge last -> first.
        cycle_edges = [
            debt_graph[u][v] for u, v in zip(cycle, cycle[1:] + cycle[:1])
        ]

        # Subtract the smallest weight: at least one edge drops to zero.
        min_edge_weight = min(edge['weight'] for edge in cycle_edges)
        for edge in cycle_edges:
            edge['weight'] -= min_edge_weight

        # Delete edge(s) with weight zero.
        settled = [(u, v) for u, v, data in debt_graph.edges(data=True)
                   if data['weight'] == 0]
        debt_graph.remove_edges_from(settled)
def add_style_interactionsigns(igraph: networkx.DiGraph):
    """
    Sets attributes for the arrow head and edge color of interactions to
    indicate the interaction sign, based on the *"sign"* edge attribute.
    Activating interactions (*{1}*) get the attributes
    *"arrowhead"="normal"* and *"color"="black"*, inhibiting interactions
    (*{-1}*) get the attributes *"arrowhead"="tee"* and *"color"="red"*, and
    ambivalent interactions (*{1,-1}*) get the attributes *"arrowhead"="dot"*
    and *"color"="dodgerblue"*. Edges with any other sign are left unchanged.

    **arguments**:
        * *igraph*: interaction graph

    **example**::

          >>> add_style_interactionsigns(igraph)
    """
    # (sign set, arrowhead, color), checked in this order.
    sign_styles = (
        ({1, -1}, "dot", "dodgerblue"),
        ({-1}, "tee", "red"),
        ({1}, "normal", "black"),
    )

    for source, target, attr in sorted(igraph.edges(data=True)):
        for signs, arrowhead, color in sign_styles:
            if attr["sign"] == signs:
                edge_attr = igraph.adj[source][target]
                edge_attr["arrowhead"] = arrowhead
                edge_attr["color"] = color
                break
def add_style_activities(igraph: networkx.DiGraph,
                         activities: Union[str, dict],
                         color_active: str = "/paired10/5",
                         color_inactive: str = "/paired10/1"):
    """
    Sets attributes for the color and fillcolor of nodes to indicate which
    variables are activated and which are inhibited in *activities*.
    All activated or inhibited components get the attribute
    *"color"="black"*. Components whose activity is 1 get the attribute
    *"fillcolor"=color_active* and all other listed components get the
    attribute *"fillcolor"=color_inactive*.
    Interactions involving activated or inhibited nodes get the attribute
    *"color"="gray"* to reflect that they are ineffective.

    **arguments**:
        * *igraph*: interaction graph
        * *activities*: activated and inhibited nodes, as a dict or as a
          subspace string (converted with *subspace2dict* using the sorted
          node names)
        * *color_active*: color in dot format for active components
        * *color_inactive*: color in dot format for inactive components

    **example**::

          >>> activities = {"ERK":1, "MAPK":0}
          >>> add_style_activities(igraph, activities)
    """
    if isinstance(activities, str):
        # The sorted names are only needed to decode the string form.
        activities = subspace2dict(sorted(igraph.nodes()), activities)

    for name in igraph.nodes():
        if name in activities:
            node_attr = igraph.nodes[name]
            node_attr["color"] = "black"
            node_attr["fillcolor"] = (
                color_active if activities[name] == 1 else color_inactive)

    for x, y in igraph.edges():
        if x in activities or y in activities:
            igraph.adj[x][y]["color"] = "gray"
def is_cycle(alert_sub_g: nx.DiGraph, is_ordered: bool = True):
    """Check whether the alert subgraph forms a single transaction cycle.

    The subgraph must contain exactly one simple cycle. Its edges (sorted
    by their ``date`` attribute when ``is_ordered`` is true) must then chain
    together - each edge starts where the previous one ended -, must not
    repeat the amount of the previous edge, and must not go back in time.
    Every failed check is reported through ``logging.info``.

    Args:
        alert_sub_g: Subgraph with ``alert_id`` in its graph attributes and
            ``amount`` / ``date`` on every edge.
        is_ordered: Sort the edges chronologically before checking them.

    Returns:
        True if all checks pass, False otherwise.
    """
    alert_id = alert_sub_g.graph["alert_id"]
    edges = alert_sub_g.edges(data=True)
    # Use simple_cycles function directly (subgraph is small enough)
    cycles = list(nx.simple_cycles(alert_sub_g))
    if len(cycles) != 1:
        logging.info("Alert %s is not a cycle pattern" % alert_id)
        return False

    if is_ordered:
        # sorted() works on lists and on edge views, which have no .sort().
        edges = sorted(edges, key=lambda e: e[2]["date"])

    next_orig = None
    next_amt = sys.float_info.max
    next_date = datetime.strptime("1970-01-01", "%Y-%m-%d")
    for orig, bene, attr in edges:
        # The originator must be the beneficiary of the previous edge.
        if next_orig is not None and orig != next_orig:
            logging.info("Alert %s is not a cycle pattern" % alert_id)
            return False
        next_orig = bene

        amount = attr["amount"]
        if amount == next_amt:
            logging.info("Alert %s cycle transaction amounts are unordered" % alert_id)
            return False
        next_amt = amount

        date = attr["date"]
        if date < next_date:
            logging.info("Alert %s cycle transactions are chronologically unordered" % alert_id)
            return False
        next_date = date
    return True
def check_initial_routes(initial_routes: list = None, G: DiGraph = None):
    """
    Checks if initial routes are consistent.

    Three checks are run, in this order: every route starts at "Source" and
    ends at "Sink" (``ValueError``); every other node of ``G`` appears in at
    least one route (``KeyError``); every consecutive pair of a route is an
    edge of ``G`` that carries a ``cost`` attribute (``KeyError``).

    TODO : check if it is entirely feasible depending on VRP type.
    One way of doing it : run the subproblem by fixing variables
    corresponding to initial solution.
    """
    terminals = ["Source", "Sink"]

    # Check if routes start at Source and end at Sink
    for route in initial_routes:
        starts_ok = route[0] == "Source"
        if not (starts_ok and route[-1] == "Sink"):
            raise ValueError("Route %s must start at Source and end at Sink" %
                             route)

    # Check if every node is in at least one route
    for v in G.nodes():
        if v in terminals:
            continue
        if not any(v in route for route in initial_routes):
            raise KeyError("Node %s missing from initial solution." % v)

    # Check if edges from initial solution exist and have cost attribute
    for route in initial_routes:
        for (i, j) in list(zip(route[:-1], route[1:])):
            if (i, j) not in G.edges():
                raise KeyError("Edge (%s,%s) in route %s missing in graph." %
                               (i, j, route))
            if "cost" not in G.edges[i, j]:
                raise KeyError("Edge (%s,%s) has no cost attribute." % (i, j))
def annotate_graph_with_features(self, g: nx.DiGraph,
                                 include_target: bool = True):
    """Return a copy of ``g`` annotated with feature (and target) data.

    * Every edge gets ``features = [0.0]`` and, with ``include_target``,
      ``target = [1.0]``. These keys are written into the edge data dicts
      of ``g`` itself before being copied to the new graph.
    * Every node gets as ``features`` the one-hot row selected by the last
      element of the node (nodes are expected to be sequences) and, with
      ``include_target``, a float tensor ``target`` built from the node's
      ``y`` entry after ``self.steady_state`` has been run on the new graph.
    * The new graph gets a ``data`` attribute holding graph-level
      ``features`` (and ``target``) tensors.
    """
    # One one-hot row per part index.
    one_hot_encoded = to_one_hot(torch.arange(0, self.n_parts), self.n_parts)

    new_g = nx.DiGraph()
    for node, node_attrs in g.nodes(data=True):
        new_g.add_node(node, **node_attrs)

    for tail, head, edge_attrs in g.edges(data=True):
        edge_attrs["features"] = np.array([0.0])
        if include_target:
            edge_attrs["target"] = np.array([1.0])
        new_g.add_edge(tail, head, **edge_attrs)

    if include_target:
        self.steady_state(new_g, node_to_part=lambda x: x[-1])

    for node, node_attrs in new_g.nodes(data=True):
        # The last element of the node picks its one-hot row.
        node_attrs["features"] = one_hot_encoded[list(node)[-1]]
        if include_target:
            node_attrs["target"] = torch.tensor([node_attrs["y"].flatten()],
                                                dtype=torch.float)

    graph_data = {"features": torch.tensor([0])}
    new_g.data = graph_data
    if include_target:
        graph_data["target"] = torch.tensor([0])
    return new_g
def check_vrp(G: DiGraph = None):
    """Checks if graph is well defined.

    The graph must be a ``DiGraph`` that contains "Source" and "Sink",
    with no edge entering Source and no edge leaving Sink. Missing
    Source -> v and v -> Sink edges are added (in place) with a cost of
    1e10 so that round trips are always possible. Finally every edge must
    carry a ``cost`` attribute.

    Raises:
        TypeError: ``G`` is not a ``DiGraph``.
        KeyError: Source or Sink is missing, or an edge has no ``cost``.
        NetworkXError: Source has incoming edges, Sink has outgoing edges,
            or Sink cannot be reached from Source.
    """
    # if G is not a DiGraph
    if not isinstance(G, DiGraph):
        raise TypeError(
            "Input graph must be of type networkx.classes.digraph.DiGraph.")

    # Both terminals are validated before their neighbours are queried, so
    # a missing node is always reported with the KeyError below.
    for v in ["Source", "Sink"]:
        if v not in G.nodes():
            raise KeyError("Input graph requires Source and Sink nodes.")

    # If Source has incoming edges
    if G.in_degree("Source") > 0:
        raise NetworkXError("Source must have no incoming edges.")
    # If Sink has outgoing edges
    if G.out_degree("Sink") > 0:
        raise NetworkXError("Sink must have no outgoing edges.")

    # Roundtrips should always be possible
    # Missing edges are added with a high cost
    for v in G.nodes():
        if v in ["Source", "Sink"]:
            continue
        if not G.has_edge("Source", v):
            logger.warning("Source not connected to %s" % v)
            G.add_edge("Source", v, cost=1e10)
        if not G.has_edge(v, "Sink"):
            logger.warning("%s not connected to Sink" % v)
            G.add_edge(v, "Sink", cost=1e10)

    # If graph is disconnected
    if not has_path(G, "Source", "Sink"):
        raise NetworkXError("Source and Sink are not connected.")

    # If cost is missing
    for (i, j) in G.edges():
        if "cost" not in G.edges[i, j]:
            raise KeyError("Edge (%s,%s) requires cost attribute" % (i, j))
def to_ail_supergraph(transition_graph: networkx.DiGraph) -> networkx.DiGraph: """ Takes an AIL graph and converts it into a AIL graph that treats calls and redundant jumps as parts of a bigger block instead of transitions. Calls to returning functions do not terminate basic blocks. Based on region_identifier super_graph :return: A converted super transition graph """ # make a copy of the graph transition_graph = networkx.DiGraph(transition_graph) while True: for src, dst, data in transition_graph.edges(data=True): type_ = data.get('type', None) if len(list(transition_graph.successors(src))) == 1 and len(list(transition_graph.predecessors(dst))) == 1: # calls in the middle of blocks OR boring jumps if (type_ == 'fake_return') or (src.addr + src.original_size == dst.addr): _merge_ail_nodes(transition_graph, src, dst) break # calls to functions with no return elif type_ == 'call': transition_graph.remove_node(dst) break else: break return transition_graph
def remove_all_nodes_but_calls_and_subscripts(
        graph: networkx.DiGraph) -> networkx.DiGraph:
    """
    Removes all nodes that can not be a operator we might care about.

    "Import" and "Constant" nodes are deleted. "Assign", "Keyword", "List"
    and "Tuple" nodes are deleted after connecting each of their
    predecessors to each of their successors. "Call", "Subscript" and
    "Subscript-Assign" nodes, and the ``WirExtractor.NOT_FOUND_WIR`` node,
    are kept. Any other operation is reported and fails an assertion.
    All edge attributes are cleared at the end. The graph is modified in
    place and returned.
    """
    dropped = {"Import", "Constant"}
    bypassed = {"Assign", "Keyword", "List", "Tuple"}
    kept = {"Call", "Subscript", "Subscript-Assign"}

    def process_node(node, _):
        operation = node.operation
        if operation in dropped:
            graph.remove_node(node)
            return
        if operation in bypassed:
            # Snapshot both sides before rewiring around the node.
            parents = list(graph.predecessors(node))
            children = list(graph.successors(node))
            graph.add_edges_from((parent_node, child_node)
                                 for parent_node in parents
                                 for child_node in children)
            graph.remove_node(node)
            return
        if operation in kept or node == WirExtractor.NOT_FOUND_WIR:
            return
        print("Unknown WIR Node Type: {}".format(node))
        assert False

    traverse_graph_and_process_nodes(graph, process_node)

    # By modifying edges, most labels are lost, so we remove the rest of them too
    for edge in graph.edges(data=True):
        edge[2].clear()

    return graph
def add_style_tendencies(stg: networkx.DiGraph):
    """
    Sets or overwrites the edge colors to reflect whether a transition
    decreases values (*red*) or both increases and decreases values
    (*dodgerblue*), which is only possible for non-asynchronous transitions.
    Transitions that only increase values, or change nothing, keep whatever
    color they already have.

    **arguments**:
        * *stg*: state transition graph

    **example**::

          >>> add_style_tendencies(stg)
    """
    for source, target, attr in sorted(stg.edges(data=True)):
        # One two-character string per component: "01" is an increase,
        # "10" a decrease.
        changes = [source[x] + target[x] for x in range(len(source))]
        inc = "01" in changes
        dec = "10" in changes

        if inc and dec:
            stg.adj[source][target]["color"] = "dodgerblue"
        elif dec:
            stg.adj[source][target]["color"] = "red"
def test_remove_node():
    """Removing node 'X' must also drop the edge whose ``TP`` list names it.

    ``MapGraph.remove_node`` is called on a plain ``DiGraph`` stand-in, so
    the underlying function is fetched without binding to a ``MapGraph``
    instance.
    """
    mock_mapp = DiGraph()
    mock_mapp.add_node('X')
    mock_mapp.add_edges_from([('A', 'B', {'TP': ['X']}),
                              ('B', 'C', {'TP': ['Y']})])

    # Python 2 wraps the function in an unbound method (``im_func``);
    # Python 3 exposes the plain function directly.
    remove_node = getattr(MapGraph.remove_node, 'im_func',
                          MapGraph.remove_node)
    remove_node(mock_mapp, 'X')

    # list() so the comparison also holds when edges() returns a view.
    nt.assert_equal(list(mock_mapp.edges()), [('B', 'C')])
def outcoming_edges(graph: nx.DiGraph, node: int or str) -> list:
    """Return the edges of ``graph`` that leave ``node``.

    Args:
        graph: Directed graph to inspect.
        node: Node whose outgoing edges are wanted.

    Returns:
        A list of ``(tail, head)`` tuples. When ``node`` is a string both
        endpoints are converted with ``str``; otherwise they are returned
        as stored. The list is empty when ``node`` is not in the graph.
    """
    if node not in graph:
        return []
    # Direct adjacency lookup instead of scanning every edge of the graph.
    leaving = graph.out_edges(node)
    if isinstance(node, str):
        return [(str(node_out), str(node_in)) for node_out, node_in in leaving]
    return [(node_out, node_in) for node_out, node_in in leaving]
def filter_edges(self,thresh):
    """Remove every edge whose ``weight`` is below ``thresh``.

    After an edge is removed, each of its endpoints that is left without
    any incoming or outgoing edge is removed from the graph as well.
    """
    # Explicit snapshot: edges and nodes are deleted while it is walked.
    edges = list(DiGraph.edges(self, data='weight'))
    for tail, head, weight in edges:
        if weight < thresh:
            DiGraph.remove_edge(self, tail, head)
            # A self-loop has a single endpoint; checking it twice would
            # query a node that was just removed.
            endpoints = (tail,) if tail == head else (tail, head)
            for node in endpoints:
                if (DiGraph.in_degree(self, node) == 0
                        and DiGraph.out_degree(self, node) == 0):
                    DiGraph.remove_node(self, node)
def incidence_matrix(graph: nx.DiGraph) -> scs.csc_matrix:
    """Build the node-by-edge incidence matrix of ``graph``.

    Rows follow the sorted node order and columns follow the edge iteration
    order. For an edge ``(u, v)`` the entry is 1 in the row of ``u`` and -1
    in the row of ``v``; a self-loop yields a single 1.

    Returns:
        The matrix in CSC format.
    """
    # list(): edge views cannot be indexed by position on networkx >= 2.
    edges = list(graph.edges())
    nodes = sorted(graph.nodes())
    row_of = {node: row for row, node in enumerate(nodes)}

    result = scs.lil_matrix((len(nodes), len(edges)))
    for column, (u, v) in enumerate(edges):
        # Head first, tail last: for a self-loop the tail value (1) wins.
        result[row_of[v], column] = -1
        result[row_of[u], column] = 1
    return result.tocsc()
def median_weight(self):
    """Return the median of the ``weight`` attribute over all edges.

    For an even number of edges the result is the mean of the two middle
    weights (as a float); for an odd number it is the middle weight itself.

    Raises:
        IndexError: If the graph has no edges.
    """
    weights = sorted(edge[2] for edge in DiGraph.edges(self, data='weight'))
    n = len(weights)
    middle = n // 2
    if n % 2 == 0:
        # Zero-based indices of the two central elements.
        return float(weights[middle - 1] + weights[middle]) / float(2)
    return weights[middle]
def load_dependency_graph(self):
    """Load the model dependency graph named by the configuration.

    When ``Config.get("dependency_graph")`` is set, the JSON file at that
    path is read; otherwise an empty dependency description is used. Two
    graphs are stored in ``self.model_dependency_graph``: the dependency
    graph under ``False`` and its reversed copy under ``True`` (used for
    deletion). Self-dependencies are skipped.

    Raises:
        Exception: Any error is logged with ``self.log.exception`` and
            re-raised unchanged.
    """
    try:
        dep_path = Config.get("dependency_graph")
        if dep_path:
            self.log.debug(
                "Loading model dependency graph",
                path=dep_path,
            )
            # Context manager so the file is closed even if reading fails.
            with open(dep_path) as dep_file:
                dep_graph_str = dep_file.read()
        else:
            self.log.debug("Using default model dependency graph", graph={})
            dep_graph_str = "{}"

        # joint_dependencies is of the form { Model1 -> [(Model2, src_port, dst_port), ...] }
        # src_port is the field that accesses Model2 from Model1
        # dst_port is the field that accesses Model1 from Model2
        static_dependencies = json.loads(dep_graph_str)
        dynamic_dependencies = (
            []
        )  # Dropped Service and ServiceInstance dynamic dependencies

        joint_dependencies = dict(
            list(static_dependencies.items()) + dynamic_dependencies
        )

        model_dependency_graph = DiGraph()
        for src_model, deps in joint_dependencies.items():
            for dep in deps:
                dst_model, src_accessor, dst_accessor = dep
                if src_model != dst_model:
                    edge_label = {
                        "src_accessor": src_accessor,
                        "dst_accessor": dst_accessor,
                    }
                    model_dependency_graph.add_edge(
                        src_model, dst_model, **edge_label
                    )

        model_dependency_graph_rev = model_dependency_graph.reverse(copy=True)
        self.model_dependency_graph = {
            # deletion
            True: model_dependency_graph_rev,
            False: model_dependency_graph,
        }
        self.log.debug("Loaded dependencies", edges=model_dependency_graph.edges())
    except Exception as e:
        self.log.exception("Error loading dependency graph", e=e)
        raise
def main(access_token, package_name, max_depth):
    """Build a package's dependency network and publish it as a new graph.

    The dependency graph is filled in by ``import_package_dependencies``,
    turned into one ``node_create`` signal per node and one ``edge_create``
    signal per edge, and submitted through ``GraphCommons.new_graph``.  The
    URL of the created graph is printed.

    Args:
        access_token: Token handed to the ``GraphCommons`` client.
        package_name: Name of the package whose dependencies are imported.
        max_depth: Forwarded to ``import_package_dependencies``.
    """
    graph = DiGraph()
    graphcommons = GraphCommons(access_token)
    import_package_dependencies(graph, package_name, max_depth=max_depth)

    signals = []
    # Remember each node's type here so the edge loop does not depend on
    # the node-attribute accessor, whose name differs between networkx
    # releases.
    node_types = {}
    for node, data in graph.nodes(data=True):
        node_type = data['type']
        node_types[node] = node_type

        if node_type == 'PACKAGE':
            reference = "https://www.npmjs.com/package/%s" % node
        else:
            reference = 'https://www.npmjs.com/~%s' % node

        signals.append(Signal(
            action="node_create",
            name=node,
            type=node_type,
            reference=reference
        ))

    for source, target, data in graph.edges(data=True):
        signals.append(Signal(
            action="edge_create",
            from_name=source,
            from_type=node_types[source],
            to_name=target,
            to_type=node_types[target],
            name=data['type'],
            weight=1
        ))

    created_graph = graphcommons.new_graph(
        name="Dependency Network of %s" % package_name,
        description="Dependency Network of %s Package" % package_name,
        signals=signals
    )

    # Single-argument parenthesised form prints identically on Python 2 and 3.
    print('Created Graph URL:')
    print('https://graphcommons.com/graphs/%s' % created_graph.id)
def isolated_cycles(graph: nx.DiGraph) -> list:
    """Find the cycles made only of nodes accepted by ``is_in_1_out_1``.

    Starting from each node, the walk follows the first successor until it
    is back at the start.  The walk is abandoned as soon as it meets a node
    rejected by ``is_in_1_out_1`` or the start node of a cycle that was
    already reported, so each cycle is returned once.

    Args:
        graph: Directed graph to inspect.

    Returns:
        A list of cycles.  Each cycle is the list of visited nodes and is
        closed, i.e. it starts and ends with the same node
        (``[a, b, c, a]``; a self-loop yields ``[a, a]``).
    """
    cycles = []
    cycled_nodes = set()
    for node in graph:
        cycle = [node]
        visited = {node}
        w = node
        is_isolated_cycle = True
        while True:
            if not is_in_1_out_1(graph, w) or w in cycled_nodes:
                is_isolated_cycle = False
                break
            # NOTE(review): assumes is_in_1_out_1 guarantees that w has a
            # successor - confirm against its definition.
            w = next(iter(graph.successors(w)))
            cycle.append(w)
            if w == node:
                # Back at the start: the walk closed into a cycle.  The
                # start node must not be compared before the first step,
                # otherwise the walk would end before it begins.
                break
            if w in visited:
                # Defensive: the walk ran into a loop that does not contain
                # the start node, so the start is not on a cycle.
                is_isolated_cycle = False
                break
            visited.add(w)
        if is_isolated_cycle:
            cycles.append(cycle)
            cycled_nodes.add(node)
    return cycles
class GVMAnalysis :
    """Method-level graph built from a VM analysis object and an optional APK.

    Every method is represented by a ``NodeF`` stored in ``self.nodes`` under
    the key ``"<class_name> <method_name> <descriptor>"``; the integer ids of
    these objects are the vertices of the directed graph ``self.G``.
    """

    def __init__(self, vmx, apk) :
        """Build the graph.

        vmx -- analysis object; provides get_vm(), get_tainted_packages()
               and get_permissions()
        apk -- object providing get_activities(), get_services() and
               get_receivers(), or None to skip the entry-point tagging
        """
        self.vmx = vmx
        self.vm = self.vmx.get_vm()

        self.nodes = {}         # key string -> node object
        self.nodes_id = {}      # node id -> node object
        self.entry_nodes = []   # ids of the activity/service/receiver callbacks found
        self.G = DiGraph()

        # One edge per internal package access: accessing method -> accessed method
        for j in self.vmx.get_tainted_packages().get_internal_packages() :
            n1 = self._get_node( j.get_method().get_class_name(), j.get_method().get_name(), j.get_method().get_descriptor() )
            n2 = self._get_node( j.get_class_name(), j.get_name(), j.get_descriptor() )

            self.G.add_edge( n1.id, n2.id )
            n1.add_edge( n2, j )

        if apk != None :
            # Tag the lifecycle callback of every declared component and link a
            # labelled marker node to it
            for i in apk.get_activities() :
                j = bytecode.FormatClassToJava(i)
                n1 = self._get_exist_node( j, "onCreate", "(Landroid/os/Bundle;)V" )
                if n1 != None :
                    n1.set_attributes( { "type" : "activity" } )
                    n1.set_attributes( { "color" : ACTIVITY_COLOR } )
                    n2 = self._get_new_node_from( n1, "ACTIVITY" )
                    n2.set_attributes( { "color" : ACTIVITY_COLOR } )
                    self.G.add_edge( n2.id, n1.id )
                    self.entry_nodes.append( n1.id )

            for i in apk.get_services() :
                j = bytecode.FormatClassToJava(i)
                n1 = self._get_exist_node( j, "onCreate", "()V" )
                if n1 != None :
                    n1.set_attributes( { "type" : "service" } )
                    n1.set_attributes( { "color" : SERVICE_COLOR } )
                    n2 = self._get_new_node_from( n1, "SERVICE" )
                    n2.set_attributes( { "color" : SERVICE_COLOR } )
                    self.G.add_edge( n2.id, n1.id )
                    self.entry_nodes.append( n1.id )

            for i in apk.get_receivers() :
                j = bytecode.FormatClassToJava(i)
                n1 = self._get_exist_node( j, "onReceive", "(Landroid/content/Context; Landroid/content/Intent;)V" )
                if n1 != None :
                    n1.set_attributes( { "type" : "receiver" } )
                    n1.set_attributes( { "color" : RECEIVER_COLOR } )
                    n2 = self._get_new_node_from( n1, "RECEIVER" )
                    n2.set_attributes( { "color" : RECEIVER_COLOR } )
                    self.G.add_edge( n2.id, n1.id )
                    self.entry_nodes.append( n1.id )

        # Specific Java/Android library
        for c in self.vm.get_classes() :
            if c.get_superclassname() == "Ljava/lang/Thread;" or c.get_superclassname() == "Ljava/util/TimerTask;" :
                for i in self.vm.get_method("run") :
                    if i.get_class_name() == c.get_name() :
                        n1 = self._get_node( i.get_class_name(), i.get_name(), i.get_descriptor() )
                        n2 = self._get_node( i.get_class_name(), "start", i.get_descriptor() )

                        # link from start to run
                        self.G.add_edge( n2.id, n1.id )
                        n2.add_edge( n1, {} )

                        # link from init to start
                        for init in self.vm.get_method("<init>") :
                            if init.get_class_name() == c.get_name() :
                                n3 = self._get_node( init.get_class_name(), "<init>", init.get_descriptor() )
                                self.G.add_edge( n3.id, n2.id )
                                n3.add_edge( n2, {} )

        # Annotate the methods that use a permission and attach one marker node
        # per default risk associated with that permission
        list_permissions = self.vmx.get_permissions( [] )
        for x in list_permissions :
            for j in list_permissions[ x ] :
                n1 = self._get_exist_node( j.get_method().get_class_name(), j.get_method().get_name(), j.get_method().get_descriptor() )

                if n1 == None :
                    continue

                n1.set_attributes( { "permissions" : 1 } )
                n1.set_attributes( { "permissions_level" : DVM_PERMISSIONS[ "MANIFEST_PERMISSION" ][ x ][0] } )
                n1.set_attributes( { "permissions_details" : x } )

                try :
                    for tmp_perm in PERMISSIONS_RISK[ x ] :
                        if tmp_perm in DEFAULT_RISKS :
                            n2 = self._get_new_node( j.get_method().get_class_name(),
                                                     j.get_method().get_name(),
                                                     j.get_method().get_descriptor() + " " + DEFAULT_RISKS[ tmp_perm ][0],
                                                     DEFAULT_RISKS[ tmp_perm ][0] )
                            n2.set_attributes( { "color" : DEFAULT_RISKS[ tmp_perm ][1] } )
                            self.G.add_edge( n2.id, n1.id )

                            n1.add_risk( DEFAULT_RISKS[ tmp_perm ][0] )
                            n1.add_api( x, j.get_class_name() + "-" + j.get_name() + "-" + j.get_descriptor() )
                except KeyError :
                    # permission without an entry in PERMISSIONS_RISK: nothing to attach
                    pass

        # Tag DexClassLoader
        for m, _ in self.vmx.get_tainted_packages().get_packages() :
            if m.get_info() == "Ldalvik/system/DexClassLoader;" :
                for path in m.get_paths() :
                    if path.get_access_flag() == TAINTED_PACKAGE_CREATE :
                        # NOTE(review): _get_exist_node returns None for an unknown
                        # method and n1 is used below without a check - confirm the
                        # node always exists at this point
                        n1 = self._get_exist_node( path.get_method().get_class_name(), path.get_method().get_name(), path.get_method().get_descriptor() )
                        n2 = self._get_new_node( path.get_method().get_class_name(),
                                                 path.get_method().get_name(),
                                                 path.get_method().get_descriptor() + " " + "DEXCLASSLOADER",
                                                 "DEXCLASSLOADER" )

                        n1.set_attributes( { "dynamic_code" : "true" } )
                        n2.set_attributes( { "color" : DEXCLASSLOADER_COLOR } )
                        self.G.add_edge( n2.id, n1.id )

                        n1.add_risk( "DEXCLASSLOADER" )

    def _get_exist_node(self, class_name, method_name, descriptor) :
        """Return the node registered for the method, or None if there is none."""
        key = "%s %s %s" % (class_name, method_name, descriptor)
        try :
            return self.nodes[ key ]
        except KeyError :
            return None

    def _get_node(self, class_name, method_name, descriptor) :
        """Return the node for the method, creating and registering it if needed."""
        key = "%s %s %s" % (class_name, method_name, descriptor)
        if key not in self.nodes :
            # the next free id is the current number of nodes
            self.nodes[ key ] = NodeF( len(self.nodes), class_name, method_name, descriptor )
            self.nodes_id[ self.nodes[ key ].id ] = self.nodes[ key ]

        return self.nodes[ key ]

    def _get_new_node_from(self, n, label) :
        """Return a labelled node derived from n (label is appended to its descriptor)."""
        return self._get_new_node( n.class_name, n.method_name, n.descriptor + label, label )

    def _get_new_node(self, class_name, method_name, descriptor, label) :
        """Like _get_node, but the node is created with a label and a False last argument.

        The last NodeF argument presumably ends up as the ``real`` attribute
        tested in get_paths - verify against NodeF.
        """
        key = "%s %s %s" % (class_name, method_name, descriptor)
        if key not in self.nodes :
            self.nodes[ key ] = NodeF( len(self.nodes), class_name, method_name, descriptor, label, False )
            self.nodes_id[ self.nodes[ key ].id ] = self.nodes[ key ]

        return self.nodes[ key ]

    def set_new_attributes(self, cm) :
        """Call cm(vmx, method) for every graph node and store the result as attributes."""
        for i in self.G.nodes() :
            n1 = self.nodes_id[ i ]
            m1 = self.vm.get_method_descriptor( n1.class_name, n1.method_name, n1.descriptor )

            H = cm( self.vmx, m1 )

            n1.set_attributes( H )

    def export_to_gexf(self) :
        """Return the graph serialised as a GEXF document (string)."""
        buff = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
        buff += "<gexf xmlns=\"http://www.gephi.org/gexf\" xmlns:viz=\"http://www.gephi.org/gexf/viz\">\n"
        buff += "<graph type=\"static\">\n"

        # Declaration of the node attributes; ids come from ID_ATTRIBUTES
        buff += "<attributes class=\"node\" type=\"static\">\n"
        buff += "<attribute default=\"normal\" id=\"%d\" title=\"type\" type=\"string\"/>\n" % ID_ATTRIBUTES[ "type"]
        buff += "<attribute id=\"%d\" title=\"class_name\" type=\"string\"/>\n" % ID_ATTRIBUTES[ "class_name"]
        buff += "<attribute id=\"%d\" title=\"method_name\" type=\"string\"/>\n" % ID_ATTRIBUTES[ "method_name"]
        buff += "<attribute id=\"%d\" title=\"descriptor\" type=\"string\"/>\n" % ID_ATTRIBUTES[ "descriptor"]

        buff += "<attribute default=\"0\" id=\"%d\" title=\"permissions\" type=\"integer\"/>\n" % ID_ATTRIBUTES[ "permissions"]
        buff += "<attribute default=\"normal\" id=\"%d\" title=\"permissions_level\" type=\"string\"/>\n" % ID_ATTRIBUTES[ "permissions_level"]

        buff += "<attribute default=\"false\" id=\"%d\" title=\"dynamic_code\" type=\"boolean\"/>\n" % ID_ATTRIBUTES[ "dynamic_code"]
        buff += "</attributes>\n"

        buff += "<nodes>\n"
        for node in self.G.nodes() :
            buff += "<node id=\"%d\" label=\"%s\">\n" % (node, escape(self.nodes_id[ node ].label))
            buff += self.nodes_id[ node ].get_attributes_gexf()
            buff += "</node>\n"
        buff += "</nodes>\n"

        buff += "<edges>\n"
        nb = 0   # running edge id
        for edge in self.G.edges() :
            buff += "<edge id=\"%d\" source=\"%d\" target=\"%d\"/>\n" % (nb, edge[0], edge[1])
            nb += 1
        buff += "</edges>\n"

        buff += "</graph>\n"
        buff += "</gexf>\n"

        return buff

    def export_to_gml(self) :
        """Return the graph serialised as a GraphML document (string)."""
        buff = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n"
        buff += "<graphml xmlns=\"http://graphml.graphdrawing.org/xmlns\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:y=\"http://www.yworks.com/xml/graphml\" xmlns:yed=\"http://www.yworks.com/xml/yed/3\" xsi:schemaLocation=\"http://graphml.graphdrawing.org/xmlns http://www.yworks.com/xml/schema/graphml/1.1/ygraphml.xsd\">\n"

        buff += "<key attr.name=\"description\" attr.type=\"string\" for=\"node\" id=\"d5\"/>\n"
        buff += "<key for=\"node\" id=\"d6\" yfiles.type=\"nodegraphics\"/>\n"

        buff += "<graph edgedefault=\"directed\" id=\"G\">\n"

        for node in self.G.nodes() :
            buff += "<node id=\"%d\">\n" % (node)
            buff += self.nodes_id[ node ].get_attributes_gml()
            buff += "</node>\n"

        nb = 0   # running edge id
        for edge in self.G.edges() :
            buff += "<edge id=\"%d\" source=\"%d\" target=\"%d\"/>\n" % (nb, edge[0], edge[1])
            nb += 1

        buff += "</graph>\n"
        buff += "</graphml>\n"

        return buff

    def get_paths_method(self, method) :
        """Same as get_paths, taking the target as a method object."""
        return self.get_paths( method.get_class_name(), method.get_name(), method.get_descriptor() )

    def get_paths(self, class_name, method_name, descriptor) :
        """Return the node-independent paths from any node to the given method.

        Only paths whose first node has ``real == True`` are kept.  An empty
        list is returned when the method is not part of the graph.
        """
        import connectivity_approx as ca
        paths = []
        key = "%s %s %s" % (class_name, method_name, descriptor)

        if key not in self.nodes :
            return paths

        # every node is tried as origin (self.entry_nodes was left commented
        # out as an alternative by the original author)
        for origin in self.G.nodes() :
            if ca.vertex_connectivity_approx(self.G, origin, self.nodes[ key ].id) > 0 :
                for path in ca.node_independent_paths(self.G, origin, self.nodes[ key ].id) :
                    if self.nodes_id[ path[0] ].real == True :
                        paths.append( path )
        return paths

    def print_paths_method(self, method) :
        """Same as print_paths, taking the target as a method object."""
        self.print_paths( method.get_class_name(), method.get_name(), method.get_descriptor() )

    def print_paths(self, class_name, method_name, descriptor) :
        """Print every path returned by get_paths, followed by its node labels."""
        paths = self.get_paths( class_name, method_name, descriptor )
        for path in paths :
            print path, ":"
            print "\t",
            for p in path[:-1] :
                print self.nodes_id[ p ].label, "-->",
            print self.nodes_id[ path[-1] ].label
class LSDB(object):
    """Link-state database fed by textual LSA updates.

    Lines pushed through commit_change() are consumed by a background
    thread (process_lsa) that keeps three LSA tables up to date, rebuilds
    the network graph after each committed change and pushes the edge
    differences to the registered listeners.
    """

    def __init__(self):
        """Read the configuration, set up empty state and start the worker thread."""
        self.private_address_network = ip_network(CFG.get(DEFAULTSECT,
                                                          'private_net'))
        try:
            with open(CFG.get(DEFAULTSECT, 'private_ips'), 'r') as f:
                self.private_address_binding = json.load(f)
                # Invert the binding: router id -> list of its private addresses
                self.router_private_address = {}
                for subnets in self.private_address_binding.itervalues():
                    for rid, ip in subnets.iteritems():
                        try:
                            iplist = self.router_private_address[rid]
                        except KeyError:
                            iplist = self.router_private_address[rid] = []
                        iplist.append(ip)
        except Exception as e:
            # Best effort: a missing or malformed file leaves both maps empty
            log.error('Incorrect private IP addresses binding file')
            log.error(str(e))
            self.private_address_binding = {}
            self.router_private_address = {}
        self.last_line = ''
        self.transaction = None
        self.graph = DiGraph()
        self.routers = {}  # router-id : lsa
        self.networks = {}  # DR IP : lsa
        self.ext_networks = {}  # (router-id, dest) : lsa
        self.controllers = defaultdict(list)  # controller nr : ip_list
        self.listener = {}
        self.keep_running = True
        self.queue = Queue()
        self.processing_thread = Thread(target=self.process_lsa,
                                        name="lsa_processing_thread")
        self.processing_thread.start()

    def get_leader(self):
        """Return the smallest controller number currently known."""
        return min(self.controllers.iterkeys())

    def stop(self):
        """Stop the listener sessions and ask the worker thread to exit."""
        for l in self.listener.values():
            l.session.stop()
        self.keep_running = False
        # An empty line wakes the worker up so it can re-check keep_running
        self.queue.put('')

    def lsdb(self, lsa):
        """Return the table matching the LSA type (None for any other type)."""
        if lsa.TYPE == RouterLSA.TYPE:
            return self.routers
        elif lsa.TYPE == NetworkLSA.TYPE:
            return self.networks
        elif lsa.TYPE == ASExtLSA.TYPE:
            return self.ext_networks

    def register_change_listener(self, listener):
        """Toggle a listener: remove it if registered, otherwise register it.

        A newly registered listener immediately receives the current graph
        as a list of (u, v, metric) tuples, with -1 for a missing metric.
        """
        try:
            del self.listener[listener]
            log.info('Shapeshifter disconnected.')
        except KeyError:
            log.info('Shapeshifter connected.')
            l = ProxyCloner(ShapeshifterProxy, listener)
            self.listener[listener] = l
            l.boostrap_graph(graph=[(u, v, d.get('metric', -1))
                                    for u, v, d in self.graph.edges(data=True)])

    @staticmethod
    def extract_lsa_properties(lsa_part):
        """Parse one group of 'key<SEP_INTRA_FIELD>value' fields into a dict."""
        d = {}
        for prop in lsa_part.split(SEP_INTER_FIELD):
            if not prop:
                continue
            key, val = prop.split(SEP_INTRA_FIELD)
            d[key] = val
        return d

    def commit_change(self, line):
        """Queue an update line for the worker thread."""
        # Check that this is not a duplicate of a previous update ...
        # NOTE(review): self.last_line is never updated in the visible code,
        # so this only filters lines equal to its initial value '' - confirm
        # whether it was meant to be assigned here.
        if self.last_line == line:
            return
        self.queue.put(line)

    def forwarding_address_of(self, src, dst):
        """
        Return the forwarding address for a src, dst pair. If src is specified, return
        the private 'link-local' address of the src-dst link, otherwise return a 'public'
        IP belonging to dst
        :param src: the source node of the link towards the FA, possibly null
        :param dst: the node owning the forwarding address
        :return: forwarding address (str) or None if no compatible address was found
        """
        try:
            return self.graph[src][dst]['dst_address'] if src \
                else self.graph[dst][self.graph.neighbors(dst)[0]]['src_address']
        except KeyError:
            log.debug('%s-%s not found in graph', src, dst)
            return None

    def remove_lsa(self, lsa):
        """Drop the LSA from its table; an unknown key is ignored."""
        lsdb = self.lsdb(lsa)
        try:
            del lsdb[lsa.key()]
        except KeyError:
            pass

    def add_lsa(self, lsa):
        """Store (or overwrite) the LSA in its table."""
        lsdb = self.lsdb(lsa)
        lsdb[lsa.key()] = lsa

    def process_lsa(self):
        """Worker loop: consume queued lines until keep_running is cleared.

        BEGIN opens a transaction, COMMIT applies the open one, any other
        action is parsed as an LSA and applied directly or recorded in the
        open transaction.  When no line arrives for 5 seconds an open
        transaction is committed and a fresh one is started.  The graph is
        rebuilt whenever a change was committed.
        """
        while self.keep_running:
            commit = False
            try:
                line = self.queue.get(timeout=5)
                if not line:
                    # Empty line: wake-up marker, nothing to parse
                    self.queue.task_done()
                    continue
                # Start parsing the LSA log
                action, lsa_info = line.split(SEP_ACTION)
                if action == BEGIN:
                    self.transaction = Transaction()
                elif action == COMMIT:
                    if self.transaction:
                        self.transaction.commit(self)
                        self.transaction = None
                        commit = True
                else:
                    lsa_parts = [self.extract_lsa_properties(part)
                                 for part in lsa_info.split(SEP_GROUP) if part]
                    # The first group is the header, the rest is the LSA body
                    lsa = LSA.parse(LSAHeader.parse(lsa_parts.pop(0)),
                                    lsa_parts)
                    log.debug('Parsed %s: %s', action, lsa)
                    # NOTE(review): this local is not used afterwards
                    lsdb = self.lsdb(lsa)
                    if action == REM:
                        if not self.transaction:
                            self.remove_lsa(lsa)
                        else:
                            self.transaction.remove_lsa(lsa)
                    elif action == ADD:
                        if not self.transaction:
                            self.add_lsa(lsa)
                        else:
                            self.transaction.add_lsa(lsa)
                    if lsa.push_update_on_remove() or not action == REM:
                        commit = True
                self.queue.task_done()
            except Empty:
                if self.transaction:
                    log.debug('Splitting transaction due to timeout')
                    self.transaction.commit(self)
                    self.transaction = Transaction()
                    commit = True
            if commit:
                # Update graph accordingly
                new_graph = self.build_graph()
                # Compute graph difference and update it
                self.update_graph(new_graph)

    def __str__(self):
        """Return one line per stored LSA, preceded by a header with the count."""
        strs = [str(lsa) for lsa in chain(self.routers.values(),
                                          self.networks.values(),
                                          self.ext_networks.values())]
        strs.insert(0, '* LSDB Content [%d]:' % len(strs))
        return '\n'.join(strs)

    def build_graph(self):
        """Build and return a fresh graph from the three LSA tables.

        Side effect: the addresses falling in the configured base network
        are appended to self.controllers.
        """
        new_graph = DiGraph()
        # Rebuild the graph from the LSDB
        for lsa in chain(self.routers.values(),
                         self.networks.values(),
                         self.ext_networks.values()):
            lsa.apply(new_graph, self)
        # Contract all IPs to their respective router-id
        for lsa in self.routers.values():
            lsa.contract_graph(new_graph, self.router_private_address.get(
                lsa.routerid, []))
        # Figure out the controllers layout
        base_net = ip_network(CFG.get(DEFAULTSECT, 'base_net'))
        controller_prefix = CFG.getint(DEFAULTSECT, 'controller_prefixlen')
        # Group by controller and log them
        for ip in new_graph.nodes_iter():
            addr = ip_address(ip)
            if addr in base_net:
                """1. Compute address diff to remove base_net
                   2. Right shift to remove host bits
                   3. Mask with controller mask
                """
                id = (((int(addr) - int(base_net.network_address)) >>
                       base_net.max_prefixlen - controller_prefix) &
                      ((1 << controller_prefix) - 1))
                self.controllers[id].append(ip)
        # Contract them on the graph
        for id, ips in self.controllers.iteritems():
            contract_graph(new_graph, ips, 'C_%s' % id)
        # Remove generated self loops
        new_graph.remove_edges_from(new_graph.selfloop_edges())
        self.apply_secondary_addresses(new_graph)
        return new_graph

    def update_graph(self, new_graph):
        """Push the edge differences between the current and the new graph."""
        added_edges = graph_diff(new_graph, self.graph)
        removed_edges = graph_diff(self.graph, new_graph)
        # Propagate differences
        if len(added_edges) > 0 or len(removed_edges) > 0:
            log.debug('Pushing changes')
            for u, v in added_edges:
                self.listener_add_edge(u, v, new_graph[u][v]['metric'])
            for u, v in removed_edges:
                self.listener_remove_edge(u, v)
            if CFG.getboolean(DEFAULTSECT, 'draw_graph'):
                draw_graph(new_graph)
            self.graph = new_graph
            log.info('LSA update yielded +%d -%d edges changes' %
                     (len(added_edges), len(removed_edges)))

    def listener_add_edge(self, *args):
        """Forward add_edge(*args) to every registered listener."""
        for l in self.listener.values():
            l.add_edge(*args)

    def listener_remove_edge(self, *args):
        """Forward remove_edge(*args) to every registered listener."""
        for l in self.listener.values():
            l.remove_edge(*args)

    def apply_secondary_addresses(self, graph):
        """Set 'dst_address' on the edges between routers sharing a private subnet.

        For every pair (src, dst) of distinct routers bound in the same
        subnet, the edge src -> dst gets dst's private address; pairs
        without an edge in the graph are skipped.
        """
        for subnet in self.private_address_binding.itervalues():
            for dst, ip in subnet.iteritems():
                for src in subnet.iterkeys():
                    if src == dst:
                        continue
                    try:
                        graph[src][dst]['dst_address'] = ip
                    except KeyError:
                        pass
class GraphManager(object):
    """ Generates and processes the graph based on packets
    """

    def __init__(self, packets, layer=3, geo_ip=os.path.expanduser('~/GeoIP.dat')):
        """Build the communication graph of ``packets``.

        Args:
            packets: Iterable of captured packets.
            layer: 2, 3 or 4; selects which addresses become the nodes
                (see the ``_layer_N_edge`` helpers).
            geo_ip: Path of the GeoIP database handed to ``GeoIP``.

        Raises:
            ValueError: If ``layer`` is not 2, 3 or 4.
        """
        self.graph = DiGraph()
        self.layer = layer
        self.geo_ip = None
        self.data = {}

        try:
            self.geo_ip = GeoIP(geo_ip)
        except Exception:
            # Country lookup is optional; keep going without it.  A bare
            # except would also swallow KeyboardInterrupt / SystemExit.
            logging.warning("could not load GeoIP data")

        edge_builders = {
            2: self._layer_2_edge,
            3: self._layer_3_edge,
            4: self._layer_4_edge,
        }
        if self.layer not in edge_builders:
            raise ValueError("Other layers than 2,3 and 4 are not supported yet!")
        edges = map(edge_builders[self.layer], packets)

        # The edge helpers return None for packets without the wanted layer.
        for src, dst, packet in (edge for edge in edges if edge is not None):
            if self.graph.has_edge(src, dst):
                self.graph[src][dst]['packets'].append(packet)
            else:
                # Keyword form: a positional attribute dict is rejected by
                # newer networkx releases.
                self.graph.add_edge(src, dst, packets=[packet])

        for node in self.graph.nodes():
            self._retrieve_node_info(node)

        for src, dst in self.graph.edges():
            self._retrieve_edge_info(src, dst)

    def get_in_degree(self, print_stdout=True):
        """Return the nodes ordered by decreasing in-degree (optionally printed)."""
        unsorted_degrees = self.graph.in_degree()
        return self._sorted_results(unsorted_degrees, print_stdout)

    def get_out_degree(self, print_stdout=True):
        """Return the nodes ordered by decreasing out-degree (optionally printed)."""
        unsorted_degrees = self.graph.out_degree()
        return self._sorted_results(unsorted_degrees, print_stdout)

    @staticmethod
    def _sorted_results(unsorted_degrees, print_stdout):
        """Order ``node -> degree`` data by decreasing degree.

        Args:
            unsorted_degrees: A mapping or an iterable of ``(node, degree)``
                pairs.
            print_stdout: When true, print each degree with its node.

        Returns:
            An ``OrderedDict`` from node to degree, highest degree first.
        """
        # dict() accepts both a mapping and an iterable of pairs, so this
        # works whether the degrees come as a dict or as a degree view.
        degrees = dict(unsorted_degrees)
        sorted_degrees = OrderedDict(sorted(degrees.items(), key=lambda t: t[1], reverse=True))
        if print_stdout:
            for i in sorted_degrees:
                print(sorted_degrees[i], i)
        return sorted_degrees

    def _retrieve_node_info(self, node):
        """Fill ``self.data[node]`` with the node's IP and country, if available."""
        self.data[node] = {}
        if self.layer >= 3 and self.geo_ip:
            if self.layer == 3:
                self.data[node]['ip'] = node
            elif self.layer == 4:
                # layer-4 nodes are "ip:port" strings
                self.data[node]['ip'] = node.split(':')[0]

            node_ip = self.data[node]['ip']
            country = self.geo_ip.country_name_by_addr(node_ip)
            self.data[node]['country'] = country if country else 'private'
        #TODO layer 2 info?

    def _retrieve_edge_info(self, src, dst):
        """Summarise the packets of an edge: layer names, byte count, packet count."""
        edge = self.graph[src][dst]
        if edge:
            packets = edge['packets']
            edge['layers'] = set(itertools.chain.from_iterable(
                GraphManager.get_layers(p) for p in packets))
            edge['transmitted'] = sum(len(p) for p in packets)
            edge['connections'] = len(packets)

    @staticmethod
    def get_layers(packet):
        """Return the layer names of ``packet``, outermost first."""
        return list(GraphManager.expand(packet))

    @staticmethod
    def expand(x):
        """Yield the name of ``x`` and of every nested payload."""
        yield x.name
        while x.payload:
            x = x.payload
            yield x.name

    @staticmethod
    def _layer_2_edge(packet):
        """Return ``(src, dst, packet)`` using the addresses of the first layer."""
        return packet[0].src, packet[0].dst, packet

    @staticmethod
    def _layer_3_edge(packet):
        """Return ``(src, dst, packet)`` for IP packets, ``None`` otherwise."""
        if packet.haslayer(IP):
            return packet[1].src, packet[1].dst, packet

    @staticmethod
    def _layer_4_edge(packet):
        """Return ``("ip:port", "ip:port", packet)`` for TCP/UDP packets, else ``None``."""
        if any(map(lambda p: packet.haslayer(p), [TCP, UDP])):
            src = packet[1].src
            dst = packet[1].dst
            _ = packet[2]
            return "%s:%i" % (src, _.sport), "%s:%i" % (dst, _.dport), packet

    def draw(self, filename=None, figsize=(50, 50)):
        """Lay the graph out with ``dot`` and draw it to ``filename``.

        ``figsize`` is accepted for compatibility and is not used.
        """
        graph = self.get_graphviz_format()

        for node in graph.nodes():
            node.attr['shape'] = 'circle'
            node.attr['fontsize'] = '10'
            node.attr['width'] = '0.5'
            if 'country' in self.data[str(node)]:
                country_label = self.data[str(node)]['country']
                if country_label == 'private':
                    node.attr['label'] = str(node)
                else:
                    # nodes with a known country are labelled and highlighted
                    node.attr['label'] = "%s (%s)" % (str(node), country_label)
                    node.attr['color'] = 'blue'
                    node.attr['style'] = 'filled'
                    #TODO add color based on country or scan?

        for edge in graph.edges():
            connection = self.graph[edge[0]][edge[1]]
            edge.attr['label'] = 'transmitted: %i bytes\n%s ' % (connection['transmitted'], ' | '.join(connection['layers']))
            edge.attr['fontsize'] = '8'
            edge.attr['minlen'] = '2'
            # pen width grows with the packet count, capped at 2.0
            edge.attr['penwidth'] = min(connection['connections'] * 1.0 / len(self.graph.nodes()), 2.0)

        graph.layout(prog='dot')
        graph.draw(filename)

    #TODO do we need a .dot file export?
    def get_graphviz_format(self, filename=None):
        """Return the graph as a graphviz AGraph, optionally writing it to ``filename``."""
        # to_agraph is a top-level function only in older networkx releases.
        to_agraph = getattr(networkx, 'to_agraph', None)
        if to_agraph is None:
            to_agraph = networkx.nx_agraph.to_agraph
        agraph = to_agraph(self.graph)
        if filename:
            agraph.write(filename)
        return agraph
class ApkViewer(object):
    """Directory/file tree of an APK, kept as a directed graph.

    Graph nodes are ``Directory`` and ``File`` objects; ``self.ids`` gives
    each of them the integer id used when exporting.
    """

    def __init__(self, a):
        """Build the tree from ``a.get_files_information()``."""
        self.a = a
        self.G = DiGraph()
        self.all_files = {}
        self.ids = {}

        root = Directory( "APK" )
        root.set_color( "00FF00" )
        self.ids[ root ] = len(self.ids)
        self.G.add_node( root )

        for x, y, z in self.a.get_files_information():
            print(x, y, z, os.path.basename(x))

            # splitall fills the list; after reversing, the first entry is
            # dropped and the remaining ones are walked as directories.
            components = []
            splitall( x, components )
            components.reverse()
            del components[0]

            parent = root
            for name in components:
                # Directory nodes are shared by name across all paths.
                if name in self.all_files:
                    directory = self.all_files[ name ]
                else:
                    directory = Directory( name )
                    self.ids[ directory ] = len(self.ids)
                    self.all_files[ name ] = directory

                self.G.add_edge(parent, directory)
                parent = directory

            leaf = File( x, y, z )
            self.G.add_edge(parent, leaf)
            self.ids[ leaf ] = len(self.ids)

    def export_to_gml(self):
        """Return the tree serialised as a GraphML document (string)."""
        parts = [
            "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
            "<graphml xmlns=\"http://graphml.graphdrawing.org/xmlns\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:y=\"http://www.yworks.com/xml/graphml\" xmlns:yed=\"http://www.yworks.com/xml/yed/3\" xsi:schemaLocation=\"http://graphml.graphdrawing.org/xmlns http://www.yworks.com/xml/schema/graphml/1.1/ygraphml.xsd\">\n",
            "<key attr.name=\"description\" attr.type=\"string\" for=\"node\" id=\"d5\"/>\n",
            "<key for=\"node\" id=\"d6\" yfiles.type=\"nodegraphics\"/>\n",
            "<graph edgedefault=\"directed\" id=\"G\">\n",
        ]
        emit = parts.append

        for node in self.G.nodes():
            print(node)
            emit("<node id=\"%d\">\n" % self.ids[node])
            emit("<data key=\"d6\">\n")
            emit("<y:ShapeNode>\n")
            emit("<y:Geometry height=\"%f\" width=\"%f\"/>\n" % (60.0, 7 * node.width))
            emit("<y:Fill color=\"#%s\" transparent=\"false\"/>\n" % node.color)
            emit("<y:NodeLabel>\n")
            emit("%s\n" % node.basename)
            # files additionally show their type and CRC in the label
            if isinstance(node, File):
                emit("%s\n" % node.file_type)
                emit("%s\n" % hex(node.file_crc))
            emit("</y:NodeLabel>\n")
            emit("</y:ShapeNode>\n")
            emit("</data>\n")
            emit("</node>\n")

        for edge_id, edge in enumerate(self.G.edges()):
            emit("<edge id=\"%d\" source=\"%d\" target=\"%d\">\n" % (edge_id, self.ids[edge[0]], self.ids[edge[1]]))
            emit("</edge>\n")

        emit("</graph>\n")
        emit("</graphml>\n")

        return "".join(parts)
def _update_one_step(self):
    """Advance the experiment by one iteration.

    Every user gets suggestions from the recommender, the clicker splits
    them into accepted and rejected ones, accepted pairs are handed back to
    the recommender and rejected users are remembered per user.

    Returns:
        A dict describing the iteration (timing, connection counts,
        suggestion/rejection totals, remaining candidates, plus the
        evaluator's scores), or ``None`` - after a warning - when the
        maximum number of iterations has already been reached.
    """
    max_iter = self._set_info["max_iter"]
    graph_type = DiGraph if self._is_directed else Graph

    # Edge count before this step
    old_graph, new_graph = graph_type(), graph_type()
    old_graph.add_edges_from(self._recommender._user_connections)
    old_tot_conns = len(old_graph.edges())

    new_connections = []
    tot_suggestions = 0
    tot_rejections = 0

    if not self._iteration < max_iter:
        msg = "experiment had reached the maximum iteration (max: " + str(max_iter) + ")"
        warnings.warn(msg)
        return None

    start_time = datetime.now()
    for user_id in self._recommender._user_ids:
        # retrieve recommended users and let the clicker decide
        suggestions = self._recommender.gen_suggestion(user_id)
        accepted, rejected = self._clicker.click(user_id, suggestions)
        tot_suggestions += len(suggestions)
        tot_rejections += len(rejected)

        # collect the new connections
        if len(accepted) > 0:
            new_connections.extend(
                [user_id, new_friend] for new_friend in accepted)

        # track rejected users; a clicker that always reports no rejections
        # thereby allows the same users to be suggested again
        if len(rejected) > 0:
            if user_id in self._rejected_user_dict:
                self._rejected_user_dict[user_id].extend(rejected)
            else:
                self._rejected_user_dict[user_id] = rejected

    # consolidate new connections
    new_connections = array(new_connections)

    # tracking experiment progress
    self._iteration += 1
    if new_connections.shape[0] > 0:
        # update simulator's connection data
        self._recommender.add_new_connections(new_connections)
        self._recommender.update()
        self._no_growth_counter = 0

    # Edge count after this step
    new_graph.add_edges_from(self._recommender._user_connections)
    new_tot_conns = len(new_graph.edges())
    new_added_conns = new_tot_conns - old_tot_conns
    total_cost = (datetime.now() - start_time).total_seconds()

    # collect evaluation scores
    self._evaluator.load_eval_user_connections(self._recommender._user_connections)
    eval_score = self._evaluator.get_score()

    # number of candidates the recommender still holds
    recommender_memory = copy.deepcopy(self._recommender._ordered_cand_dict)
    cand_size = 0
    cand_size += sum(len(recommender_memory[k]) for k in recommender_memory.keys())

    # measure the network; the reference edge count is computed only once
    now_graph = graph_type()
    if self._total_edges_ref is None:
        ref_graph = graph_type()
        ref_graph.add_edges_from(self._evaluator._ref_user_connections)
        self._total_edges_ref = len(ref_graph.edges())
    now_graph.add_edges_from(self._recommender._user_connections)
    now_num_edges = len(now_graph.edges())
    ref_num_edges = self._total_edges_ref

    # collect information
    exp_record = {
        "iteration": self._iteration,
        "start_time": start_time.strftime("%Y-%m-%d %H:%M:%S"),
        "time_cost_seconds": total_cost,
        "num_new_connections_size": new_added_conns,
        "old_user_connections_size": old_tot_conns,
        "now_user_connections_size": now_num_edges,
        "ref_user_connections_size": ref_num_edges,
        "tot_suggestions": tot_suggestions,
        "tot_rejections": tot_rejections,
        "tot_remain_candidates": cand_size,
    }
    exp_record.update(eval_score)

    # mark the advance of the experiment
    self._recommender.update_iteration()
    return exp_record
class MacroManager(object):
    """This class manages the macros specified in the configuration file.

    The parameters of each section, along with their dependencies are passed to
    the class. Then, it verifies that the dependencies are correct (they form a
    DAG and respect the sections dependencies) and creates an ordered list of
    the macros to be used when replacing their actual values in a given
    combination.
    """

    def __init__(self):
        """Create a new MacroManager object."""
        self.dep_graph = DiGraph()
        self.ds_macros = set()
        self.xp_macros = set()
        self.__define_test_macros()

    def __define_test_macros(self):
        """Define values and dependencies of test macros.

        A set of macros are defined by default, including input and output
        directories of datasets and experiments and their identifiers.
        """
        self.test_macros = {
            "data_base_dir": "/tests/data",
            "out_base_dir": "/tests/out",
            "data_dir": "/tests/data/0",  # data_base_dir/ds_id
            "out_dir": "/tests/out/0",  # out_base_dir/comb_id
            "comb_id": 0,
            "ds_id": 0,
            "xp.input": "/tests/data/0",  # data_dir
            "xp.output": "/tests/out/0"  # out_dir
        }
        self.ds_params = set()
        self.xp_params = set()

        self.dep_graph.add_nodes_from(self.test_macros.keys())
        self.add_dependency("data_base_dir", "data_dir")
        self.add_dependency("ds_id", "data_dir")
        self.add_dependency("data_dir", "xp.input")
        self.add_dependency("out_base_dir", "out_dir")
        self.add_dependency("comb_id", "out_dir")
        self.add_dependency("out_dir", "xp.output")

        # list(): topological_sort may return a one-shot iterator, but the
        # result is later traversed in reverse and modified in place.
        self.sorted_test_macros = list(topological_sort(self.dep_graph))

    def update_test_macros(self, ds_id=None, comb_id=None):
        """Update test macros with dataset and/or combination ids.

        Args:
          ds_id (int, optional): The dataset identifier.
          comb_id (int, optional): The combination identifier.
        """
        # Compare against None: 0 is a valid identifier and must not be
        # mistaken for "not given".
        if ds_id is not None:
            if "data_dir" in self.test_macros:
                self.test_macros["data_dir"] = \
                    self.test_macros["data_base_dir"] + "/" + str(ds_id)
            if "xp.input" in self.test_macros:
                self.test_macros["xp.input"] = \
                    self.test_macros["data_dir"]
        if comb_id is not None:
            if "out_dir" in self.test_macros:
                self.test_macros["out_dir"] = \
                    self.test_macros["out_base_dir"] + "/" + str(comb_id)
            if "xp.output" in self.test_macros:
                self.test_macros["xp.output"] = \
                    self.test_macros["out_dir"]

    def __filter_unused_test_macros(self):
        """Remove the test macros that no other macro depends on.

        Macros are visited in reverse topological order, so removing a
        macro can make the macros it was built from removable as well.
        """
        # Iterate over a copy because the list is modified in the loop.
        for m in reversed(list(self.sorted_test_macros)):
            # list(): successors() may return an iterator, which is always
            # truthy even when it yields nothing.
            if not list(self.dep_graph.successors(m)):
                self.dep_graph.remove_node(m)
                self.sorted_test_macros.remove(m)
                del self.test_macros[m]

    def add_ds_params(self, params):
        """Add the list of dataset parameters.

        Args:
          params (dict): The list of dataset parameters.
        """
        self.ds_params = self.ds_params.union(params)

    def add_xp_params(self, params):
        """Add the list of experiment parameters.

        Args:
          params (dict): The list of experiment parameters.
        """
        self.xp_params = self.xp_params.union(params)

    def add_dependency(self, m1, m2):
        """Include a new macro dependency: m1 -> m2.

        This means that to obtain the value of m2 we use the value of m1.

        Args:
          m1 (string): The name of the param used.
          m2 (string): The name of the param being specified.

        Raises:
          MacroException: If the order of sections (test -> ds -> xp) is not
            respected.
        """
        # Check if dependency is correct
        if m1 in self.ds_params:
            if m2 in self.test_macros:
                logger.error("Not allowed dependency: ds -> test")
                raise MacroException("Not allowed dependency: ds -> test")
        elif m1 in self.xp_params:
            if m2 in self.test_macros:
                logger.error("Not allowed dependency: xp -> test")
                raise MacroException("Not allowed dependency: xp -> test")
            elif m2 in self.ds_params:
                logger.error("Not allowed dependency: xp -> ds")
                raise MacroException("Not allowed dependency: xp -> ds")

        # Add dependency
        self.dep_graph.add_edge(m1, m2)

    def sort_macros(self):
        """Sort macros respecting dependencies.

        Raises:
          MacroException: If there are cycles in dependencies between macros.
        """
        # Filter out unused test variables
        self.__filter_unused_test_macros()

        # Sort ds and xp macros.  The results are materialised inside the
        # try block so that a cycle is reported here even when the sort is
        # evaluated lazily.
        try:
            self.sorted_ds_macros = \
                list(topological_sort(self.dep_graph.subgraph(self.ds_params)))
            self.sorted_xp_macros = \
                list(topological_sort(self.dep_graph.subgraph(self.xp_params)))
        except NetworkXUnfeasible:
            raise MacroException("Macros do not follow a DAG")

        logger.info("Dependencies = " + str(self.dep_graph.edges()))
        logger.info("Test macros = " + str(self.sorted_test_macros))
        logger.info("Dataset macros = " + str(self.sorted_ds_macros))
        logger.info("Experiment macros = " + str(self.sorted_xp_macros))

    def _replace_macros_from_list(self, list_macros, value):
        """Replace the macros given in the list within the value if present.

        Args:
          list_macros (dict): The list of macros to replace and their
            respective values.
          value (string): The value where to do the replacement.

        Returns:
          The value with every ``${name}`` occurrence of a known macro
          replaced by ``str()`` of that macro's value.
        """
        new_value = value
        for m in list_macros:
            new_value = new_value.replace("${" + m + "}", str(list_macros[m]))
        return new_value

    def replace_ds_macros(self, comb):
        """Replace macros in ds combination.

        The combination is updated in place.  Resolved values are also
        stored in ``self.test_macros`` so that later macros can refer to
        them.

        Args:
          comb (dict): The combination of parameters.
        """
        list_macros = self.test_macros

        for m in self.sorted_ds_macros:
            comb[m] = self._replace_macros_from_list(list_macros, comb[m])
            list_macros[m] = comb[m]

    def replace_xp_macros(self, comb):
        """Replace macros in xp combination.

        Dataset macros are resolved first, then experiment macros.  The
        combination is updated in place and resolved values are also stored
        in ``self.test_macros``.

        Args:
          comb (dict): The combination of parameters.
        """
        list_macros = self.test_macros

        for m in self.sorted_ds_macros:
            comb[m] = self._replace_macros_from_list(list_macros, comb[m])
            list_macros[m] = comb[m]

        for m in self.sorted_xp_macros:
            comb[m] = self._replace_macros_from_list(list_macros, comb[m])
            list_macros[m] = comb[m]
def load(self, fname, verbose=True, **kwargs):
    """Load an edge list and build the symmetrised, normalised flux graph.

    The file is parsed with ``numpy.genfromtxt`` using ``dtype=int``.  The
    first two columns are the source and target node IDs; an optional third
    column is the flux (edge weight).  Without a third column every edge
    gets weight 1.  No header line should be included.

    Processing steps:

    1. If the node IDs are not exactly ``0 .. N-1`` they are relabelled.
    2. Self loops are ignored; a repeated edge is rejected.
    3. The graph is symmetrised: a missing reverse edge is added with the
       same weight, and unequal opposite weights are replaced by their mean.
    4. The graph is restricted to its largest strongly connected component.
    5. Every edge gets ``transition_rate`` (its weight divided by the
       weighted out-degree of its source) and ``effective_distance``
       (``1 - log(transition_rate)``).

    ``self.nodes`` is set to the number of nodes.

    Parameters
    ----------
    fname : str
        Path to the file
    verbose : bool
        Print information about the data. True by default.
    kwargs : dict
        delimiter : str
            Column separator handed to ``genfromtxt`` (" " by default).
        Other keywords (e.g. ``dtype``, ``return_graph``) are currently
        ignored by this method.

    Returns
    -------
    G : DiGraph
        The processed graph.  This method does not assign ``self.graph``
        itself; storing the result is left to the caller.

    Raises
    ------
    AssertionError
        If the same directed edge appears twice in the file.
    """
    preview = 7  # maximum number of rows / nodes echoed when verbose

    delimiter = kwargs.get("delimiter", " ")
    data = np.genfromtxt(fname, delimiter=delimiter, dtype=int, unpack=False)
    source, target = data[:, 0], data[:, 1]
    if data.shape[1] > 2:
        flux = data[:, 2]
    else:
        flux = np.ones_like(source)
    nodes = set(source) | set(target)
    self.nodes = len(nodes)
    lines = len(flux)

    if set(range(self.nodes)) != nodes:
        new_node_ID = {old: new for new, old in enumerate(nodes)}
        map_new_node_ID = np.vectorize(new_node_ID.__getitem__)
        source = map_new_node_ID(source)
        target = map_new_node_ID(target)
        if verbose:
            print("\nThe node IDs have to run continuously from 0 to Number_of_nodes-1.")
            print("Node IDs have been changed according to the requirement.\n-----------------------------------\n")
            print("Lines:  %s , Nodes:  %s" % (lines, self.nodes))
            print('-----------------------------------\nData Structure:\n\nsource, target, weight \n')
            # Never index past the end of a short file.
            for ii in range(min(preview, lines)):
                print("%i, %i, %1.2e" % (source[ii], target[ii], flux[ii]))
            print('-----------------------------------\n')

    G = DiGraph()  # Empty, directed Graph
    G.add_nodes_from(range(self.nodes))
    for ii in range(lines):
        u, v, w = int(source[ii]), int(target[ii]), float(flux[ii])
        if u != v:  # ignore self loops
            # Explicit raise instead of `assert` so the check survives -O.
            if G.has_edge(u, v):
                raise AssertionError("Edge appeared twice - not supported")
            G.add_edge(u, v, weight=w)
        else:
            if verbose:
                print("ignore self loop at node %s" % u)

    symmetric = True
    # Snapshot the edges: reverse edges are added while looping.
    for s, t in list(G.edges()):
        w1 = G[s][t]["weight"]
        try:
            w2 = G[t][s]["weight"]
        except KeyError:
            symmetric = False
            G.add_edge(t, s, weight=w1)
            w2 = w1
        if w1 != w2:
            symmetric = False
            G[s][t]["weight"] += G[t][s]["weight"]
            G[s][t]["weight"] /= 2
            G[t][s]["weight"] = G[s][t]["weight"]
    if verbose:
        if not symmetric:
            print("The network has been symmetricised.")

    ccs = strongly_connected_component_subgraphs(G)
    ccs = sorted(ccs, key=len, reverse=True)
    G_GSCC = ccs[0]
    if G_GSCC.number_of_nodes() != G.number_of_nodes():
        G = G_GSCC
        if verbose:
            print("\n--------------------------------------------------------------------------")
            print("The network has been restricted to the giant strongly connected component.")
    self.nodes = G.number_of_nodes()

    # The weighted out-degree only depends on the source node, so compute it
    # once per node instead of once per edge.
    out_strength = {}
    for u, v, data in G.edges(data=True):
        if u not in out_strength:
            out_strength[u] = G.out_degree(u, weight='weight')
        data['transition_rate'] = 1. * data['weight'] / out_strength[u]

    for u, v, data in G.edges(data=True):
        data['effective_distance'] = 1. - log(data['transition_rate'])

    if verbose:
        print("\n--------------------------------------------------------------------------")
        print("\nnode ID, out-weight, normalized out-weight, sum of effective distances \n ")
        for ii in range(preview):
            # Nodes 0..6 need not exist in a small graph or after the
            # restriction to the giant component.
            if ii not in G:
                continue
            out_edges = G.out_edges(ii, data=True)
            out_weight, effective_distance, transition_rate = 0, 0, 0
            for u, v, data in out_edges:
                out_weight += data["weight"]
                effective_distance += data["effective_distance"]
                transition_rate += data["transition_rate"]
            print(" %i %1.2e %2.3f %1.2e " % (ii, out_weight, transition_rate, effective_distance))
        print("\n ... graph is saved in self.graph")
    return G
def gen_graph(self, seed, sheet=None):
    """
    Given a starting point (e.g., A6, or A3:B7) on a particular sheet,
    generate a Spreadsheet instance that captures the logic and control
    flow of the equations.

    The seed cells that hold a formula are put on a work list.  Each formula
    is converted to code with ``self.cell2code`` and compiled, and every
    cell or range it refers to becomes a graph node with an edge pointing to
    the cell that uses it.  Referenced cells that hold formulas themselves
    are appended to the work list, so the loop runs until no unprocessed
    formula remains.

    Args:
        seed: Starting address(es), handed to ``Cell.make_cells``.
        sheet: Sheet on which ``seed`` is resolved.  When ``None`` the
            active sheet reported by ``self.excel`` is used.

    Returns:
        A ``Spreadsheet`` built from the dependency graph, the address ->
        cell/range map and ``self.filename``.
    """
    # starting points
    cursheet = sheet if sheet is not None else self.excel.get_active_sheet()
    self.excel.set_sheet(cursheet)

    # no need to output nr and nc here, since seed can be a list of unlinked cells
    seeds, nr, nc = Cell.make_cells(self.excel, seed, sheet=cursheet)
    seeds = list(flatten(seeds))

    logger.debug("Seed %s expanded into %s cells" % (seed, len(seeds)))

    # only keep seeds with formulas or numbers
    seeds = [s for s in seeds if s.formula or isinstance(s.value, number_types)]

    logger.debug("%s filtered seeds" % len(seeds))

    # cells to analyze: only formulas
    todo = [s for s in seeds if s.formula]

    logger.debug("%s cells on the todo list" % len(todo))

    # map of all cells (address -> cell or range object)
    cellmap = OrderedDict([(x.address(), x) for x in seeds])

    # directed graph
    graph = DiGraph()

    # match the info in cellmap
    for cell in itervalues(cellmap):
        self.add_node_to_graph(graph, cell)

    while todo:
        # pop() takes the most recently added cell (last in, first out)
        c1 = todo.pop()

        logger.debug("Handling {}".format(c1.address()))

        # set the current sheet so relative addresses resolve properly
        if c1.sheet != cursheet:
            cursheet = c1.sheet
            self.excel.set_sheet(cursheet)

        # parse the formula into code
        pystr, ast = self.cell2code(c1)

        # set the code & compile it (will flag problems sooner rather than later)
        c1.python_expression = pystr
        c1.compile()

        # get all the cells/ranges this formula refers to
        deps = [x.tvalue.replace('$', '') for x in ast.nodes() if isinstance(x, RangeNode)]

        # remove dupes
        deps = uniqueify(deps)

        for dep in deps:
            # if the dependency is a multi-cell range, create a range object
            if is_range(dep):
                # this will make sure we always have an absolute address
                rng = CellRange(dep, sheet=cursheet)

                if rng.address() in cellmap:
                    # already dealt with this range
                    # add an edge from the range to the parent
                    graph.add_edge(cellmap[rng.address()], cellmap[c1.address()])
                    # the member cells were linked when the range was first seen
                    continue
                else:
                    # turn into cell objects
                    cells, nrows, ncols = Cell.make_cells(self.excel, dep, sheet=cursheet)

                    # get the values so we can set the range value
                    # (flat list for a single row/column, list of rows otherwise)
                    if nrows == 1 or ncols == 1:
                        rng.value = [c.value for c in cells]
                    else:
                        rng.value = [[c.value for c in cells[j]] for j in range(len(cells))]

                    # save the range
                    cellmap[rng.address()] = rng
                    # add an edge from the range to the parent
                    self.add_node_to_graph(graph, rng)
                    graph.add_edge(rng, cellmap[c1.address()])
                    # cells in the range should point to the range as their parent
                    target = rng
            else:
                # not a range, create the cell object
                cells = [Cell.resolve_cell(self.excel, dep, sheet=cursheet)]
                target = cellmap[c1.address()]

            # process each cell
            for c2 in flatten(cells):
                # if we haven't treated this cell already
                if c2.address() not in cellmap:
                    if c2.formula:
                        # cell with a formula, needs to be added to the `todo` list
                        todo.append(c2)
                    else:
                        # constant cell, no need for further processing, just remember to set the code
                        pystr, ast = self.cell2code(c2)
                        c2.python_expression = pystr
                        # compile() is skipped when the generated code is a bare number
                        if not isinstance(pystr, (int, float)):
                            c2.compile()
                    # save in the cellmap
                    cellmap[c2.address()] = c2
                    # add to the graph
                    self.add_node_to_graph(graph, c2)
                # add an edge from the cell to the parent (range or cell)
                graph.add_edge(cellmap[c2.address()], target)

    logger.info("Graph construction done, %s nodes, %s edges, %s cellmap entries" % (len(graph.nodes()), len(graph.edges()), len(cellmap)))

    sp = Spreadsheet(graph=graph, cellmap=cellmap, filename=self.filename)
    return sp
class GVMAnalysis :
    """Method-level graph of an analysed application.

    Nodes are ``NodeF`` objects identified by an integer id; ``self.G`` is a
    directed graph over those ids.  The graph is built in the constructor
    and can be written out with :meth:`export_to_gexf`.
    """

    def __init__(self, vmx, apk) :
        """Build the graph.

        Args:
            vmx: Analysis object; supplies the VM (``get_vm``), the tainted
                packages, method signatures and permissions.
            apk: Package object supplying activities, services and
                receivers, or ``None`` to skip the entry-point marking.
        """
        self.vmx = vmx
        self.vm = self.vmx.get_vm()

        self.__nodes = {}       # "class method descriptor" key -> node
        self.__nodes_id = {}    # node id -> node

        self.G = DiGraph()

        # The order matters: later steps only decorate nodes that the
        # earlier ones have created (they use _get_exist_node).
        self._add_internal_call_edges()
        if apk is not None :
            self._add_entry_points(
                apk.get_activities(), "onCreate", "(Landroid/os/Bundle;)V",
                "activity", ACTIVITY_COLOR, "ACTIVITY")
            self._add_entry_points(
                apk.get_services(), "onCreate", "()V",
                "service", SERVICE_COLOR, "SERVICE")
            self._add_entry_points(
                apk.get_receivers(), "onReceive",
                "(Landroid/content/Context; Landroid/content/Intent;)V",
                "receiver", RECEIVER_COLOR, "RECEIVER")
        self._add_thread_edges()
        self._add_permission_nodes()
        self._add_dexclassloader_nodes()

    def _api_entropy(self, method, prefix) :
        """Return ``libsign.entropy`` of the "L4" signature of ``method`` restricted to ``prefix``."""
        signature = self.vmx.get_method_signature(
            method, "L4", { "L4" : { "arguments" : [prefix] } })
        return libsign.entropy(signature.get_string())

    def _add_internal_call_edges(self) :
        """Add one edge per internal package path, from the calling method to the referenced one."""
        for j in self.vmx.tainted_packages.get_internal_packages() :
            src = j.get_method()
            n1 = self._get_node(src.get_class_name(), src.get_name(), src.get_descriptor())
            n2 = self._get_node(j.get_class_name(), j.get_name(), j.get_descriptor())

            m1 = self.vm.get_method_descriptor(src.get_class_name(), src.get_name(), src.get_descriptor())
            m2 = src

            n1.set_attributes({ "android_api" : self._api_entropy(m1, "Landroid") })
            n2.set_attributes({ "android_api" : self._api_entropy(m2, "Landroid") })

            n1.set_attributes({ "java_api" : self._api_entropy(m1, "Ljava") })
            n2.set_attributes({ "java_api" : self._api_entropy(m2, "Ljava") })

            self.G.add_edge(n1.id, n2.id)

    def _add_entry_points(self, class_names, method_name, descriptor, type_name, color, label) :
        """Tag the given callback of each class, if already in the graph, and link a labelled marker node to it."""
        for i in class_names :
            j = bytecode.FormatClassToJava(i)
            n1 = self._get_exist_node(j, method_name, descriptor)
            if n1 is not None :
                n1.set_attributes({ "type" : type_name })
                n1.set_attributes({ "color" : color })
                n2 = self._get_new_node_from(n1, label)
                n2.set_attributes({ "color" : color })
                self.G.add_edge(n2.id, n1.id)

    def _add_thread_edges(self) :
        """For every direct subclass of Thread, add an edge from its ``run`` node to a ``start`` node."""
        for c in self.vm.get_classes() :
            if c.get_superclassname() == "Ljava/lang/Thread;" :
                for i in self.vm.get_method("run") :
                    if i.get_class_name() == c.get_name() :
                        n1 = self._get_node(i.get_class_name(), i.get_name(), i.get_descriptor())
                        n2 = self._get_node(i.get_class_name(), "start", i.get_descriptor())
                        self.G.add_edge(n1.id, n2.id)

    def _add_permission_nodes(self) :
        """Flag methods that use a permission and attach one coloured node per default risk."""
        list_permissions = self.vmx.get_permissions([])
        for x in list_permissions :
            for j in list_permissions[x] :
                src = j.get_method()
                n1 = self._get_exist_node(src.get_class_name(), src.get_name(), src.get_descriptor())
                if n1 is None :
                    continue

                n1.set_attributes({ "permissions" : 1 })
                n1.set_attributes({ "permissions_level" : DVM_PERMISSIONS["MANIFEST_PERMISSION"][x][0] })

                # Best effort, as before: a KeyError (e.g. a permission
                # without a risk entry) ends the risk handling for this path.
                try :
                    for tmp_perm in PERMISSIONS_RISK[x] :
                        if tmp_perm in DEFAULT_RISKS :
                            risk_label = DEFAULT_RISKS[tmp_perm][0]
                            n2 = self._get_new_node(
                                src.get_class_name(), src.get_name(),
                                src.get_descriptor() + " " + risk_label,
                                risk_label)
                            n2.set_attributes({ "color" : DEFAULT_RISKS[tmp_perm][1] })
                            self.G.add_edge(n2.id, n1.id)
                except KeyError :
                    pass

    def _add_dexclassloader_nodes(self) :
        """Flag methods that create a DexClassLoader and link a DEXCLASSLOADER marker node to them."""
        for m, _ in self.vmx.tainted_packages.get_packages() :
            if m.get_info() == "Ldalvik/system/DexClassLoader;" :
                for path in m.get_paths() :
                    if path.get_access_flag() == TAINTED_PACKAGE_CREATE :
                        src = path.get_method()
                        n1 = self._get_exist_node(src.get_class_name(), src.get_name(), src.get_descriptor())
                        # _get_exist_node returns None for unknown methods;
                        # skip them like the permission pass does instead of
                        # failing with AttributeError.
                        if n1 is None :
                            continue
                        n2 = self._get_new_node(
                            src.get_class_name(), src.get_name(),
                            src.get_descriptor() + " " + "DEXCLASSLOADER",
                            "DEXCLASSLOADER")

                        n1.set_attributes({ "dynamic_code" : "true" })
                        n2.set_attributes({ "color" : DEXCLASSLOADER_COLOR })
                        self.G.add_edge(n2.id, n1.id)

    def _get_exist_node(self, class_name, method_name, descriptor) :
        """Return the node registered for this method, or ``None`` if there is none."""
        key = "%s %s %s" % (class_name, method_name, descriptor)
        return self.__nodes.get(key)

    def _get_node(self, class_name, method_name, descriptor) :
        """Return the node for this method, creating and registering it on first use."""
        key = "%s %s %s" % (class_name, method_name, descriptor)
        if key not in self.__nodes :
            self.__nodes[key] = NodeF(len(self.__nodes), class_name, method_name, descriptor)
            self.__nodes_id[self.__nodes[key].id] = self.__nodes[key]
        return self.__nodes[key]

    def _get_new_node_from(self, n, label) :
        """Return a labelled node derived from ``n``; ``label`` is appended to its descriptor."""
        return self._get_new_node(n.class_name, n.method_name, n.descriptor + label, label)

    def _get_new_node(self, class_name, method_name, descriptor, label) :
        """Like :meth:`_get_node`, but a newly created node also receives ``label``."""
        key = "%s %s %s" % (class_name, method_name, descriptor)
        if key not in self.__nodes :
            self.__nodes[key] = NodeF(len(self.__nodes), class_name, method_name, descriptor, label)
            self.__nodes_id[self.__nodes[key].id] = self.__nodes[key]
        return self.__nodes[key]

    def export_to_gexf(self, output) :
        """Write the graph to the file ``output`` in GEXF format.

        Args:
            output: Path of the file to create; an existing file is
                overwritten.
        """
        # `with` closes the file even when a write fails half-way.
        with open(output, "w") as fd :
            fd.write( "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" )
            fd.write( "<gexf xmlns=\"http://www.gephi.org/gexf\" xmlns:viz=\"http://www.gephi.org/gexf/viz\">\n" )
            fd.write( "<graph type=\"static\">\n")

            fd.write( "<attributes class=\"node\" type=\"static\">\n" )
            fd.write( "<attribute default=\"normal\" id=\"%d\" title=\"type\" type=\"string\"/>\n" % ID_ATTRIBUTES[ "type"] )
            fd.write( "<attribute id=\"%d\" title=\"class_name\" type=\"string\"/>\n" % ID_ATTRIBUTES[ "class_name"] )
            fd.write( "<attribute id=\"%d\" title=\"method_name\" type=\"string\"/>\n" % ID_ATTRIBUTES[ "method_name"] )
            fd.write( "<attribute id=\"%d\" title=\"descriptor\" type=\"string\"/>\n" % ID_ATTRIBUTES[ "descriptor"] )

            fd.write( "<attribute default=\"0\" id=\"%d\" title=\"permissions\" type=\"integer\"/>\n" % ID_ATTRIBUTES[ "permissions"] )
            fd.write( "<attribute default=\"normal\" id=\"%d\" title=\"permissions_level\" type=\"string\"/>\n" % ID_ATTRIBUTES[ "permissions_level"] )

            fd.write( "<attribute default=\"0.0\" id=\"%d\" title=\"android_api\" type=\"float\"/>\n" % ID_ATTRIBUTES[ "android_api"] )
            fd.write( "<attribute default=\"0.0\" id=\"%d\" title=\"java_api\" type=\"float\"/>\n" % ID_ATTRIBUTES[ "java_api"] )

            fd.write( "<attribute default=\"false\" id=\"%d\" title=\"dynamic_code\" type=\"boolean\"/>\n" % ID_ATTRIBUTES[ "dynamic_code"] )
            fd.write( "</attributes>\n" )

            fd.write( "<nodes>" )
            # nodes() / edges() exist in every networkx release, unlike the
            # 1.x-only `G.node` / `G.edge` attributes, and iterate the same way.
            for node in self.G.nodes() :
                fd.write( "<node id=\"%d\" label=\"%s\">\n" % (node, escape(self.__nodes_id[ node ].label)) )
                fd.write( self.__nodes_id[ node ].get_attributes() )
                fd.write( "</node>\n" )
            fd.write( "</nodes>\n" )

            fd.write( "<edges>\n" )
            nb = 0
            for link in self.G.edges() :
                fd.write( "<edge id=\"%d\" source=\"%d\" target=\"%d\"/>\n" % (nb, link[0], link[1]) )
                nb += 1
            fd.write( "</edges>\n")

            fd.write("</graph>\n")
            fd.write("</gexf>\n")
Purpose: Created: 2018/7/25 """ import unittest from lpp import * from networkx import DiGraph if __name__ == '__main__': RAW = open(sys.argv[1],'rU') END = open(sys.argv[2],'w') has = {} seq = "" number = 1 network = DiGraph() for line in RAW: line_l = line.strip().split() if line_l[0] not in has: has[line_l[0]] = "" start = line_l[7]+line_l[8] else: end = line_l[7]+line_l[8] network.add_edge(start,end) start = end for start ,end in network.edges(): END.write(start+'\t'+end+'\n') #END.write('>scaffold%s\n'%(number)+seq+'\n')