Beispiel #1
0
def graph_equals(
        g1: nx.DiGraph,
        g2: nx.DiGraph,
        weight_column_name: Text = 'weight') -> bool:
    """Checks if two graphs are equal.

    The graphs are equal when their node views compare equal, their edge
    views compare equal and, unless weight checking is disabled, every edge
    stores the same value under weight_column_name in both graphs.

    If weight_column_name is None (or any other falsy value), then it does
    not check weight values.

    Args:
        g1: First graph to be compared.

        g2: Second graph to be compared.

        weight_column_name: The name of weight column.

    Returns:
        Boolean whether g1 equals g2 or not.

    Raises:
        None. An edge without the weight attribute is compared as if its
        weight were None instead of raising KeyError.
    """
    if g1.nodes() != g2.nodes():
        return False
    if g1.edges() != g2.edges():
        return False
    if not weight_column_name:
        return True

    # The edge views compared equal above, so every edge of g1 is looked up
    # in g2 as well.
    for src, dst in g1.edges():
        w1 = g1.get_edge_data(src, dst).get(weight_column_name)
        w2 = g2.get_edge_data(src, dst).get(weight_column_name)
        if w1 != w2:
            return False
    return True
    def postprocessing(self, graph: nx.DiGraph):
        """Resolve the edges that leave the nodes in ``self.non_doable``.

        Every edge ``(src, dst)`` with ``src`` in ``self.non_doable`` is
        treated as "non decidable":

        * If the reverse edge is non decidable too, both directions are
          removed from ``graph`` and the pair is recorded once as an
          undirected edge.
        * If the reverse edge exists in ``graph`` but is not non decidable,
          only ``(src, dst)`` is removed.

        ``graph`` is modified in place.

        Args:
            graph: Directed graph to clean up.

        Returns:
            ``graph`` itself when no undirected edge was recorded, otherwise
            the result of ``PDAG(...).to_dag(...)`` built from the remaining
            directed edges and the recorded undirected edges.
        """
        non_decidable_arrow = [(src, dst)
                               for src in self.non_doable
                               for dst in list(graph.neighbors(src))]
        # The list keeps the iteration order; the set gives O(1) membership
        # tests instead of scanning the list for every edge.
        non_decidable_lookup = set(non_decidable_arrow)

        undirected_edge = set()
        for src, dst in non_decidable_arrow:
            reverse = (dst, src)
            if (reverse not in undirected_edge
                    and reverse in non_decidable_lookup):
                # two undecidable case: keep the pair as one undirected edge
                undirected_edge.add((src, dst))
                if graph.has_edge(src, dst):
                    graph.remove_edge(src, dst)
                if graph.has_edge(dst, src):
                    graph.remove_edge(dst, src)
            elif (reverse not in non_decidable_lookup
                  and graph.has_edge(dst, src)):
                # one decidable with one undecidable: keep the decidable one
                graph.remove_edge(src, dst)

        print('undirected', undirected_edge)
        if not undirected_edge:
            return graph

        pdag = PDAG(directed_ebunch=list(graph.edges()),
                    undirected_ebunch=list(undirected_edge))
        return pdag.to_dag(required_edges=list(graph.edges()))
Beispiel #3
0
def merge_graphs(process_model_graph: nx.DiGraph,
                 check_point_graph: nx.DiGraph) -> nx.DiGraph:
    """
    Merge the CP (check point) graph into the PMG (process model graph).

    First the 'weight' of every PMG edge is decayed by 5%. Then each CP edge
    is folded into the PMG: if the edge already exists its 'weight' and
    'time' are added to the PMG values, otherwise the edge is created with
    the CP values. The PMG is modified in place and finally passed through
    ``normalize_graph``.

    Parameters
    --------------------------------------
    process_model_graph: nx.DiGraph,
        PMG graph
    check_point_graph: nx.DiGraph,
        CP graph
    Returns
    --------------------------------------
    process_model_graph: nx.DiGraph,
        PMG after merge, as returned by ``normalize_graph``
    """
    # Decay the existing model before the new observations are added.
    for _, _, pmg_attrs in process_model_graph.edges(data=True):
        pmg_attrs['weight'] *= 0.95

    for source, target, cp_attrs in check_point_graph.edges(data=True):
        if process_model_graph.has_edge(source, target):
            pmg_attrs = process_model_graph[source][target]
            pmg_attrs['weight'] += cp_attrs['weight']
            pmg_attrs['time'] += cp_attrs['time']
        else:
            process_model_graph.add_edge(source,
                                         target,
                                         weight=cp_attrs['weight'],
                                         time=cp_attrs['time'])

    return normalize_graph(process_model_graph)
Beispiel #4
0
def draw_graph(graph: nx.DiGraph):
    """Draw ``graph`` and highlight its GOOD and BAD edges.

    All nodes and edges are drawn first (gray nodes, faint edges) on a
    spring layout. Edges whose class is ``EdgeClass.GOOD`` and the nodes
    they point to are then drawn in green; ``EdgeClass.BAD`` edges and the
    nodes they point to are drawn in red on top.
    """

    def _edges_of_class(edge_class):
        # Collect the (u, v) pairs whose attribute dict maps to edge_class.
        return {(u, v)
                for u, v, data in graph.edges(data=True)
                if _get_edge_class(data) == edge_class}

    good_edges = _edges_of_class(EdgeClass.GOOD)
    good_nodes = {head for _, head in good_edges}
    bad_edges = _edges_of_class(EdgeClass.BAD)
    bad_nodes = {head for _, head in bad_edges}

    nodes_pos = nx.spring_layout(graph)

    # Base layer, then the highlighted node groups on top.
    nx.draw_networkx_nodes(graph, pos=nodes_pos, alpha=0.6, node_color="gray")
    for highlighted_nodes, colour in ((good_nodes, "g"), (bad_nodes, "r")):
        nx.draw_networkx_nodes(graph,
                               pos=nodes_pos,
                               alpha=0.8,
                               nodelist=highlighted_nodes,
                               node_color=colour)
    nx.draw_networkx_labels(graph, pos=nodes_pos, font_size=8)

    # Same layering for the edges.
    nx.draw_networkx_edges(graph, pos=nodes_pos, alpha=0.25)
    for highlighted_edges, colour in ((good_edges, "g"), (bad_edges, "r")):
        nx.draw_networkx_edges(graph,
                               edgelist=highlighted_edges,
                               pos=nodes_pos,
                               alpha=0.5,
                               width=4,
                               edge_color=colour)
Beispiel #5
0
    def __dfs(self, v: int, cur_graph: nx.DiGraph, cur_graph_inv: nx.DiGraph,
              valid_graphs: List[nx.DiGraph], max_graphs: int):
        """Enumerate sub-graphs of ``self.g`` depth-first, vertex by vertex.

        For vertex ``v`` every non-empty subset of its outgoing edges in
        ``self.g`` is tried in turn: the chosen edges are added to
        ``cur_graph`` (and reversed to ``cur_graph_inv``), the search
        continues with ``v + 1``, and the edges are removed again.

        Args:
            v: Vertex currently being processed.
            cur_graph: Graph under construction; mutated and restored.
            cur_graph_inv: Same edges as ``cur_graph`` with reversed
                direction, used to look up the incoming edges of ``v``.
            valid_graphs: Output list; finished graphs are appended to it.
            max_graphs: The search stops once ``valid_graphs`` holds more
                than this many graphs.
        """
        # NOTE(review): '>' lets the list grow to max_graphs + 1 entries;
        # confirm whether '>=' was intended.
        if len(valid_graphs) > max_graphs:
            return

        # Out-edges of v in the inverse graph are the in-edges of v in
        # cur_graph.
        no_incoming = len(cur_graph_inv.edges([v])) == 0

        # v is one of the ends but nothing points to it: dead branch.
        if no_incoming and v in self.ends:
            return

        # Last vertex.
        if v == max(self.ends):
            # Copy only the edges so that unused vertices are left out.
            snapshot = nx.DiGraph()
            snapshot.add_edges_from(cur_graph.edges)
            valid_graphs.append(snapshot)
            return

        # Nothing points to v and v is not a start: move on without adding
        # any edge from it.
        if no_incoming and v not in self.starts:
            self.__dfs(v + 1, cur_graph, cur_graph_inv, valid_graphs,
                       max_graphs)
            return

        # v has at least one incoming edge, or it is a start.
        targets = [to for _, to in self.g.edges([v])]
        # Bit i of edge_selection decides whether the i-th outgoing edge is
        # used; masks are visited from "all edges" down to 1.
        for edge_selection in range((1 << len(targets)) - 1, 0, -1):
            chosen = [to for i, to in enumerate(targets)
                      if (1 << i) & edge_selection]
            for to in chosen:
                cur_graph.add_edge(v, to)
                cur_graph_inv.add_edge(to, v)
            self.__dfs(v + 1, cur_graph, cur_graph_inv, valid_graphs,
                       max_graphs)
            for to in chosen:
                cur_graph.remove_edge(v, to)
                cur_graph_inv.remove_edge(to, v)
Beispiel #6
0
def get_sort_edge(matrix, train_graph: nx.DiGraph, test_graph: nx.DiGraph,
                  max_k):
    """Mask known edges in ``matrix`` and sort every row in parallel.

    Entries for edges of ``train_graph`` and for the diagonal of every
    training node are overwritten with ``min(matrix) - 1``. ``matrix`` is
    modified in place. Each row is then handed to ``multi_sort`` together
    with its index and ``max_k`` through a process pool.

    Args:
        matrix: Square score matrix indexed as ``matrix[src][dst]``.
        train_graph: Graph whose edges and nodes are masked out.
        test_graph: Kept for interface compatibility; not used.
        max_k: Forwarded unchanged to ``multi_sort``.

    Returns:
        ``np.array`` built from the list of ``multi_sort`` results, one
        entry per row of ``matrix``.
    """
    min_value = np.min(matrix) - 1
    # Make sure edges that appeared in the training set get the minimum value.
    for src, dst in train_graph.edges():
        matrix[src][dst] = min_value
    for node in train_graph.nodes:
        matrix[node][node] = min_value

    print('start multi', datetime.now())
    data = [(i, max_k, row) for i, row in enumerate(matrix)]
    print('end finish prepare data', datetime.now())

    # The context manager tears the pool down even when map() raises, so
    # worker processes are not leaked on failure.
    with Pool(50) as pool:
        print('start map', datetime.now())
        result = pool.map(multi_sort, data)
        pool.close()
        pool.join()

    return np.array(list(result))
Beispiel #7
0
def solveBiobjectiveSP(G: nx.DiGraph, source: int, sink: int, objVal) -> dict:
    """
    Computes a (weakly) non-dominated point of the biobjective shortest paths problem.

    Minimizes the total 'length1' of a source-sink path subject to the total
    'length2' being at most ``objVal - 1``.

    :param G: directed graph with 'length1' and 'length2' on every edge
    :param source: Source node in G
    :param sink: Sink node in G
    :param objVal: Bound on second objective
    :return: Dict with 'objVal' (tuple of both objective values) and 'path'
        (selected edges, in ``G.edges()`` order); empty dict when the model
        is not solved to optimality
    """
    BiobjSP = Model('BiobjSP')

    # Variables: one binary per edge, 1 when the edge is on the path
    X = dict()
    for u, v in G.edges():
        X[u, v] = BiobjSP.addVar(vtype=GRB.BINARY, name=f'X_{u}_{v}')

    # Objective function
    BiobjSP.setObjective(quicksum(X[u, v] * G[u][v]['length1']
                                  for u, v in G.edges()),
                         sense=GRB.MINIMIZE)

    # Constraints: flow conservation with net outflow +1 at the source,
    # -1 at the sink and 0 everywhere else
    for v in G.nodes():
        if v == source:
            net_outflow = 1
        elif v == sink:
            net_outflow = -1
        else:
            net_outflow = 0
        BiobjSP.addConstr(
            quicksum(X[a] for a in G.out_edges(v)) -
            quicksum(X[a] for a in G.in_edges(v)), GRB.EQUAL, net_outflow)

    # NOTE(review): the "- 1" presumably assumes integer 'length2' values
    # so that the bound is a strict improvement -- confirm.
    BiobjSP.addConstr(
        quicksum(X[u, v] * G[u][v]['length2'] for u, v in G.edges()),
        GRB.LESS_EQUAL, objVal - 1)

    # Solve model
    BiobjSP.update()
    BiobjSP.optimize()

    if BiobjSP.status != GRB.OPTIMAL:
        return dict()

    # Read the solution from the variable handles. Looking variables up by
    # name is slower and ambiguous when two edges format to the same name
    # (e.g. nodes "1_2", "3" and "1", "2_3").
    SP = dict()
    SP['objVal'] = (BiobjSP.objVal,
                    sum(X[u, v].X * G[u][v]['length2']
                        for u, v in G.edges()))
    SP['path'] = [(u, v) for u, v in G.edges() if round(X[u, v].X) == 1]
    return SP
Beispiel #8
0
def same_edges(g1: nx.DiGraph, g2: nx.DiGraph):
    """
    Check if 2 digraphs have the same edges, ignoring order and attributes
    :param g1: first digraph
    :param g2: second digraph
    :return: True when both edge sets are equal, False otherwise
    """
    edges_of_g1 = set(g1.edges())
    edges_of_g2 = set(g2.edges())
    return edges_of_g1 == edges_of_g2
Beispiel #9
0
def equivalent_singlegraphs(g1_single: nx.DiGraph,
                            g2_single: nx.DiGraph) -> bool:
    """Tell whether two digraphs have equal node views and equal edge data.

    Every edge of either graph must have the same attribute data in both
    graphs (an edge missing from one side yields ``None`` there and so
    compares unequal), and the node views must compare equal.
    """

    def _same_edge_data(edge):
        return g1_single.get_edge_data(*edge) == g2_single.get_edge_data(*edge)

    # Check the edges of both graphs so that an edge present in only one of
    # them is detected from either side.
    edge_checks = [_same_edge_data(e) for e in g1_single.edges()]
    edge_checks.extend([_same_edge_data(e) for e in g2_single.edges()])

    same_nodes = g1_single.nodes() == g2_single.nodes()
    return all(edge_checks) & same_nodes
Beispiel #10
0
class TestsIssue20(unittest.TestCase):
    """
    Tests for issue #20
    https://github.com/torressa/cspy/issues/20
    """

    def setUp(self):
        # Small digraph with a weight and a two-entry resource cost per edge
        self.G = DiGraph(directed=True, n_res=2)
        edge_specs = (
            ("Source", 1, 10, [1, 1]),
            ("Source", 2, 10, [1, 1]),
            ("Source", 3, 10, [1, 1]),
            (1, "Sink", -10, [1, 0]),
            (2, "Sink", -10, [1, 0]),
            (3, "Sink", -10, [1, 0]),
            (3, 2, -5, [1, 1]),
            (2, 1, -10, [1, 1]),
        )
        for tail, head, weight, res_cost in edge_specs:
            self.G.add_edge(tail, head, weight=weight,
                            res_cost=array(res_cost))

        # Upper and lower resource bounds
        self.max_res = [len(self.G.edges()), 2]
        self.min_res = [0, 0]

    def _assert_path_uses_graph_edges(self, path):
        # Every consecutive pair of nodes on the path must be an edge of G.
        self.assertTrue(all(e in self.G.edges() for e in zip(path, path[1:])))

    @parameterized.expand(zip(range(100), range(100)))
    def testBiDirectional(self, _, seed):
        """
        Find shortest path of simple test digraph using BiDirectional,
        once per seed
        """
        bidirec = BiDirectional(self.G, self.max_res, self.min_res, seed=seed)
        bidirec.run()
        path = bidirec.path
        cost = bidirec.total_cost
        total_res = bidirec.consumed_resources
        # Path first, then cost and consumed resources
        self.assertEqual(path, ['Source', 2, 1, 'Sink'])
        self.assertEqual(cost, -10)
        self.assertTrue(all(total_res == [3, 2]))
        self._assert_path_uses_graph_edges(path)

    def testTabu(self):
        """
        Find shortest path of simple test digraph using Tabu
        """
        tabu = Tabu(self.G, self.max_res, self.min_res)
        tabu.run()
        path = tabu.path
        cost = tabu.total_cost
        total_res = tabu.consumed_resources
        # Cost and consumed resources first, then the path
        self.assertEqual(cost, -5)
        self.assertTrue(all(total_res == [3, 2]))
        self.assertEqual(path, ['Source', 3, 2, 'Sink'])
        self._assert_path_uses_graph_edges(path)
def solveMaxFlow(G: nx.DiGraph, source: int, sink: int) -> dict:
    """
    Solves the maximum flow problem.

    Builds a linear program with one continuous flow variable per edge,
    bounded by the edge's 'capacity', and maximizes the net outflow B of
    the source.

    :param G: directed graph with a 'capacity' attribute on every edge
    :param source: Source node in G
    :param sink: Sink node in G
    :return: Dict of edges and flow values, containing only edges with
        strictly positive flow; empty dict when the model is not solved to
        optimality
    """
    maxFlow = Model('MaxFlow')

    # Variable: flow on each edge, limited by its capacity
    X = dict()
    for a in G.edges():
        X[a] = maxFlow.addVar(vtype=GRB.CONTINUOUS,
                              lb=0,
                              ub=G.get_edge_data(*a)['capacity'],
                              name=f'X_{a}')

    B = maxFlow.addVar(vtype=GRB.CONTINUOUS, lb=0, name='B')

    # Objective function
    maxFlow.setObjective(B, sense=GRB.MAXIMIZE)

    # Constraints: net outflow is B at the source, -B at the sink and 0 at
    # every other node
    for v in G.nodes():
        if v == source:
            net_outflow = B
        elif v == sink:
            net_outflow = -B
        else:
            net_outflow = 0
        maxFlow.addConstr(
            quicksum(X[a] for a in G.out_edges(v)) -
            quicksum(X[a] for a in G.in_edges(v)), GRB.EQUAL, net_outflow)

    # Solve model
    maxFlow.update()
    maxFlow.optimize()

    if maxFlow.status != GRB.OPTIMAL:
        return dict()

    # Read the values from the variable handles instead of looking each
    # variable up by name twice.
    flows = dict()
    for a in G.edges():
        flow_value = X[a].X
        if flow_value > 0:
            flows[a] = flow_value
    return flows
Beispiel #12
0
def maximal_non_branching_paths(graph: nx.DiGraph) -> list:
    """Collect the maximal non-branching paths of ``graph``.

    A path is started on every outgoing edge of each node that is not
    "1-in-1-out" (as decided by ``is_in_1_out_1``) and extended while its
    last node is 1-in-1-out. The result of ``isolated_cycles(graph)`` is
    appended to these paths.

    Args:
        graph: Directed graph to decompose.

    Returns:
        List of paths, each a list of nodes.
    """
    paths = []
    for node in graph:
        if not is_in_1_out_1(graph, node) and graph.out_degree(node) > 0:
            for v, w in graph.edges(node):
                non_branching_path = [v, w]
                while is_in_1_out_1(graph, w):
                    # Follow the outgoing edge of w. Edge views cannot be
                    # indexed with [0] in networkx >= 2, so take the first
                    # successor instead.
                    w = next(iter(graph.successors(w)))
                    non_branching_path.append(w)
                paths.append(non_branching_path)

    return paths + isolated_cycles(graph)
Beispiel #13
0
class TestsIssue20(unittest.TestCase):
    """Tests for issue #20
    https://github.com/torressa/cspy/issues/20
    """
    def setUp(self):
        # Small digraph with a weight and a two-entry resource cost per edge
        self.G = DiGraph(directed=True, n_res=2)
        edge_specs = (
            ("Source", 1, 10, [1, 1]),
            ("Source", 2, 10, [1, 1]),
            ("Source", 3, 10, [1, 1]),
            (1, "Sink", -10, [1, 0]),
            (2, "Sink", -10, [1, 0]),
            (3, "Sink", -10, [1, 0]),
            (3, 2, -5, [1, 1]),
            (2, 1, -10, [1, 1]),
        )
        for tail, head, weight, res_cost in edge_specs:
            self.G.add_edge(tail, head, weight=weight,
                            res_cost=array(res_cost))
        # Maximum and minimum resource arrays
        self.max_res = [len(self.G.edges()), 2]
        self.min_res = [0, 0]
        # Expected results shared by both tests
        self.result_path = ['Source', 2, 1, 'Sink']
        self.total_cost = -10
        self.consumed_resources = [3, 2]

    def _assert_path_uses_graph_edges(self, path):
        # Every consecutive pair of nodes on the path must be an edge of G.
        self.assertTrue(
            all(e in self.G.edges() for e in zip(path, path[1:])))

    def test_bidirectional(self):
        """
        Find shortest path using BiDirectional with elementary=True
        """
        alg = BiDirectional(self.G,
                            self.max_res,
                            self.min_res,
                            elementary=True)
        alg.run()
        self.assertEqual(alg.path, self.result_path)
        self.assertEqual(alg.total_cost, self.total_cost)
        self.assertEqual(alg.consumed_resources, self.consumed_resources)
        self._assert_path_uses_graph_edges(alg.path)

    def test_tabu(self):
        """
        Find shortest path using Tabu
        """
        alg = Tabu(self.G, self.max_res, self.min_res)
        alg.run()
        self.assertEqual(alg.path, self.result_path)
        self.assertEqual(alg.total_cost, self.total_cost)
        # Compared element-wise, unlike in test_bidirectional
        self.assertTrue(all(alg.consumed_resources == self.consumed_resources))
        self._assert_path_uses_graph_edges(alg.path)
Beispiel #14
0
def kruskal_min_spanning_tree(G : nx.DiGraph):
    """Build a minimum spanning forest of ``G`` with Kruskal's algorithm.

    Edges are visited in increasing 'weight' order (ties broken by the edge
    tuple itself) and kept whenever their end points are not yet connected
    in the disjoint-set structure.

    Returns a new ``nx.DiGraph`` holding the kept edges with their weights.
    """
    # Min-heap of (weight, edge) pairs.
    pending = [(attrs["weight"], (u, v))
               for u, v, attrs in G.edges(data=True)]
    heapq.heapify(pending)

    components = disjoint_set.DisjointSet()
    tree = nx.DiGraph()
    while pending:
        weight, (u, v) = heapq.heappop(pending)
        if not components.connected(u, v):
            components.union(u, v)
            tree.add_edge(u, v, weight=weight)
    return tree
def normalize_graph(graph: nx.DiGraph) -> nx.DiGraph:
    """
    Time and weight normalization for each edge in the graph.
    Time normalization is the mean time of an edge: 'time' / 'weight'.
    Weight normalization divides 'weight' by the largest weight in the graph.
    The results are stored on each edge as 'time_normalized' and
    'weight_normalized'; the graph is modified in place.
    A graph without edges is returned unchanged.

    Parameters
    --------------------------------------
    graph: nx.DiGraph,
        Graph to be normalized
    Returns
    --------------------------------------
    graph: nx.DiGraph,
        Normalized graph (the same object that was passed in)
    Raises
    --------------------------------------
    ZeroDivisionError
        If an edge weight, or the maximum weight, is zero
    """
    attributes: "list[dict[str, float]]" = [
        data for _, _, data in graph.edges(data=True)
    ]

    # max() raises ValueError on an empty sequence, and an edgeless graph
    # has nothing to normalize.
    if not attributes:
        return graph

    max_weight = max(data.get("weight") for data in attributes)

    for data in attributes:
        edge_weight = data.get("weight")
        edge_time = data.get("time")

        data["weight_normalized"] = edge_weight / max_weight
        data["time_normalized"] = edge_time / edge_weight

    return graph
def common_edge_ratio(ref_user_connections, eval_user_connections, is_directed=False):
    """ Calculate the fraction of common edges out of the union of the edges of two graphs

    Both edge lists are passed through ``_normalize_connections`` and loaded
    into graphs, so duplicate edges count once.

    Parameters:
    ==========
    ref_user_connections: a list of edges
    eval_user_connections: a list of edges
    is_directed: boolean,
        False (default): edges form an undirected graph
        True: edges form a directed graph

    Returns:
    ==========
    number of common edges divided by the size of the union of both edge
    sets. NOTE(review): the division fails with ZeroDivisionError when both
    graphs end up without edges.
    """
    ref_connections = _normalize_connections(ref_user_connections, is_directed)
    eval_connections = _normalize_connections(eval_user_connections, is_directed)

    graph_cls = DiGraph if is_directed else Graph
    ref_graph = graph_cls()
    eval_graph = graph_cls()
    ref_graph.add_edges_from(ref_connections)
    eval_graph.add_edges_from(eval_connections)

    ref_edges = ref_graph.edges()
    eval_edges = eval_graph.edges()

    # |A ∪ B| = |A| + |B| - |A ∩ B|
    tot_common = sum(1 for edge in eval_edges if edge in ref_edges)
    union_size = len(ref_edges) + len(eval_edges) - tot_common
    return tot_common / union_size
Beispiel #17
0
def sequential_subgraph_nodes(g: nx.DiGraph,
                              size: int) -> List[List[Union[str, int]]]:
    """Split ``g`` into a sequence of node lists, one per extracted branch.

    A working copy built from the edges of ``g`` is repeatedly passed to
    ``find_leafy_branch_larger_than_size``. The nodes of each branch are
    recorded in lexicographical topological order. Every branch node that
    has an outgoing edge inside the branch is then dropped from the working
    copy. This repeats until at most one node is left.

    Raises:
        nx.NetworkXUnfeasible: if ``g`` is not weakly connected or
            ``size`` is 1 or less.
    """
    if not nx.is_weakly_connected(g):
        raise nx.NetworkXUnfeasible(
            "sequential solutions are not possible for disconnected graphs.")

    if size <= 1:
        raise nx.NetworkXUnfeasible(
            "the minimum directed subgraph length is 2 nodes.")

    # Work on a copy of the structure; the caller's graph is left untouched.
    remaining = nx.DiGraph(g.edges())

    node_groups = []
    while len(remaining.nodes()) > 1:
        branch = find_leafy_branch_larger_than_size(remaining, size)
        node_groups.append(list(nx.lexicographical_topological_sort(branch)))

        # Drop the branch nodes that feed into other branch nodes; nodes
        # without an outgoing edge in the branch stay in the graph.
        upstream = {n for n, deg in branch.out_degree if deg > 0}
        remaining = remaining.subgraph(
            [n for n in remaining.nodes() if n not in upstream])

    return node_groups
Beispiel #18
0
def build_dict_graph(nodes: nx.DiGraph) -> dict:
    """Convert the edges of ``nodes`` into an adjacency dict.

    Each edge source maps to the list of its targets, in edge iteration
    order. Nodes without outgoing edges get no key.
    """
    adjacency = {}
    for source, target in nodes.edges():
        adjacency.setdefault(source, []).append(target)
    return adjacency
Beispiel #19
0
    def prune_graph_simple(self, graph: nx.DiGraph,
                           flow: Tuple[int, int]) -> nx.DiGraph:
        """
        Remove cycles between flow source and sink. Every edge must strictly
        decrease the 'route_weight' distance to the sink; edges that do not
        are removed. Simple but removes more paths than necessary.
        Args:
            graph: graph to DAGify (left untouched, a copy is pruned)
            flow: source and sink of the flow

        Returns:
            A DAG with source at the start and sink at the end
        """
        pruned = graph.copy()
        sink = flow[1]

        # Distance to the sink for each vertex. Vertices missing from the
        # result fall back to 0 through the defaultdict.
        distances = collections.defaultdict(int)
        distances.update(
            nx.shortest_path_length(pruned,
                                    source=None,
                                    target=sink,
                                    weight='route_weight'))

        # Edges that do not bring us closer to the sink are dropped, so no
        # cycle can remain.
        wrong_way = [(src, dst) for src, dst in pruned.edges()
                     if distances[dst] >= distances[src]]
        pruned.remove_edges_from(wrong_way)
        return pruned
Beispiel #20
0
    def load_dependency_graph(self):
        """Load the model dependency graph from the configured JSON file.

        The file path is read from ``Config.get("dependency_graph")``. The
        result is stored in ``self.model_dependency_graph`` as a dict with
        the reversed graph under ``True`` and the forward graph under
        ``False``. Dependencies of a model on itself are skipped.

        Raises:
            Any exception raised while reading or parsing the file is
            logged and re-raised.
        """
        dep_path = Config.get("dependency_graph")
        self.log.info('Loading model dependency graph', path = dep_path)

        try:
            # Context manager so the file handle is closed even on error.
            with open(dep_path) as dep_file:
                dep_graph_str = dep_file.read()

            # joint_dependencies is of the form { Model1 -> [(Model2, src_port, dst_port), ...] }
            # src_port is the field that accesses Model2 from Model1
            # dst_port is the field that accesses Model1 from Model2
            joint_dependencies = json.loads(dep_graph_str)

            model_dependency_graph = DiGraph()
            for src_model, deps in joint_dependencies.items():
                for dst_model, src_accessor, dst_accessor in deps:
                    if src_model != dst_model:
                        # Keyword attributes work on networkx 1.x and 2.x;
                        # a positional attribute dict is rejected by 2.x.
                        model_dependency_graph.add_edge(
                            src_model, dst_model,
                            src_accessor=src_accessor,
                            dst_accessor=dst_accessor)

            model_dependency_graph_rev = model_dependency_graph.reverse(
                copy=True)
            self.model_dependency_graph = {
                # deletion
                True: model_dependency_graph_rev,
                False: model_dependency_graph
            }
            self.log.info("Loaded dependencies", edges = model_dependency_graph.edges())
        except Exception as e:
            self.log.exception("Error loading dependency graph", e = e)
            raise
Beispiel #21
0
	def mean_weight(self):
		"""Return the arithmetic mean of the 'weight' attribute over all edges.

		The edges are read through ``DiGraph.edges`` directly.
		NOTE(review): a graph without edges makes the division fail with
		ZeroDivisionError.
		"""
		weighted_edges=DiGraph.edges(self,data='weight')
		total=0.0
		for weighted_edge in weighted_edges:
			# index 2 holds the weight of a (u, v, weight) triple
			total=total+weighted_edge[2]
		return total/float(len(weighted_edges))
Beispiel #22
0
def gravity_demand(graph: nx.DiGraph) -> Demand:
    """
    Generates gravity demand (deterministic, based on bandwidth) for one time
    step. The demand from node i to node j (i != j) is the total outgoing
    weight of i times the total incoming weight of j, divided by ten times
    the sum of all edge weights.
    Args:
        graph: Networkx DiGraph with 'weight' on edges to generate demand
            from; nodes are used as array indices, so they are assumed to be
            the integers 0..n-1
    Returns:
        A demand array with one entry per ordered node pair (i, j), i != j,
        in row-major order
    """
    num_nodes = graph.number_of_nodes()
    sorted_edges = sorted(graph.edges(data=True))
    edge_weights = [attrs['weight'] for _, _, attrs in sorted_edges]
    total_flow = sum(edge_weights)

    # Total weight entering and leaving each node.
    node_in_flow = np.zeros(num_nodes, np.float32)
    node_out_flow = np.zeros(num_nodes, np.float32)
    for (src, dst, _), weight in zip(sorted_edges, edge_weights):
        node_in_flow[dst] += weight
        node_out_flow[src] += weight

    pair_products = []
    for i in range(num_nodes):
        for j in range(num_nodes):
            if i != j:
                pair_products.append(node_out_flow[i] * node_in_flow[j])

    return np.divide(np.array(pair_products), total_flow * 10)
Beispiel #23
0
def create_triples(
    graph: nx.DiGraph,
    node2int: Optional[Mapping] = None,
    relation: Any = 0,
    create_int_ids: bool = False
) -> Tuple[List[Tuple[Any, Any, Any]], Mapping]:
    """Turn every edge of ``graph`` into a ``(head, tail, relation)`` triple.

    Node ids are translated through ``node2int``. When no mapping is given
    and ``create_int_ids`` is set, one is created with
    ``create_unique_int_ids``. When there is still no mapping, nodes are
    used as they are and an identity mapping is filled in along the way.

    Returns:
        The list of triples and the mapping that was used.
    """
    if node2int is None and create_int_ids:
        node2int = create_unique_int_ids(graph)

    use_identity = node2int is None
    if use_identity:
        node2int = {}

    samples = []
    for head, tail in graph.edges():
        if use_identity:
            # Record each node as its own id.
            node2int[head] = head
            node2int[tail] = tail
            samples.append((head, tail, relation))
        else:
            samples.append((node2int[head], node2int[tail], relation))

    return samples, node2int
Beispiel #24
0
    def _get_single_path(  # pylint: disable=too-many-arguments, too-many-locals
        self,
        graph: DiGraph,
        source: Address,
        target: Address,
        value: PaymentAmount,
        address_to_reachability: Dict[Address, AddressReachability],
        visited: Dict[ChannelID, float],
        disallowed_paths: List[List[Address]],
        fee_penalty: float,
    ) -> Optional[Path]:
        """Return the cheapest acceptable path from ``source`` to ``target``.

        The "weight" of every edge in ``graph`` is first recomputed with
        ``self.edge_weight``. Simple paths are then tried in increasing
        weight order, and the first one that is valid and not listed in
        ``disallowed_paths`` is returned. Returns ``None`` when the
        candidates are exhausted.
        """
        # Refresh edge weights; each edge also needs the view of its
        # reverse edge.
        for node1, node2 in graph.edges():
            forward = graph[node1][node2]
            backward = graph[node2][node1]
            forward["weight"] = self.edge_weight(
                visited=visited,
                view=forward["view"],
                view_from_partner=backward["view"],
                amount=value,
                fee_penalty=fee_penalty,
            )

        candidate_routes: Iterable[List[Address]] = nx.shortest_simple_paths(
            G=graph, source=source, target=target, weight="weight")

        # Skip duplicates and invalid paths.
        # NOTE(review): Path is built from self.G, not from the ``graph``
        # argument -- confirm that this is intended.
        for nodes in candidate_routes:
            candidate = Path(self.G, nodes, value, address_to_reachability)
            if candidate.is_valid and candidate.nodes not in disallowed_paths:
                return candidate
        return None
def simplify_debt_graph(debt_graph: nx.DiGraph) -> nx.DiGraph:
    """Cancel out circular debts until the graph has no cycle left.

    While a cycle exists, its smallest edge 'weight' is subtracted from
    every edge of that cycle, and all edges of the graph whose weight is
    zero are removed. The graph is modified in place.

    Implemented as a loop rather than recursion so that graphs with many
    cycles cannot hit the interpreter's recursion limit.

    Args:
        debt_graph: Directed graph with a 'weight' attribute on every edge.

    Returns:
        The same graph object, now without cycles.
    """
    while True:
        # Only one cycle is examined per pass because edges are deleted
        # below, so the cycle search has to restart afterwards.
        try:
            cycle = next(nx.simple_cycles(debt_graph))
        except StopIteration:
            return debt_graph

        # Attribute dicts of the cycle's edges, including the closing edge
        # from the last node back to the first.
        edges = [debt_graph[u][v]
                 for u, v in zip(cycle, cycle[1:] + cycle[:1])]

        # Subtract the smallest weight of the cycle from each of its edges
        min_edge_weight = min(e['weight'] for e in edges)
        for edge in edges:
            edge['weight'] -= min_edge_weight

        # Delete edge(s) with weight zero
        ebunch = [(u, v)
                  for u, v, data in debt_graph.edges(data=True)
                  if data['weight'] == 0]
        debt_graph.remove_edges_from(ebunch)
def add_style_interactionsigns(igraph: networkx.DiGraph):
    """
    Sets attributes for the arrow head and edge color of interactions to indicate the interaction sign.
    Activating interactions get the attributes *"arrowhead"="normal"* and *"color"="black"*,
    inhibiting interactions get the attributes *"arrowhead"="tee"* and *"color"="red"*, and
    ambivalent interactions get the attributes *"arrowhead"="dot"* and *"color"="dodgerblue"*.
    Interactions with any other sign are left untouched.

    **arguments**:
        * *igraph*: interaction graph

    **example**::

          >>> add_style_interactionsigns(igraph)
    """

    # (sign, arrowhead, color); sets are not hashable, so this is a tuple
    # of rows rather than a dict keyed by sign
    sign_styles = (
        ({1, -1}, "dot", "dodgerblue"),
        ({-1}, "tee", "red"),
        ({1}, "normal", "black"),
    )

    for source, target, attr in sorted(igraph.edges(data=True)):
        sign = attr["sign"]
        for expected_sign, arrowhead, color in sign_styles:
            if sign == expected_sign:
                edge_attrs = igraph.adj[source][target]
                edge_attrs["arrowhead"] = arrowhead
                edge_attrs["color"] = color
                break
def add_style_activities(igraph: networkx.DiGraph, activities: Union[str, dict], color_active: str = "/paired10/5", color_inactive: str = "/paired10/1"):
    """
    Sets attributes for the color and fillcolor of nodes to indicate which variables are activated and which are inhibited in *activities*.
    All activated or inhibited components get the attribute *"color"="black"*.
    Activated components (value 1) get *"fillcolor"=color_active* and
    all other listed components get *"fillcolor"=color_inactive*.
    Interactions involving activated or inhibited nodes get the attribute *"color"="gray"* to reflect that they are ineffective.

    **arguments**:
        * *igraph*: interaction graph
        * *activities*: activated and inhibited nodes, as a dict or as a subspace string
        * *color_active*: color in dot format for active components
        * *color_inactive*: color in dot format for inactive components

    **example**::

          >>> activities = {"ERK":1, "MAPK":0}
          >>> add_style_activities(igraph, activities)
    """

    # isinstance also accepts str subclasses; the sorted names are only
    # needed to decode the string form.
    if isinstance(activities, str):
        names = sorted(igraph.nodes())
        activities = subspace2dict(names, activities)

    for name in igraph.nodes():
        if name in activities:
            igraph.nodes[name]["color"] = "black"
            igraph.nodes[name]["fillcolor"] = color_active if activities[name] == 1 else color_inactive

    for x, y in igraph.edges():
        if x in activities or y in activities:
            igraph.adj[x][y]["color"] = "gray"
Beispiel #28
0
def is_cycle(alert_sub_g: nx.DiGraph, is_ordered: bool = True):
    """Check whether an alert sub-graph forms a single cycle pattern.

    The sub-graph must contain exactly one simple cycle. When ``is_ordered``
    is set, its edges are additionally walked in 'date' order and must
    chain (each edge starts where the previous one ended), must not repeat
    the amount of the previous edge, and must not go back in time.

    Args:
        alert_sub_g: Sub-graph with ``graph["alert_id"]`` set and 'date' and
            'amount' attributes on every edge.
        is_ordered: Whether to run the ordering checks.

    Returns:
        True if all checks pass, False otherwise. The reason for a failure
        is logged at info level.
    """
    alert_id = alert_sub_g.graph["alert_id"]
    # Use simple_cycles function directly (subgraph is small enough)
    cycles = list(nx.simple_cycles(alert_sub_g))
    if len(cycles) != 1:
        logging.info("Alert %s is not a cycle pattern", alert_id)
        return False
    if not is_ordered:
        return True

    # sorted() works on an edge view as well as on a plain list; edge views
    # (networkx >= 2) have no in-place .sort().
    edges = sorted(alert_sub_g.edges(data=True), key=lambda e: e[2]["date"])
    next_orig = None
    next_amt = sys.float_info.max
    next_date = datetime.strptime("1970-01-01", "%Y-%m-%d")
    for orig, bene, attr in edges:
        # Each transaction must start at the beneficiary of the previous one.
        if next_orig is not None and orig != next_orig:
            logging.info("Alert %s is not a cycle pattern", alert_id)
            return False
        next_orig = bene

        # NOTE(review): only an amount equal to the previous one is
        # rejected; confirm whether an ordering check was intended.
        amount = attr["amount"]
        if amount == next_amt:
            logging.info("Alert %s cycle transaction amounts are unordered", alert_id)
            return False
        next_amt = amount

        date = attr["date"]
        if date < next_date:
            logging.info("Alert %s cycle transactions are chronologically unordered", alert_id)
            return False
        next_date = date
    return True
Beispiel #29
0
def check_initial_routes(initial_routes: list = None, G: DiGraph = None):
    """
    Validates the user-supplied initial routes against the graph G.

    Three checks are run, in this order:
      1. every route starts at "Source" and ends at "Sink" (ValueError);
      2. every node of G other than "Source"/"Sink" appears in at least
         one route (KeyError);
      3. every consecutive pair of a route is an edge of G carrying a
         "cost" attribute (KeyError).

    TODO : check if it is entirely feasible depending on VRP type.
    One way of doing it : run the subproblem by fixing variables corresponding to initial solution.
    """
    terminals = ["Source", "Sink"]

    # 1. Routes must start at Source and end at Sink
    for route in initial_routes:
        if not (route[0] == "Source" and route[-1] == "Sink"):
            raise ValueError("Route %s must start at Source and end at Sink" %
                             route)

    # 2. Every customer node must be visited by at least one route
    for v in G.nodes():
        if v in terminals:
            continue
        visited = any(v in route for route in initial_routes)
        if not visited:
            raise KeyError("Node %s missing from initial solution." % v)

    # 3. Edges used by the routes must exist and carry a cost
    for route in initial_routes:
        for i, j in zip(route[:-1], route[1:]):
            if (i, j) not in G.edges():
                raise KeyError("Edge (%s,%s) in route %s missing in graph." %
                               (i, j, route))
            if "cost" not in G.edges[i, j]:
                raise KeyError("Edge (%s,%s) has no cost attribute." % (i, j))
Beispiel #30
0
    def annotate_graph_with_features(self,
                                     g: nx.DiGraph,
                                     include_target: bool = True):
        """Return a copy of ``g`` annotated with feature/target attributes.

        Node and edge attributes of ``g`` are carried over into a new
        DiGraph. Every edge receives ``features`` (and ``target`` when
        ``include_target`` is set); note that these keys are written into
        the edge attribute dicts of ``g`` itself before being copied.
        Every node receives a one-hot ``features`` row selected by the last
        element of the node, and, when targets are requested, a ``target``
        tensor built from its ``y`` attribute after ``self.steady_state``
        has run on the new graph. Graph-level data is stored on ``.data``.
        """
        # one row of the one-hot table per part index
        part_encoding = to_one_hot(torch.arange(0, self.n_parts),
                                   self.n_parts)

        annotated = nx.DiGraph()

        for node, attrs in g.nodes(data=True):
            annotated.add_node(node, **attrs)

        for src, dst, attrs in g.edges(data=True):
            attrs["features"] = np.array([0.0])
            if include_target:
                attrs["target"] = np.array([1.0])
            annotated.add_edge(src, dst, **attrs)

        if include_target:
            self.steady_state(annotated, node_to_part=lambda x: x[-1])

        for node, attrs in annotated.nodes(data=True):
            # the last element of the node picks the one-hot row
            attrs["features"] = part_encoding[list(node)[-1]]
            if include_target:
                attrs["target"] = torch.tensor([attrs["y"].flatten()],
                                               dtype=torch.float)

        graph_level = {"features": torch.tensor([0])}
        if include_target:
            graph_level["target"] = torch.tensor([0])
        annotated.data = graph_level

        return annotated
Beispiel #31
0
def check_vrp(G: DiGraph = None):
    """Checks if graph is well defined.

    The graph is modified in place: every node other than Source/Sink that
    lacks an edge from Source or an edge to Sink gets that edge added with
    a prohibitively high cost (1e10), so that roundtrips are always
    possible.

    Raises:
        TypeError: G is not a DiGraph.
        KeyError: Source or Sink node is missing, or an edge has no "cost"
            attribute.
        NetworkXError: Source has incoming edges, Sink has outgoing edges,
            or Source and Sink are not connected.
    """
    # if G is not a DiGraph
    if not isinstance(G, DiGraph):
        raise TypeError(
            "Input graph must be of type networkx.classes.digraph.DiGraph.")

    # Both terminals must exist *before* their neighbourhoods are queried:
    # asking for the successors of an absent node makes networkx raise its
    # own NetworkXError, which would mask the KeyError intended here.
    for v in ["Source", "Sink"]:
        if v not in G.nodes():
            raise KeyError("Input graph requires Source and Sink nodes.")
    # If Source has incoming edges
    if len(list(G.predecessors("Source"))) > 0:
        raise NetworkXError("Source must have no incoming edges.")
    # If Sink has outgoing edges
    if len(list(G.successors("Sink"))) > 0:
        raise NetworkXError("Sink must have no outgoing edges.")

    # Roundtrips should always be possible
    # Missing edges are added with a high cost
    # (adding edges between existing nodes does not change the node set)
    for v in G.nodes():
        if v in ["Source", "Sink"]:
            continue
        if not G.has_edge("Source", v):
            logger.warning("Source not connected to %s" % v)
            G.add_edge("Source", v, cost=1e10)
        if not G.has_edge(v, "Sink"):
            logger.warning("%s not connected to Sink" % v)
            G.add_edge(v, "Sink", cost=1e10)

    # If graph is disconnected
    if not has_path(G, "Source", "Sink"):
        raise NetworkXError("Source and Sink are not connected.")
    # If cost is missing
    for (i, j) in G.edges():
        if "cost" not in G.edges[i, j]:
            raise KeyError("Edge (%s,%s) requires cost attribute" % (i, j))
Beispiel #32
0
def to_ail_supergraph(transition_graph: networkx.DiGraph) -> networkx.DiGraph:
    """
    Build a super graph from an AIL transition graph: calls and redundant jumps become
    part of a bigger block instead of being transitions. Calls to returning functions
    do not terminate basic blocks.

    Based on region_identifier super_graph

    The input graph is left untouched; all changes happen on a copy. After every single
    change the edge scan starts over, because the edge collection was modified.

    :return: A converted super transition graph
    """
    super_graph = networkx.DiGraph(transition_graph)

    changed = True
    while changed:
        changed = False
        for src, dst, data in super_graph.edges(data=True):
            edge_type = data.get('type')

            # src flows only into dst and dst is only reached from src
            exclusive_link = (
                len(list(super_graph.successors(src))) == 1
                and len(list(super_graph.predecessors(dst))) == 1
            )

            if exclusive_link:
                # calls in the middle of blocks OR boring jumps
                is_fallthrough = src.addr + src.original_size == dst.addr if edge_type != 'fake_return' else True
                if is_fallthrough:
                    _merge_ail_nodes(super_graph, src, dst)
                    changed = True
                    break
            elif edge_type == 'call':
                # calls to functions with no return
                super_graph.remove_node(dst)
                changed = True
                break

    return super_graph
    def remove_all_nodes_but_calls_and_subscripts(
            graph: networkx.DiGraph) -> networkx.DiGraph:
        """
        Prunes, in place, every node whose operation cannot be an operator we
        might care about, then clears all remaining edge attributes.

        Import/Constant nodes are dropped outright. Assign/Keyword/List/Tuple
        nodes are bypassed: each predecessor is wired to each successor before
        the node is dropped. Call/Subscript/Subscript-Assign nodes and the
        NOT_FOUND_WIR placeholder are kept. Anything else is reported and
        trips an assertion.
        """
        dropped_operations = {"Import", "Constant"}
        bypassed_operations = {"Assign", "Keyword", "List", "Tuple"}
        kept_operations = {"Call", "Subscript", "Subscript-Assign"}

        def process_node(node, _):
            if node.operation in dropped_operations:
                graph.remove_node(node)
                return

            if node.operation in bypassed_operations:
                # snapshot both sides before rewiring around the node
                incoming = list(graph.predecessors(node))
                outgoing = list(graph.successors(node))
                for upstream in incoming:
                    for downstream in outgoing:
                        graph.add_edge(upstream, downstream)
                graph.remove_node(node)
                return

            if node.operation in kept_operations:
                return
            if node == WirExtractor.NOT_FOUND_WIR:
                return

            print("Unknown WIR Node Type: {}".format(node))
            assert False

        traverse_graph_and_process_nodes(graph, process_node)

        # By modifying edges, most labels are lost, so we remove the rest of them too
        for edge in graph.edges(data=True):
            edge[2].clear()

        return graph
Beispiel #34
0
def add_style_tendencies(stg: networkx.DiGraph):
    """
    Colors the edges of a state transition graph by the direction of change.

    A transition that only decreases values is colored *red*; one that both increases and
    decreases values (only possible for non-asynchronous transitions) is colored *dodgerblue*.
    Transitions that only increase values, or change nothing, keep whatever color they had.

    **arguments**:
        * *stg*: state transition graph

    **example**::

          >>> add_style_tendencies(stg)
    """

    for source, target, _ in sorted(stg.edges(data=True)):
        # per-position "before+after" pairs, e.g. "01" for a 0 -> 1 flip
        flips = [source[x] + target[x] for x in range(len(source))]
        increases = "01" in flips
        decreases = "10" in flips

        if increases:
            if decreases:
                stg.adj[source][target]["color"] = "dodgerblue"
            continue

        if decreases:
            stg.adj[source][target]["color"] = "red"
Beispiel #35
0
def test_remove_node():
    """MapGraph.remove_node('X') should leave only the B->C edge.

    The method is invoked on a plain DiGraph standing in for a MapGraph
    instance.
    """
    mock_mapp = DiGraph()
    mock_mapp.add_node('X')
    mock_mapp.add_edges_from([('A', 'B', {'TP': ['X']}),
                              ('B', 'C', {'TP': ['Y']})])
    # Python 2 hides the plain function behind ``im_func``; on Python 3 the
    # class attribute already is the plain function.
    remove_node = getattr(MapGraph.remove_node, 'im_func',
                          MapGraph.remove_node)
    remove_node(mock_mapp, 'X')
    # edges() may be a view rather than a list, so materialise it first.
    nt.assert_equal(list(mock_mapp.edges()), [('B', 'C')])
def outcoming_edges(graph: nx.DiGraph, node: "int | str") -> "list[tuple]":
    """Return all edges of ``graph`` that leave ``node``.

    Args:
        graph: Graph whose edges are scanned.
        node: Tail node to look for.

    Returns:
        A list of ``(tail, head)`` tuples, in the order ``graph.edges()``
        yields them. When ``node`` is a string, both endpoints of every
        returned edge are converted with ``str``; otherwise they are
        returned as stored in the graph. An empty list if nothing matches.
    """
    leaving = [(tail, head) for tail, head in graph.edges() if tail == node]
    if isinstance(node, str):
        return [(str(tail), str(head)) for tail, head in leaving]
    return leaving
Beispiel #37
0
	def filter_edges(self,thresh):
		"""Remove every edge whose weight is below ``thresh``.

		After an edge is removed, each of its endpoints that is left with
		neither incoming nor outgoing edges is removed from the graph too.
		"""
		# Work on a snapshot: the graph is modified inside the loop, which is
		# not safe if edges() hands back a live view instead of a list.
		edges=list(DiGraph.edges(self,data='weight'))
		for src,dst,weight in edges:
			if weight<thresh:
				DiGraph.remove_edge(self,src,dst)

				# For a self-loop src and dst are the same node; it must not be
				# queried again once it has been removed.
				for node in (src,dst):
					if not DiGraph.has_node(self,node):
						continue
					if DiGraph.in_degree(self,node)==0 and DiGraph.out_degree(self,node)==0:
						DiGraph.remove_node(self,node)
def incidence_matrix(graph: nx.DiGraph) -> scs.csc_matrix:
    """Build the node-by-edge incidence matrix of a directed graph.

    Rows follow the sorted node order, columns follow the order of
    ``graph.edges()``. For an edge ``(u, v)`` the entry in row ``u`` is 1
    and the entry in row ``v`` is -1; a self-loop yields a single 1.

    Returns:
        The incidence matrix in CSC format.
    """
    # Materialise the edges: an edge view cannot be indexed by position.
    edges = list(graph.edges())
    nodes = sorted(graph.nodes())
    row_of = {node: row for row, node in enumerate(nodes)}

    result = scs.lil_matrix((len(nodes), len(edges)))
    for col, (u, v) in enumerate(edges):
        result[row_of[v], col] = -1
        # Written last so that a self-loop (u == v) ends up as +1.
        result[row_of[u], col] = 1
    return result.tocsc()
Beispiel #39
0
	def median_weight(self):
		"""Return the median of all edge weights.

		For an even number of edges the result is the mean of the two central
		weights, as a float; for an odd number it is the central weight
		itself. Raises IndexError when the graph has no edges.
		"""
		mlist=sorted(edge[2] for edge in DiGraph.edges(self,data='weight'))
		n=len(mlist)
		# Integer division gives the upper-middle index for even n and the
		# exact middle for odd n; round(n/2) picked the wrong element.
		mid=n//2
		if n%2==0:
			x=mlist[mid-1]+mlist[mid]
			median_weight=float(x)/float(2)
		else:
			median_weight=mlist[mid]
		return median_weight
Beispiel #40
0
    def load_dependency_graph(self):
        """Load the model dependency graph and store both orientations.

        The graph is read as JSON from the file named by the
        "dependency_graph" config entry, or taken to be empty when that
        entry is unset. The result is stored in
        ``self.model_dependency_graph`` as a dict keyed by a boolean:
        ``False`` maps to the graph as declared, ``True`` (deletion) to its
        reversed copy. Self-dependencies are skipped.

        Any exception is logged and re-raised.
        """
        try:
            dep_graph_path = Config.get("dependency_graph")
            if dep_graph_path:
                self.log.debug(
                    "Loading model dependency graph",
                    path=dep_graph_path,
                )
                # Context manager so the file handle is always closed
                with open(dep_graph_path) as dep_graph_file:
                    dep_graph_str = dep_graph_file.read()
            else:
                self.log.debug("Using default model dependency graph", graph={})
                dep_graph_str = "{}"

            # joint_dependencies is of the form { Model1 -> [(Model2, src_port, dst_port), ...] }
            # src_port is the field that accesses Model2 from Model1
            # dst_port is the field that accesses Model1 from Model2
            static_dependencies = json.loads(dep_graph_str)
            dynamic_dependencies = (
                []
            )  # Dropped Service and ServiceInstance dynamic dependencies

            joint_dependencies = dict(
                list(static_dependencies.items()) + dynamic_dependencies
            )

            model_dependency_graph = DiGraph()
            for src_model, deps in joint_dependencies.items():
                for dst_model, src_accessor, dst_accessor in deps:
                    if src_model == dst_model:
                        continue
                    model_dependency_graph.add_edge(
                        src_model,
                        dst_model,
                        src_accessor=src_accessor,
                        dst_accessor=dst_accessor,
                    )

            model_dependency_graph_rev = model_dependency_graph.reverse(copy=True)
            self.model_dependency_graph = {
                # deletion
                True: model_dependency_graph_rev,
                False: model_dependency_graph,
            }
            self.log.debug("Loaded dependencies", edges=model_dependency_graph.edges())
        except Exception as e:
            self.log.exception("Error loading dependency graph", e=e)
            raise
def main(access_token, package_name, max_depth):
    """Publish the dependency network of a package as a new Graph Commons graph.

    The dependency graph is collected with ``import_package_dependencies``,
    translated into one ``node_create`` signal per node followed by one
    ``edge_create`` signal per edge, and sent through ``GraphCommons.new_graph``.
    The URL of the created graph is printed at the end.
    """
    graph = DiGraph()
    graphcommons = GraphCommons(access_token)
    import_package_dependencies(graph, package_name, max_depth=max_depth)

    signals = []

    # Nodes first: packages link to their package page, anything else to a user page
    for node, data in graph.nodes(data=True):
        node_type = data['type']
        if node_type == 'PACKAGE':
            url_template = "https://www.npmjs.com/package/%s"
        else:
            url_template = 'https://www.npmjs.com/~%s'

        signals.append(Signal(
            action="node_create",
            name=node,
            type=node_type,
            reference=url_template % node
        ))

    # Then the edges, each named after its own 'type' attribute
    signals.extend(
        Signal(
            action="edge_create",
            from_name=source,
            from_type=graph.node[source]['type'],
            to_name=target,
            to_type=graph.node[target]['type'],
            name=data['type'],
            weight=1
        )
        for source, target, data in graph.edges(data=True)
    )

    title = "Dependency Network of %s" % package_name
    summary = "Dependency Network of %s Package" % package_name
    created_graph = graphcommons.new_graph(
        name=title,
        description=summary,
        signals=signals
    )

    print('Created Graph URL:')
    print('https://graphcommons.com/graphs/%s' % created_graph.id)
Beispiel #42
0
def isolated_cycles(graph: nx.DiGraph) -> list:
    """Find the cycles made up solely of nodes accepted by ``is_in_1_out_1``.

    Starting from each node, the single outgoing edge is followed until the
    walk returns to its start. The walk is abandoned as soon as it meets a
    node that ``is_in_1_out_1`` rejects, a node that already belongs to a
    reported cycle, or a node it has already visited.

    Returns:
        A list of cycles. Each cycle is the list of its nodes in traversal
        order, beginning with the first node of the cycle met while
        iterating over the graph; the start node is not repeated at the
        end. Every cycle is reported once.
    """
    cycles = []
    cycled_nodes = set()
    for node in graph:
        cycle = []
        visited = set()
        w = node
        is_isolated_cycle = True
        while True:
            if (w in cycled_nodes or w in visited
                    or not is_in_1_out_1(graph, w)):
                is_isolated_cycle = False
                break
            cycle.append(w)
            visited.add(w)
            # presumably is_in_1_out_1 guarantees exactly one successor here
            # -- verify against its definition
            w = next(iter(graph.successors(w)))
            # The closing test has to come *after* the step: testing it
            # before moving is trivially true for the start node.
            if w == node:
                break

        if is_isolated_cycle:
            cycles.append(cycle)
            cycled_nodes.update(cycle)
    return cycles
Beispiel #43
0
class GVMAnalysis :
    """Method-level graph of an analysed application, exportable as XML.

    Attributes set by the constructor:
      * ``vmx`` / ``vm``: the analysis object passed in and the VM it returns
        from ``get_vm()``.
      * ``nodes``: maps the key "<class_name> <method_name> <descriptor>" to
        its ``NodeF`` object.
      * ``nodes_id``: maps a node id to the same ``NodeF`` object.
      * ``entry_nodes``: ids of the activity / service / receiver callback
        nodes found through the ``apk`` argument.
      * ``G``: ``DiGraph`` whose nodes are node ids.
    """
    def __init__(self, vmx, apk) :
        """Build the graph from ``vmx`` and, when given, tag entry points from ``apk``.

        ``apk`` may be None, in which case the activity / service / receiver
        tagging is skipped.
        """
        self.vmx = vmx
        self.vm = self.vmx.get_vm()

        self.nodes = {}
        self.nodes_id = {}
        self.entry_nodes = [] 
        self.G = DiGraph()

        # One edge per internal package access: from the method returned by
        # j.get_method() to the method described by j itself
        # (presumably caller -> callee; verify against the tainted-packages API).
        for j in self.vmx.get_tainted_packages().get_internal_packages() :
            n1 = self._get_node( j.get_method().get_class_name(), j.get_method().get_name(), j.get_method().get_descriptor() )
            n2 = self._get_node( j.get_class_name(), j.get_name(), j.get_descriptor() )

            self.G.add_edge( n1.id, n2.id )
            n1.add_edge( n2, j )
        #    print "\t %s %s %s %x ---> %s %s %s" % (j.get_method().get_class_name(), j.get_method().get_name(), j.get_method().get_descriptor(), \
        #                                            j.get_bb().start + j.get_idx(), \
        #                                            j.get_class_name(), j.get_name(), j.get_descriptor())

        # For each declared component whose callback already exists as a node:
        # tag and color it, add a labelled synthetic node pointing to it, and
        # record the callback as an entry node.
        if apk != None :
            for i in apk.get_activities() :
                j = bytecode.FormatClassToJava(i)
                n1 = self._get_exist_node( j, "onCreate", "(Landroid/os/Bundle;)V" )
                if n1 != None : 
                    n1.set_attributes( { "type" : "activity" } )
                    n1.set_attributes( { "color" : ACTIVITY_COLOR } )
                    n2 = self._get_new_node_from( n1, "ACTIVITY" )
                    n2.set_attributes( { "color" : ACTIVITY_COLOR } )
                    self.G.add_edge( n2.id, n1.id )
                    self.entry_nodes.append( n1.id )
            for i in apk.get_services() :
                j = bytecode.FormatClassToJava(i)
                n1 = self._get_exist_node( j, "onCreate", "()V" )
                if n1 != None : 
                    n1.set_attributes( { "type" : "service" } )
                    n1.set_attributes( { "color" : SERVICE_COLOR } )
                    n2 = self._get_new_node_from( n1, "SERVICE" )
                    n2.set_attributes( { "color" : SERVICE_COLOR } )
                    self.G.add_edge( n2.id, n1.id )
                    self.entry_nodes.append( n1.id )
            for i in apk.get_receivers() :
                j = bytecode.FormatClassToJava(i)
                n1 = self._get_exist_node( j, "onReceive", "(Landroid/content/Context; Landroid/content/Intent;)V" )
                if n1 != None : 
                    n1.set_attributes( { "type" : "receiver" } )
                    n1.set_attributes( { "color" : RECEIVER_COLOR } )
                    n2 = self._get_new_node_from( n1, "RECEIVER" )
                    n2.set_attributes( { "color" : RECEIVER_COLOR } )
                    self.G.add_edge( n2.id, n1.id )
                    self.entry_nodes.append( n1.id )

        # Specific Java/Android library
        # For Thread / TimerTask subclasses, add the links start -> run and
        # <init> -> start, which are not part of the edges added above.
        for c in self.vm.get_classes() :
            #if c.get_superclassname() == "Landroid/app/Service;" :
            #    n1 = self._get_node( c.get_name(), "<init>", "()V" )
            #    n2 = self._get_node( c.get_name(), "onCreate", "()V" )

            #    self.G.add_edge( n1.id, n2.id )
            if c.get_superclassname() == "Ljava/lang/Thread;" or c.get_superclassname() == "Ljava/util/TimerTask;" :
                for i in self.vm.get_method("run") :
                    if i.get_class_name() == c.get_name() :
                        n1 = self._get_node( i.get_class_name(), i.get_name(), i.get_descriptor() )
                        n2 = self._get_node( i.get_class_name(), "start", i.get_descriptor() ) 
                       
                        # link from start to run
                        self.G.add_edge( n2.id, n1.id )
                        n2.add_edge( n1, {} )

                        # link from init to start
                        for init in self.vm.get_method("<init>") :
                            if init.get_class_name() == c.get_name() :
                                n3 = self._get_node( init.get_class_name(), "<init>", init.get_descriptor() )
                                #n3 = self._get_node( i.get_class_name(), "<init>", i.get_descriptor() )
                                self.G.add_edge( n3.id, n2.id )
                                n3.add_edge( n2, {} )

            #elif c.get_superclassname() == "Landroid/os/AsyncTask;" :
            #    for i in self.vm.get_method("doInBackground") :
            #        if i.get_class_name() == c.get_name() :
            #            n1 = self._get_node( i.get_class_name(), i.get_name(), i.get_descriptor() )
            #            n2 = self._get_exist_node( i.get_class_name(), "execute", i.get_descriptor() )
            #            print n1, n2, i.get_descriptor()
                        #for j in self.vm.get_method("doInBackground") :
                        #    n2 = self._get_exist_node( i.get_class_name(), j.get_name(), j.get_descriptor() )
                        #    print n1, n2
                        # n2 = self._get_node( i.get_class_name(), "
            #    raise("ooo")

        #for j in self.vmx.tainted_packages.get_internal_new_packages() :
        #    print "\t %s %s %s %x ---> %s %s %s" % (j.get_method().get_class_name(), j.get_method().get_name(), j.get_method().get_descriptor(), \
        #                                            j.get_bb().start + j.get_idx(), \
        #                                            j.get_class_name(), j.get_name(), j.get_descriptor())


        # Tag every already-known method that appears in the permission map
        # returned by vmx.get_permissions (x is the permission name, used as
        # key into DVM_PERMISSIONS and PERMISSIONS_RISK).
        list_permissions = self.vmx.get_permissions( [] ) 
        for x in list_permissions :
            for j in list_permissions[ x ] :
                #print "\t %s %s %s %x ---> %s %s %s" % (j.get_method().get_class_name(), j.get_method().get_name(), j.get_method().get_descriptor(), \
                #                                    j.get_bb().start + j.get_idx(), \
                #                                    j.get_class_name(), j.get_name(), j.get_descriptor())
                n1 = self._get_exist_node( j.get_method().get_class_name(), j.get_method().get_name(), j.get_method().get_descriptor() )

                if n1 == None :
                    continue

                n1.set_attributes( { "permissions" : 1 } )
                n1.set_attributes( { "permissions_level" : DVM_PERMISSIONS[ "MANIFEST_PERMISSION" ][ x ][0] } )
                n1.set_attributes( { "permissions_details" : x } )

                # A KeyError anywhere in this block (e.g. permission absent
                # from PERMISSIONS_RISK) silently skips the risk annotation.
                try :
                    for tmp_perm in PERMISSIONS_RISK[ x ] :
                        if tmp_perm in DEFAULT_RISKS :
                            n2 = self._get_new_node( j.get_method().get_class_name(), j.get_method().get_name(), j.get_method().get_descriptor() + " " + DEFAULT_RISKS[ tmp_perm ][0],
                                                     DEFAULT_RISKS[ tmp_perm ][0] )
                            n2.set_attributes( { "color" : DEFAULT_RISKS[ tmp_perm ][1] } )
                            self.G.add_edge( n2.id, n1.id )

                            n1.add_risk( DEFAULT_RISKS[ tmp_perm ][0] )
                            n1.add_api( x, j.get_class_name() + "-" + j.get_name() + "-" + j.get_descriptor() )
                except KeyError :
                    pass

        # Tag DexClassLoader
        for m, _ in self.vmx.get_tainted_packages().get_packages() :
            if m.get_info() == "Ldalvik/system/DexClassLoader;" :
                for path in m.get_paths() :
                    if path.get_access_flag() == TAINTED_PACKAGE_CREATE :
                        # NOTE(review): _get_exist_node returns None for an unknown
                        # method and n1 is used below without a check -- confirm
                        # the node is always present at this point.
                        n1 = self._get_exist_node( path.get_method().get_class_name(), path.get_method().get_name(), path.get_method().get_descriptor() )    
                        n2 = self._get_new_node( path.get_method().get_class_name(), path.get_method().get_name(), path.get_method().get_descriptor() + " " + "DEXCLASSLOADER",
                                                 "DEXCLASSLOADER" )

                        n1.set_attributes( { "dynamic_code" : "true" } )
                        n2.set_attributes( { "color" : DEXCLASSLOADER_COLOR } )
                        self.G.add_edge( n2.id, n1.id )
                        
                        n1.add_risk( "DEXCLASSLOADER" )

    def _get_exist_node(self, class_name, method_name, descriptor) :
        """Return the node registered for this method, or None if there is none."""
        key = "%s %s %s" % (class_name, method_name, descriptor)
        try :
            return self.nodes[ key ]
        except KeyError :
            return None

    def _get_node(self, class_name, method_name, descriptor) :
        """Return the node for this method, creating and registering it if needed.

        A new node gets the current number of registered nodes as its id.
        """
        key = "%s %s %s" % (class_name, method_name, descriptor)
        if key not in self.nodes :
            self.nodes[ key ] = NodeF( len(self.nodes), class_name, method_name, descriptor )
            self.nodes_id[ self.nodes[ key ].id ] = self.nodes[ key ]

        return self.nodes[ key ]

    def _get_new_node_from(self, n, label) :
        """Return a labelled node derived from ``n``: same class and method, descriptor suffixed with ``label``."""
        return self._get_new_node( n.class_name, n.method_name, n.descriptor + label, label )

    def _get_new_node(self, class_name, method_name, descriptor, label) :
        """Like ``_get_node`` but creates the node with ``label`` and a trailing False argument.

        The False is presumably the ``real`` flag read by ``get_paths`` --
        verify against NodeF.
        """
        key = "%s %s %s" % (class_name, method_name, descriptor)
        if key not in self.nodes :
            self.nodes[ key ] = NodeF( len(self.nodes), class_name, method_name, descriptor, label, False )
            self.nodes_id[ self.nodes[ key ].id ] = self.nodes[ key ]

        return self.nodes[ key ]

    def set_new_attributes(self, cm) :
        """Apply ``cm(vmx, method)`` to every graph node and store the result as node attributes."""
        for i in self.G.nodes() :
            n1 = self.nodes_id[ i ]
            m1 = self.vm.get_method_descriptor( n1.class_name, n1.method_name, n1.descriptor )

            H = cm( self.vmx, m1 )

            n1.set_attributes( H )

    def export_to_gexf(self) :
        """Return the graph serialised as a GEXF XML string."""
        buff = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
        buff += "<gexf xmlns=\"http://www.gephi.org/gexf\" xmlns:viz=\"http://www.gephi.org/gexf/viz\">\n"
        buff += "<graph type=\"static\">\n"

        # Attribute declarations; the numeric ids come from ID_ATTRIBUTES
        buff += "<attributes class=\"node\" type=\"static\">\n" 
        buff += "<attribute default=\"normal\" id=\"%d\" title=\"type\" type=\"string\"/>\n" % ID_ATTRIBUTES[ "type"]
        buff += "<attribute id=\"%d\" title=\"class_name\" type=\"string\"/>\n" % ID_ATTRIBUTES[ "class_name"]
        buff += "<attribute id=\"%d\" title=\"method_name\" type=\"string\"/>\n" % ID_ATTRIBUTES[ "method_name"]
        buff += "<attribute id=\"%d\" title=\"descriptor\" type=\"string\"/>\n" % ID_ATTRIBUTES[ "descriptor"]


        buff += "<attribute default=\"0\" id=\"%d\" title=\"permissions\" type=\"integer\"/>\n" % ID_ATTRIBUTES[ "permissions"]
        buff += "<attribute default=\"normal\" id=\"%d\" title=\"permissions_level\" type=\"string\"/>\n" % ID_ATTRIBUTES[ "permissions_level"]
        
        buff += "<attribute default=\"false\" id=\"%d\" title=\"dynamic_code\" type=\"boolean\"/>\n" % ID_ATTRIBUTES[ "dynamic_code"]
        buff += "</attributes>\n"   

        buff += "<nodes>\n"
        for node in self.G.nodes() :
            buff += "<node id=\"%d\" label=\"%s\">\n" % (node, escape(self.nodes_id[ node ].label))
            buff += self.nodes_id[ node ].get_attributes_gexf()
            buff += "</node>\n"
        buff += "</nodes>\n"


        # Edge ids are a running counter
        buff += "<edges>\n"
        nb = 0
        for edge in self.G.edges() :
            buff += "<edge id=\"%d\" source=\"%d\" target=\"%d\"/>\n" % (nb, edge[0], edge[1])
            nb += 1
        buff += "</edges>\n"


        buff += "</graph>\n"
        buff += "</gexf>\n"

        return buff

    def export_to_gml(self) :
        """Return the graph serialised as an XML string.

        Despite the method name, the document emitted here is GraphML
        (root element ``graphml``, with yWorks namespaces).
        """
        buff = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n"
        buff += "<graphml xmlns=\"http://graphml.graphdrawing.org/xmlns\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:y=\"http://www.yworks.com/xml/graphml\" xmlns:yed=\"http://www.yworks.com/xml/yed/3\" xsi:schemaLocation=\"http://graphml.graphdrawing.org/xmlns http://www.yworks.com/xml/schema/graphml/1.1/ygraphml.xsd\">\n"

        buff += "<key attr.name=\"description\" attr.type=\"string\" for=\"node\" id=\"d5\"/>\n"
        buff += "<key for=\"node\" id=\"d6\" yfiles.type=\"nodegraphics\"/>\n"

        buff += "<graph edgedefault=\"directed\" id=\"G\">\n"

        for node in self.G.nodes() :
            buff += "<node id=\"%d\">\n" % (node)
            #fd.write( "<node id=\"%d\" label=\"%s\">\n" % (node, escape(self.nodes_id[ node ].label)) )
            buff += self.nodes_id[ node ].get_attributes_gml()
            buff += "</node>\n"

        nb = 0
        for edge in self.G.edges() :
            buff += "<edge id=\"%d\" source=\"%d\" target=\"%d\"/>\n" % (nb, edge[0], edge[1])
            nb += 1

        buff += "</graph>\n"
        buff += "</graphml>\n"
        
        return buff

    def get_paths_method(self, method) :
        """Convenience wrapper around ``get_paths`` taking a method object."""
        return self.get_paths( method.get_class_name(), method.get_name(), method.get_descriptor() )

    def get_paths(self, class_name, method_name, descriptor) :
        """Return paths (lists of node ids) leading to the given method.

        Every graph node is tried as origin. Paths come from
        ``connectivity_approx.node_independent_paths`` and are kept only when
        their first node has ``real == True``. Returns an empty list when the
        method has no node.
        """
        # imported lazily, only when paths are actually requested
        import connectivity_approx as ca
        paths = []
        key = "%s %s %s" % (class_name, method_name, descriptor)
       
        if key not in self.nodes :
            return paths

        for origin in self.G.nodes() : #self.entry_nodes :
            if ca.vertex_connectivity_approx(self.G, origin, self.nodes[ key ].id) > 0 :
                for path in ca.node_independent_paths(self.G, origin, self.nodes[ key ].id) :
                    if self.nodes_id[ path[0] ].real == True :
                        paths.append( path )
        return paths

    def print_paths_method(self, method) :
        """Convenience wrapper around ``print_paths`` taking a method object."""
        self.print_paths( method.get_class_name(), method.get_name(), method.get_descriptor() )

    def print_paths(self, class_name, method_name, descriptor) :
        """Print each path found by ``get_paths``: the id list, then the node labels joined by "-->"."""
        paths = self.get_paths( class_name, method_name, descriptor )
        for path in paths :
            print path, ":"
            print "\t",
            for p in path[:-1] :
                print self.nodes_id[ p ].label, "-->",
            print self.nodes_id[ path[-1] ].label
Beispiel #44
0
class LSDB(object):
    def __init__(self):
        """Load the private address bindings, set up empty state and start the LSA thread.

        The file named by the 'private_ips' option is read as JSON; its
        values are mappings of router id to IP, which are regrouped into
        ``router_private_address`` (router id -> list of IPs). If anything
        goes wrong while loading, the error is logged and both mappings are
        left empty. The processing thread running ``process_lsa`` is started
        before the constructor returns.
        """
        self.private_address_network = ip_network(CFG.get(DEFAULTSECT,
                                                  'private_net'))
        try:
            with open(CFG.get(DEFAULTSECT, 'private_ips'), 'r') as f:
                self.private_address_binding = json.load(f)
                self.router_private_address = {}
                for subnets in self.private_address_binding.itervalues():
                    for rid, ip in subnets.iteritems():
                        # group every private IP under its router id
                        self.router_private_address.setdefault(rid,
                                                               []).append(ip)
        except Exception as e:
            log.error('Incorrect private IP addresses binding file')
            log.error(str(e))
            self.private_address_binding = {}
            self.router_private_address = {}

        self.last_line = ''
        self.transaction = None
        self.graph = DiGraph()

        # LSA tables
        self.routers = {}  # router-id : lsa
        self.networks = {}  # DR IP : lsa
        self.ext_networks = {}  # (router-id, dest) : lsa

        self.controllers = defaultdict(list)  # controller nr : ip_list
        self.listener = {}

        # Queue-fed worker that consumes committed lines
        self.keep_running = True
        self.queue = Queue()
        worker = Thread(target=self.process_lsa,
                        name="lsa_processing_thread")
        self.processing_thread = worker
        worker.start()

    def get_leader(self):
        """Return the smallest key of ``self.controllers``."""
        # iterating a dict yields its keys
        return min(self.controllers)

    def stop(self):
        """Stop every listener session and ask the processing loop to exit."""
        for proxy in self.listener.values():
            proxy.session.stop()
        self.keep_running = False
        # An empty line is enqueued so that a blocked queue.get() in
        # process_lsa returns and the loop re-checks keep_running.
        self.queue.put('')

    def lsdb(self, lsa):
        """Return the table that stores LSAs of the same TYPE as ``lsa``.

        Router, network and AS-external LSAs map to ``routers``,
        ``networks`` and ``ext_networks`` respectively; any other TYPE
        yields None.
        """
        tables = (
            (RouterLSA, self.routers),
            (NetworkLSA, self.networks),
            (ASExtLSA, self.ext_networks),
        )
        for lsa_class, table in tables:
            if lsa.TYPE == lsa_class.TYPE:
                return table
        return None

    def register_change_listener(self, listener):
        """Toggle the registration of ``listener``.

        A listener that is already registered is removed. An unknown one is
        wrapped in a proxy, registered, and immediately sent the current
        graph as a list of (u, v, metric) triples, with -1 standing in for a
        missing metric.
        """
        if listener in self.listener:
            del self.listener[listener]
            log.info('Shapeshifter disconnected.')
            return

        log.info('Shapeshifter connected.')
        proxy = ProxyCloner(ShapeshifterProxy, listener)
        self.listener[listener] = proxy
        snapshot = [(u, v, d.get('metric', -1))
                    for u, v, d in self.graph.edges(data=True)]
        proxy.boostrap_graph(graph=snapshot)

    @staticmethod
    def extract_lsa_properties(lsa_part):
        """Parse one LSA part into a dict of its key/value properties.

        The part is split on SEP_INTER_FIELD; empty pieces are ignored and
        every other piece must split on SEP_INTRA_FIELD into exactly a key
        and a value. A later duplicate key overrides an earlier one.
        """
        fields = (prop.split(SEP_INTRA_FIELD)
                  for prop in lsa_part.split(SEP_INTER_FIELD)
                  if prop)
        return {key: val for key, val in fields}

    def commit_change(self, line):
        """Queue ``line`` for processing unless it equals ``self.last_line``."""
        # Skip duplicates of the previous update ...
        # NOTE(review): nothing in the visible part of this class ever
        # updates last_line after __init__ -- confirm it is set elsewhere.
        if line != self.last_line:
            self.queue.put(line)

    def forwarding_address_of(self, src, dst):
        """
        Return the forwarding address for a src, dst pair. If src is specified, return
        the private 'link-local' address of the src-dst link, otherwise return a 'public'
        IP belonging to dst
        :param src: the source node of the link towards the FA, possibly null
        :param dst: the node owning the forwarding address
        :return: forwarding address (str) or None if no compatible address was found
        """
        try:
            if src:
                return self.graph[src][dst]['dst_address']
            # No source given: use the link towards the first neighbor of dst.
            # Iterating the adjacency mapping avoids indexing the result of
            # neighbors(), which is not always a list.
            links = self.graph[dst]
            return links[next(iter(links))]['src_address']
        except (KeyError, StopIteration):
            # StopIteration: dst has no neighbor at all, which is also a
            # "no compatible address" case per the contract above.
            log.debug('%s-%s not found in graph', src, dst)
            return None

    def remove_lsa(self, lsa):
        """Drop ``lsa`` from the table matching its type; a missing entry is ignored."""
        table = self.lsdb(lsa)
        try:
            del table[lsa.key()]
        except KeyError:
            # nothing stored under that key
            pass

    def add_lsa(self, lsa):
        """Store ``lsa`` under its key in the table matching its type, replacing any previous entry."""
        self.lsdb(lsa)[lsa.key()] = lsa

    def process_lsa(self):
        """Worker loop: consume queued lines and keep the graph in sync.

        Runs until ``keep_running`` is cleared. Each line is split into an
        action and the LSA description. BEGIN opens a transaction, COMMIT
        applies the open one, and any other action parses the LSA and
        adds/removes it either directly or through the open transaction.
        Whenever an iteration ends with ``commit`` set, the graph is rebuilt
        from the LSDB and the difference is applied via ``update_graph``.
        """
        while self.keep_running:
            commit = False
            try:
                # Wake up at least every 5 seconds, see the Empty handler
                line = self.queue.get(timeout=5)
                if not line:
                    # Empty line (as enqueued by stop()): nothing to parse
                    self.queue.task_done()
                    continue
                # Start parsing the LSA log
                # NOTE(review): a line that does not split into exactly two
                # parts raises ValueError here, which is not handled in this
                # method -- confirm the producer guarantees the format.
                action, lsa_info = line.split(SEP_ACTION)
                if action == BEGIN:
                    self.transaction = Transaction()
                elif action == COMMIT:
                    # A COMMIT without an open transaction is ignored
                    if self.transaction:
                        self.transaction.commit(self)
                        self.transaction = None
                        commit = True
                else:
                    # First part is the header, the remaining parts the body
                    lsa_parts = [self.extract_lsa_properties(part)
                                 for part in lsa_info.split(SEP_GROUP) if part]
                    lsa = LSA.parse(LSAHeader.parse(lsa_parts.pop(0)),
                                    lsa_parts)
                    log.debug('Parsed %s: %s', action, lsa)
                    # NOTE(review): this local is not read afterwards
                    lsdb = self.lsdb(lsa)
                    if action == REM:
                        if not self.transaction:
                            self.remove_lsa(lsa)
                        else:
                            self.transaction.remove_lsa(lsa)
                    elif action == ADD:
                        if not self.transaction:
                            self.add_lsa(lsa)
                        else:
                            self.transaction.add_lsa(lsa)
                    # Removals only trigger a graph update when the LSA asks
                    # for it; every other action always does
                    if lsa.push_update_on_remove() or not action == REM:
                        commit = True
                self.queue.task_done()
            except Empty:
                # Nothing arrived within the timeout: flush what the open
                # transaction holds so far and continue with a fresh one
                if self.transaction:
                    log.debug('Splitting transaction due to timeout')
                    self.transaction.commit(self)
                    self.transaction = Transaction()
                    commit = True
            if commit:
                # Update graph accordingly
                new_graph = self.build_graph()
                # Compute graph difference and update it
                self.update_graph(new_graph)

    def __str__(self):
        """Return a header line with the LSA count followed by one line per
        LSA (routers first, then networks, then external networks).
        """
        every_lsa = chain(self.routers.values(),
                          self.networks.values(),
                          self.ext_networks.values())
        lines = [str(lsa) for lsa in every_lsa]
        header = '* LSDB Content [%d]:' % len(lines)
        return '\n'.join([header] + lines)

    def build_graph(self):
        """Build and return a fresh DiGraph from the current LSDB content.

        Steps: apply every LSA to an empty graph, let each router LSA
        contract its addresses, group the nodes lying inside the configured
        'base_net' by controller id and contract each group into a single
        'C_<id>' node, drop the self loops this created, and finally apply
        the secondary (private) addresses.

        NOTE(review): ``self.controllers`` is only appended to here, never
        reset, so it appears to accumulate entries across successive calls --
        confirm whether that is intended.
        """
        graph = DiGraph()
        # Rebuild the graph from the LSDB
        every_lsa = chain(self.routers.values(),
                          self.networks.values(),
                          self.ext_networks.values())
        for lsa in every_lsa:
            lsa.apply(graph, self)
        # Contract all IPs to their respective router-id
        for router_lsa in self.routers.values():
            private_addresses = self.router_private_address.get(
                router_lsa.routerid, [])
            router_lsa.contract_graph(graph, private_addresses)
        # Figure out the controllers layout
        base_net = ip_network(CFG.get(DEFAULTSECT, 'base_net'))
        controller_prefix = CFG.getint(DEFAULTSECT, 'controller_prefixlen')
        # Group by controller and log them
        for ip in graph.nodes_iter():
            addr = ip_address(ip)
            if addr not in base_net:
                continue
            # 1. Compute address diff to remove base_net
            offset = int(addr) - int(base_net.network_address)
            # 2. Right shift to remove host bits
            host_bits = base_net.max_prefixlen - controller_prefix
            # 3. Mask with controller mask
            controller_id = (offset >> host_bits) & ((1 << controller_prefix) - 1)
            self.controllers[controller_id].append(ip)
        # Contract them on the graph
        for controller_id, ips in self.controllers.iteritems():
            contract_graph(graph, ips, 'C_%s' % controller_id)
        # Remove generated self loops
        graph.remove_edges_from(graph.selfloop_edges())
        self.apply_secondary_addresses(graph)
        return graph

    def update_graph(self, new_graph):
        """Replace ``self.graph`` by *new_graph* if their edges differ.

        Listeners are told about every added edge (with its 'metric') and
        every removed edge; the graph is optionally drawn when the
        'draw_graph' option is set. Nothing happens when there is no
        difference.
        """
        added_edges = graph_diff(new_graph, self.graph)
        removed_edges = graph_diff(self.graph, new_graph)
        if len(added_edges) == 0 and len(removed_edges) == 0:
            # Nothing to propagate
            return
        log.debug('Pushing changes')
        for src, dst in added_edges:
            self.listener_add_edge(src, dst, new_graph[src][dst]['metric'])
        for src, dst in removed_edges:
            self.listener_remove_edge(src, dst)
        if CFG.getboolean(DEFAULTSECT, 'draw_graph'):
            draw_graph(new_graph)
        self.graph = new_graph
        log.info('LSA update yielded +%d -%d edges changes' %
                 (len(added_edges), len(removed_edges)))

    def listener_add_edge(self, *args):
        """Forward ``add_edge(*args)`` to every registered listener."""
        for subscriber in self.listener.values():
            subscriber.add_edge(*args)

    def listener_remove_edge(self, *args):
        """Forward ``remove_edge(*args)`` to every registered listener."""
        for subscriber in self.listener.values():
            subscriber.remove_edge(*args)

    def apply_secondary_addresses(self, graph):
        """Write private addresses onto the matching edges of *graph*.

        For every subnet in ``self.private_address_binding`` and every
        ordered pair of distinct members (origin, target), the address bound
        to target is stored as 'dst_address' on the edge origin -> target.
        Pairs without such an edge are skipped.
        """
        for bindings in self.private_address_binding.itervalues():
            for target, address in bindings.iteritems():
                for origin in bindings.iterkeys():
                    if origin != target:
                        try:
                            graph[origin][target]['dst_address'] = address
                        except KeyError:
                            # No such link in the graph
                            continue
Beispiel #45
0
class GraphManager(object):
    """ Generates and processes the graph based on packets

    Every packet contributes one directed edge src -> dst. Depending on
    ``layer`` the endpoints are taken from the first layer of the packet (2),
    from its second layer (3) or from the second layer plus the ports of the
    third layer, formatted as "address:port" (4). Packets sharing the same
    endpoints are collected on the same edge.
    """

    def __init__(self, packets, layer=3, geo_ip=os.path.expanduser('~/GeoIP.dat')):
        """Build the graph from *packets*.

        Args:
            packets: iterable of packet objects (indexable by layer, with
                ``haslayer``, ``payload`` and ``name``).
            layer: 2, 3 or 4; selects how edge endpoints are derived.
            geo_ip: path handed to ``GeoIP``; when loading fails, a warning
                is logged and country lookups are skipped.

        Raises:
            ValueError: if *layer* is not 2, 3 or 4.
        """
        self.graph = DiGraph()
        self.layer = layer
        self.geo_ip = None
        self.data = {}

        try:
            self.geo_ip = GeoIP(geo_ip)
        except Exception:
            # GeoIP data is optional. Only real errors are swallowed here;
            # KeyboardInterrupt / SystemExit still propagate.
            logging.warning("could not load GeoIP data")

        if self.layer == 2:
            to_edge = self._layer_2_edge
        elif self.layer == 3:
            to_edge = self._layer_3_edge
        elif self.layer == 4:
            to_edge = self._layer_4_edge
        else:
            raise ValueError("Other layers than 2,3 and 4 are not supported yet!")

        for edge in map(to_edge, packets):
            # Packets without the requested layer yield None
            if edge is None:
                continue
            src, dst, packet = edge
            if self.graph.has_edge(src, dst):
                self.graph[src][dst]['packets'].append(packet)
            else:
                # Keyword form is accepted by both networkx 1.x and 2.x
                self.graph.add_edge(src, dst, packets=[packet])

        for node in self.graph.nodes():
            self._retrieve_node_info(node)

        for src, dst in self.graph.edges():
            self._retrieve_edge_info(src, dst)

    def get_in_degree(self, print_stdout=True):
        """Return the in-degree of every node, highest first."""
        unsorted_degrees = self.graph.in_degree()
        return self._sorted_results(unsorted_degrees, print_stdout)

    def get_out_degree(self, print_stdout=True):
        """Return the out-degree of every node, highest first."""
        unsorted_degrees = self.graph.out_degree()
        return self._sorted_results(unsorted_degrees, print_stdout)

    @staticmethod
    def _sorted_results(unsorted_degrees, print_stdout):
        """Return an OrderedDict node -> degree sorted by descending degree.

        *unsorted_degrees* may be a mapping or any iterable of
        (node, degree) pairs, so both dict results and degree views work.
        When *print_stdout* is true, each "degree node" pair is printed.
        """
        sorted_degrees = OrderedDict(
            sorted(dict(unsorted_degrees).items(),
                   key=lambda item: item[1], reverse=True))
        if print_stdout:
            for node, degree in sorted_degrees.items():
                print(degree, node)
        return sorted_degrees

    def _retrieve_node_info(self, node):
        """Fill ``self.data[node]`` with the node IP and its country.

        Only done for layers 3 and 4 and only when GeoIP data was loaded;
        nodes without a known country are labelled 'private'.
        """
        self.data[node] = {}
        if self.layer >= 3 and self.geo_ip:
            if self.layer == 3:
                self.data[node]['ip'] = node
            elif self.layer == 4:
                # Layer 4 nodes are "address:port" strings
                self.data[node]['ip'] = node.split(':')[0]

            node_ip = self.data[node]['ip']
            country = self.geo_ip.country_name_by_addr(node_ip)
            self.data[node]['country'] = country if country else 'private'
        #TODO layer 2 info?

    def _retrieve_edge_info(self, src, dst):
        """Summarise the packets of edge src -> dst on the edge itself.

        Sets 'layers' (set of layer names seen), 'transmitted' (sum of the
        packet lengths) and 'connections' (number of packets).
        """
        edge = self.graph[src][dst]
        if edge:
            packets = edge['packets']
            edge['layers'] = {name for p in packets
                              for name in GraphManager.expand(p)}
            edge['transmitted'] = sum(len(p) for p in packets)
            edge['connections'] = len(packets)

    @staticmethod
    def get_layers(packet):
        """Return the list of layer names of *packet*, outermost first."""
        return list(GraphManager.expand(packet))

    @staticmethod
    def expand(x):
        """Yield the name of *x* and of every nested non-empty payload."""
        yield x.name
        while x.payload:
            x = x.payload
            yield x.name

    @staticmethod
    def _layer_2_edge(packet):
        """Return (src, dst, packet) taken from the first layer."""
        return packet[0].src, packet[0].dst, packet

    @staticmethod
    def _layer_3_edge(packet):
        """Return (src, dst, packet) from the second layer, or None if the
        packet has no IP layer."""
        if packet.haslayer(IP):
            return packet[1].src, packet[1].dst, packet

    @staticmethod
    def _layer_4_edge(packet):
        """Return ("src:sport", "dst:dport", packet), or None if the packet
        has neither a TCP nor a UDP layer."""
        if any(packet.haslayer(proto) for proto in (TCP, UDP)):
            src = packet[1].src
            dst = packet[1].dst
            transport = packet[2]
            return ("%s:%i" % (src, transport.sport),
                    "%s:%i" % (dst, transport.dport),
                    packet)

    def draw(self, filename=None, figsize=(50, 50)):
        """Lay the graph out with 'dot' and draw it to *filename*.

        Nodes with a known, non-private country are labelled with it and
        filled in blue. Edges are labelled with the transmitted bytes and the
        layer names (sorted, so the label is deterministic). *figsize* is
        accepted for interface compatibility but not used.
        """
        graph = self.get_graphviz_format()

        for node in graph.nodes():
            node.attr['shape'] = 'circle'
            node.attr['fontsize'] = '10'
            node.attr['width'] = '0.5'
            country_label = self.data[str(node)].get('country')
            if country_label is None:
                continue
            if country_label == 'private':
                node.attr['label'] = str(node)
            else:
                node.attr['label'] = "%s (%s)" % (str(node), country_label)
                node.attr['color'] = 'blue'
                node.attr['style'] = 'filled'
                #TODO add color based on country or scan?

        node_count = len(self.graph.nodes())
        for edge in graph.edges():
            connection = self.graph[edge[0]][edge[1]]
            edge.attr['label'] = 'transmitted: %i bytes\n%s ' % (
                connection['transmitted'],
                ' | '.join(sorted(connection['layers'])))
            edge.attr['fontsize'] = '8'
            edge.attr['minlen'] = '2'
            # Thicker lines for busier edges, capped at 2.0
            edge.attr['penwidth'] = min(connection['connections'] * 1.0 / node_count, 2.0)

        graph.layout(prog='dot')
        graph.draw(filename)

    #TODO do we need a .dot file export?
    def get_graphviz_format(self, filename=None):
        """Return the graph as a graphviz AGraph, writing it to *filename*
        when one is given."""
        # networkx 1.x exposes to_agraph at top level; newer releases only
        # provide it as networkx.nx_agraph.to_agraph.
        to_agraph = getattr(networkx, 'to_agraph', None)
        if to_agraph is None:
            to_agraph = networkx.nx_agraph.to_agraph
        agraph = to_agraph(self.graph)
        if filename:
            agraph.write(filename)
        return agraph
Beispiel #46
0
class ApkViewer(object):
    """Directory/file tree of an APK as a directed graph, exportable to
    GraphML (yEd flavour)."""

    def __init__(self, a):
        """Build the tree from ``a.get_files_information()``.

        A root ``Directory("APK")`` is created; every file contributes a
        chain of directory nodes below the root followed by a ``File`` node.
        ``self.ids`` maps each node to a sequential integer id.

        NOTE(review): directory nodes are cached in ``self.all_files`` by
        their bare name, so equally named directories under different
        parents share one node -- confirm that this is intended.
        """
        self.a = a

        self.G = DiGraph()
        self.all_files = {}
        self.ids = {}

        root = Directory( "APK" )
        root.set_color( "00FF00" )

        self.ids[ root ] = len(self.ids)
        self.G.add_node( root )

        for x, y, z in self.a.get_files_information():
            print(x, y, z, os.path.basename(x))

            # Path components of x as produced by splitall, reversed and
            # with the first one dropped
            components = []
            splitall( x, components )
            components.reverse()
            components.pop(0)

            last = root
            for name in components:
                if name not in self.all_files:
                    tmp = Directory( name )
                    self.ids[ tmp ] = len(self.ids)
                    self.all_files[ name ] = tmp
                else:
                    tmp = self.all_files[ name ]

                self.G.add_edge(last, tmp)
                last = tmp

            n1 = last
            n2 = File( x, y, z )
            self.G.add_edge(n1, n2)

            self.ids[ n2 ] = len(self.ids)

    def export_to_gml(self):
        """Return the graph serialised as a GraphML document (string).

        Node labels (base name and, for files, the file type) are
        XML-escaped so that names containing ``&``, ``<`` or ``>`` still
        produce a well-formed document. Every node is also printed, as
        before.
        """
        from xml.sax.saxutils import escape

        parts = [
            "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
            "<graphml xmlns=\"http://graphml.graphdrawing.org/xmlns\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:y=\"http://www.yworks.com/xml/graphml\" xmlns:yed=\"http://www.yworks.com/xml/yed/3\" xsi:schemaLocation=\"http://graphml.graphdrawing.org/xmlns http://www.yworks.com/xml/schema/graphml/1.1/ygraphml.xsd\">\n",
            "<key attr.name=\"description\" attr.type=\"string\" for=\"node\" id=\"d5\"/>\n",
            "<key for=\"node\" id=\"d6\" yfiles.type=\"nodegraphics\"/>\n",
            "<graph edgedefault=\"directed\" id=\"G\">\n",
        ]

        for node in self.G.nodes():
            print(node)

            parts.append("<node id=\"%d\">\n" % self.ids[node])
            parts.append("<data key=\"d6\">\n")
            parts.append("<y:ShapeNode>\n")

            parts.append("<y:Geometry height=\"%f\" width=\"%f\"/>\n" % (60.0, 7 * node.width))
            parts.append("<y:Fill color=\"#%s\" transparent=\"false\"/>\n" % node.color)

            parts.append("<y:NodeLabel>\n")
            parts.append("%s\n" % escape("%s" % node.basename))

            if isinstance(node, File):
                parts.append("%s\n" % escape("%s" % node.file_type))
                parts.append("%s\n" % hex(node.file_crc))

            parts.append("</y:NodeLabel>\n")

            parts.append("</y:ShapeNode>\n")
            parts.append("</data>\n")

            parts.append("</node>\n")

        for nb, edge in enumerate(self.G.edges()):
            parts.append("<edge id=\"%d\" source=\"%d\" target=\"%d\">\n" % (nb, self.ids[edge[0]], self.ids[edge[1]]))
            parts.append("</edge>\n")

        parts.append("</graph>\n")
        parts.append("</graphml>\n")

        return "".join(parts)
    def _update_one_step(self):
        """ experiment advance by one iteration

        Asks the recommender for suggestions for every user, lets the clicker
        accept/reject them, feeds the accepted pairs back into the
        recommender and collects statistics about the step.

        Returns:
            dict with the record of this iteration (timing, connection
            counts, suggestion/rejection totals, remaining candidates, plus
            the evaluator's scores), or None -- after emitting a warning --
            when the maximum number of iterations has been reached.
        """
        max_iter = self._set_info["max_iter"]

        # Check the budget first so that an exhausted experiment does not
        # build graphs it never uses.
        if not self._iteration < max_iter:
            msg = "experiment had reached the maximum iteration (max: " + str(max_iter) + ")"
            warnings.warn(msg)
            return None

        graph_type = DiGraph if self._is_directed else Graph

        # Number of distinct connections before this step
        old_graph = graph_type()
        old_graph.add_edges_from(self._recommender._user_connections)
        old_tot_conns = len(old_graph.edges())

        new_connections = []
        tot_suggestions = 0
        tot_rejections = 0
        start_time = datetime.now()

        for user_id in self._recommender._user_ids:
            suggestions = self._recommender.gen_suggestion(user_id)
            accepted, rejected = self._clicker.click(user_id, suggestions)

            tot_suggestions += len(suggestions)
            tot_rejections += len(rejected)

            # append new connections
            new_connections.extend(
                [user_id, new_friend] for new_friend in accepted)

            # track rejected users
            # (some clicker simulators always yield an empty rejected list to
            # allow re-suggestion of recommended users)
            if len(rejected) > 0:
                # Accumulate into a list owned by this object, so the list
                # returned by the clicker is never mutated by a later extend.
                self._rejected_user_dict.setdefault(user_id, []).extend(rejected)

        # consolidate new connections
        new_connections = array(new_connections)

        # tracking experiment progress
        self._iteration += 1

        if new_connections.shape[0] > 0:
            # update simulator's connection data
            self._recommender.add_new_connections(new_connections)
            self._recommender.update()
            self._no_growth_counter = 0

        # Number of distinct connections after this step
        new_graph = graph_type()
        new_graph.add_edges_from(self._recommender._user_connections)
        new_tot_conns = len(new_graph.edges())
        new_added_conns = new_tot_conns - old_tot_conns

        duration = datetime.now() - start_time
        total_cost = duration.total_seconds()

        # collect evaluation scores
        self._evaluator.load_eval_user_connections(self._recommender._user_connections)
        eval_score = self._evaluator.get_score()

        # Remaining candidates; the lengths are only read, so no copy of the
        # recommender's memory is needed.
        candidates = self._recommender._ordered_cand_dict
        cand_size = sum(len(candidates[k]) for k in candidates.keys())

        # measure the network
        if self._total_edges_ref is None:
            # The reference size is computed once and cached
            ref_graph = graph_type()
            ref_graph.add_edges_from(self._evaluator._ref_user_connections)
            self._total_edges_ref = len(ref_graph.edges())

        # Measured again after the evaluator has seen the connections
        now_graph = graph_type()
        now_graph.add_edges_from(self._recommender._user_connections)
        now_num_edges = len(now_graph.edges())
        ref_num_edges = self._total_edges_ref

        # collect information
        exp_record = {"iteration": self._iteration,
                      "start_time": start_time.strftime("%Y-%m-%d %H:%M:%S"),
                      "time_cost_seconds": total_cost,
                      "num_new_connections_size": new_added_conns,
                      "old_user_connections_size": old_tot_conns,
                      "now_user_connections_size": now_num_edges,
                      "ref_user_connections_size": ref_num_edges,
                      "tot_suggestions": tot_suggestions,
                      "tot_rejections": tot_rejections,
                      "tot_remain_candidates": cand_size
                      }

        exp_record.update(eval_score)

        # mark advance of experiment
        self._recommender.update_iteration()

        return exp_record
Beispiel #48
0
class MacroManager(object):
    """This class manages the macros specified in the configuration file.

    The parameters of each section, along with their dependencies are passed to
    the class. Then, it verifies that the dependencies are correct (they form a
    DAG and respect the sections dependencies) and creates an ordered list of
    the macros to be used when replacing their actual values in a given
    combination.
    """

    def __init__(self):
        """Create a new MacroManager object."""

        self.dep_graph = DiGraph()

        self.ds_macros = set()
        self.xp_macros = set()

        self.__define_test_macros()

    def __define_test_macros(self):
        """Define values and dependencies of test macros.

        A set of macros are defined by default, including input and output
        directories of datasets and experiments and their identifiers.
        """

        self.test_macros = {
            "data_base_dir": "/tests/data",
            "out_base_dir": "/tests/out",
            "data_dir": "/tests/data/0",  # data_base_dir/ds_id
            "out_dir": "/tests/out/0",  # out_base_dir/comb_id
            "comb_id": 0,
            "ds_id": 0,
            "xp.input": "/tests/data/0",  # data_dir
            "xp.output": "/tests/out/0"  # out_dir
        }

        self.ds_params = set()
        self.xp_params = set()

        self.dep_graph.add_nodes_from(self.test_macros.keys())

        self.add_dependency("data_base_dir", "data_dir")
        self.add_dependency("ds_id", "data_dir")
        self.add_dependency("data_dir", "xp.input")

        self.add_dependency("out_base_dir", "out_dir")
        self.add_dependency("comb_id", "out_dir")
        self.add_dependency("out_dir", "xp.output")

        # Materialised as a list: it is reversed and edited later on, and
        # topological_sort may return a lazy iterable.
        self.sorted_test_macros = list(topological_sort(self.dep_graph))

    def update_test_macros(self, ds_id=None, comb_id=None):
        """Update test macros with dataset and/or combination ids.

        An id of 0 is a valid identifier and is applied like any other; only
        ``None`` means "leave unchanged". The ``ds_id`` / ``comb_id`` macros
        themselves are refreshed together with the directories derived from
        them. Macros that were filtered out as unused are left alone.

        Args:
          ds_id (int, optional):
           The dataset identifier.
          comb_id (int, optional):
            The combination identifier.
        """

        if ds_id is not None:
            if "ds_id" in self.test_macros:
                self.test_macros["ds_id"] = ds_id
            if "data_dir" in self.test_macros:
                self.test_macros["data_dir"] = \
                    self.test_macros["data_base_dir"] + "/" + str(ds_id)
                if "xp.input" in self.test_macros:
                    self.test_macros["xp.input"] = \
                        self.test_macros["data_dir"]
        if comb_id is not None:
            if "comb_id" in self.test_macros:
                self.test_macros["comb_id"] = comb_id
            if "out_dir" in self.test_macros:
                self.test_macros["out_dir"] = \
                    self.test_macros["out_base_dir"] + "/" + str(comb_id)
                if "xp.output" in self.test_macros:
                    self.test_macros["xp.output"] = \
                        self.test_macros["out_dir"]

    def __filter_unused_test_macros(self):
        """Remove test macros that no other macro depends on.

        Macros are visited in reverse topological order, so removing a macro
        can expose the macros it was derived from as unused in turn.
        """
        # Iterate over a snapshot: the list is edited inside the loop.
        for m in list(reversed(self.sorted_test_macros)):
            # out_degree gives a plain int for a single node, whereas the
            # truthiness of successors() depends on it being a list.
            if self.dep_graph.out_degree(m) == 0:
                self.dep_graph.remove_node(m)
                self.sorted_test_macros.remove(m)
                del self.test_macros[m]

    def add_ds_params(self, params):
        """Add the list of dataset parameters.

        Args:
          params (iterable):
            The dataset parameter names (e.g. a dict keyed by name).
        """

        self.ds_params = self.ds_params.union(params)

    def add_xp_params(self, params):
        """Add the list of experiment parameters.

        Args:
          params (iterable):
            The experiment parameter names (e.g. a dict keyed by name).
        """

        self.xp_params = self.xp_params.union(params)

    def add_dependency(self, m1, m2):
        """Include a new macro dependency: m1 -> m2. This means that to obtain
        the value of m2 we use the value of m1.

        Args:
          m1 (string):
            The name of the param used.
          m2 (string):
            The name of the param being specified.

        Raises:
          MacroException:
            If the order of sections (test -> ds -> xp) is not respected.
        """

        # Check if dependency is correct
        if m1 in self.ds_params:
            if m2 in self.test_macros:
                logger.error("Not allowed dependency: ds -> test")
                raise MacroException("Not allowed dependency: ds -> test")
        elif m1 in self.xp_params:
            if m2 in self.test_macros:
                logger.error("Not allowed dependency: xp -> test")
                raise MacroException("Not allowed dependency: xp -> test")
            elif m2 in self.ds_params:
                logger.error("Not allowed dependency: xp -> ds")
                raise MacroException("Not allowed dependency: xp -> ds")

        # Add dependency
        self.dep_graph.add_edge(m1, m2)

    def sort_macros(self):
        """Sort macros respecting dependencies.

        Raises:
          MacroException:
            If there are cycles in dependencies between macros.
        """

        # Filter out unused test variables
        self.__filter_unused_test_macros()

        # Sort ds and xp macros. The results are materialised inside the try
        # block so that a cycle is reported here even if the sort is lazy.
        try:
            self.sorted_ds_macros = list(
                topological_sort(self.dep_graph.subgraph(self.ds_params)))
            self.sorted_xp_macros = list(
                topological_sort(self.dep_graph.subgraph(self.xp_params)))
        except NetworkXUnfeasible:
            raise MacroException("Macros do not follow a DAG")

        logger.info("Dependencies = %s", self.dep_graph.edges())
        logger.info("Test macros = %s", self.sorted_test_macros)
        logger.info("Dataset macros = %s", self.sorted_ds_macros)
        logger.info("Experiment macros = %s", self.sorted_xp_macros)

    def _replace_macros_from_list(self, list_macros, value):
        """Replace the macros given in the list within the value if present.

        Args:
          list_macros (dict):
            The list of macros to replace and their respective values.
          value (string):
            The value where to do the replacement.

        Returns:
          The value with every ``${name}`` occurrence substituted.
        """

        new_value = value
        for name, replacement in list_macros.items():
            new_value = new_value.replace("${" + name + "}", str(replacement))
        return new_value

    def replace_ds_macros(self, comb):
        """Replace macros in ds combination.

        The combination is updated in place. Values resolved along the way
        are kept in a private copy of the test macros, so ``self.test_macros``
        is not modified.

        Args:
          comb (dict):
            The combination of parameters.
        """

        list_macros = dict(self.test_macros)

        for m in self.sorted_ds_macros:
            comb[m] = self._replace_macros_from_list(list_macros, comb[m])
            list_macros[m] = comb[m]

    def replace_xp_macros(self, comb):
        """Replace macros in xp combination.

        Dataset macros are resolved first, then experiment macros. The
        combination is updated in place; ``self.test_macros`` is not
        modified.

        Args:
          comb (dict):
            The combination of parameters.
        """

        list_macros = dict(self.test_macros)

        for m in self.sorted_ds_macros:
            comb[m] = self._replace_macros_from_list(list_macros, comb[m])
            list_macros[m] = comb[m]

        for m in self.sorted_xp_macros:
            comb[m] = self._replace_macros_from_list(list_macros, comb[m])
            list_macros[m] = comb[m]
    def load(self,fname, verbose=True, **kwargs):
        """
        Load an edge list from a text file and build the network from it.

        The file is read with ``np.genfromtxt`` as integers. The first two
        columns are the source and target node IDs; a third column, if
        present, is the flux (edge weight), otherwise every edge gets
        weight 1. No header should be included. Node IDs that do not run
        continuously from 0 to Number_of_nodes-1 are renumbered.

        The resulting directed graph is then
          * stripped of self loops,
          * symmetrised (missing reverse edges are added, unequal weights of
            an edge pair are replaced by their mean),
          * restricted to its largest strongly connected component,
          * annotated, per edge, with ``transition_rate`` (edge weight divided
            by the weighted out-degree of the source) and
            ``effective_distance`` (1 - log(transition_rate)).

        Parameters
        ----------
            fname : str
                Path to the file

            verbose : bool
                Print information about the data. True by Default

            kwargs : dict
                Only ``delimiter`` is read; it defaults to " " (a single
                space).
                NOTE(review): earlier documentation advertised a "," default
                delimiter, a ``dtype`` keyword and a ``return_graph`` flag;
                none of these is honoured by the code below (the dtype is
                fixed to int and the graph is always returned).

        Returns:
        --------
            The processed graph (networkx DiGraph). ``self.nodes`` is set to
            its number of nodes.
            NOTE(review): this method does not assign ``self.graph`` itself,
            although the final message says so -- presumably the caller
            stores the returned graph; confirm.

        """
        delimiter = kwargs["delimiter"]      if "delimiter"      in kwargs.keys() else " "
        
        data = np.genfromtxt(fname, delimiter=delimiter, dtype=int, unpack=False)
        source, target = data[:,0], data[:,1]
        # Optional third column holds the weights; default to unit weights
        if data.shape[1] > 2:
            flux = data[:,2]
        else:
            flux = np.ones_like(source)
        nodes  = set(source) | set(target)
        self.nodes = len(nodes)
        lines  = len(flux)
        # Renumber the nodes when the IDs are not exactly 0..N-1
        if set(range(self.nodes)) != nodes:
            new_node_ID = {old:new for new,old in enumerate(nodes)}
            map_new_node_ID = np.vectorize(new_node_ID.__getitem__)
            source = map_new_node_ID(source)
            target = map_new_node_ID(target)
            if verbose:
                print "\nThe node IDs have to run continuously from 0 to Number_of_nodes-1."
                print "Node IDs have been changed according to the requirement.\n-----------------------------------\n"
                
        
            # NOTE(review): this preview sits inside the renumbering branch
            # but outside "if verbose", so it is printed whenever IDs were
            # renumbered (regardless of verbose) and never otherwise. It also
            # indexes the first 7 rows -- assumes the file has at least 7
            # lines; confirm the intended nesting.
            print 'Lines: ',lines , ', Nodes: ', self.nodes
            print '-----------------------------------\nData Structure:\n\nsource,    target,    weight \n'
            for ii in range(7):            
                print "%i,       %i,       %1.2e" %(source[ii], target[ii], flux[ii])
            print '-----------------------------------\n'
        
        
        G = DiGraph()         # Empty, directed Graph
        G.add_nodes_from(range(self.nodes))
        for ii in xrange(lines):
            u, v, w = int(source[ii]), int(target[ii]), float(flux[ii])
            if u != v: # ignore self loops
                assert not G.has_edge(u,v), "Edge appeared twice - not supported"                    
                G.add_edge(u,v,weight=w)
            else:
                if verbose:
                    print "ignore self loop at node", u
        
        # Symmetrise: make sure every edge has a reverse edge of equal weight.
        # NOTE(review): edges are added while looping over G.edges(data=True);
        # this presumably relies on edges() returning a list snapshot --
        # confirm for the networkx version in use.
        symmetric = True
        for s,t,w in G.edges(data=True):
            w1 = G[s][t]["weight"]
            try:
                w2 = G[t][s]["weight"]
            except KeyError:
                # Reverse edge missing: mirror the edge with the same weight
                symmetric = False
                G.add_edge(t,s,weight=w1)
                w2 = w1
            if w1 != w2:
                # Both directions exist with different weights: use the mean
                symmetric = False
                G[s][t]["weight"] += G[t][s]["weight"]
                G[s][t]["weight"] /= 2
                G[t][s]["weight"]  = G[s][t]["weight"]
        if verbose:
            if not symmetric:
                print "The network has been symmetricised."
        
        
        # Keep only the largest strongly connected component
        ccs = strongly_connected_component_subgraphs(G)
        ccs = sorted(ccs, key=len, reverse=True)
        
        G_GSCC = ccs[0]
        if G_GSCC.number_of_nodes() != G.number_of_nodes():
            G = G_GSCC
            if verbose:
                print "\n--------------------------------------------------------------------------"
                print "The network has been restricted to the giant strongly connected component."
        self.nodes = G.number_of_nodes()
        
        
        
        
        # Transition rate: share of the source's total outgoing weight
        for u, v, data in G.edges(data=True):
            weight = G.out_degree(u,weight='weight')
            data['transition_rate'] = 1.*data['weight']/weight
        
        
        # Effective distance of an edge: 1 - log(transition rate)
        for u, v, data in G.edges(data=True):
            data['effective_distance'] = 1. - log(data['transition_rate'])
        
        if verbose:
            print "\n--------------------------------------------------------------------------"
            print "\nnode ID, out-weight,   normalized out-weight,  sum of effective distances \n "
            # Summary of the out-edges of nodes 0..6
            for ii in range(7):
                out_edges = G.out_edges(ii, data=True)
                out_weight, effective_distance, transition_rate = 0, 0, 0
                for u, v, data in out_edges:
                    out_weight          += data["weight"]
                    effective_distance  += data["effective_distance"]
                    transition_rate     += data["transition_rate"]
                print "  %i       %1.2e           %2.3f                 %1.2e " %(ii,out_weight, transition_rate, effective_distance)
            print "\n ... graph is saved in self.graph"
        return G
Beispiel #50
0
    def gen_graph(self, seed, sheet=None):
        """
        Build the dependency graph reachable from a starting point.

        Starting from *seed* (e.g., A6, or A3:B7) on *sheet* (the active
        sheet when omitted), every formula cell is translated to Python code
        and compiled, and the cells and ranges it refers to are followed in
        turn. Edges point from a referenced cell/range to the cell (or range)
        that uses it. Returns a Spreadsheet instance holding the graph and
        the map of all visited cells.
        """

        # starting points
        if sheet is None:
            cursheet = self.excel.get_active_sheet()
        else:
            cursheet = sheet
        self.excel.set_sheet(cursheet)

        # row/column counts are irrelevant here, since seed can be a list of
        # unlinked cells
        seed_cells, _nr, _nc = Cell.make_cells(self.excel, seed, sheet=cursheet)
        seed_cells = list(flatten(seed_cells))

        logger.debug("Seed %s expanded into %s cells" % (seed, len(seed_cells)))

        # only keep seeds with formulas or numbers
        seed_cells = [cell for cell in seed_cells
                      if cell.formula or isinstance(cell.value, number_types)]

        logger.debug("%s filtered seeds" % len(seed_cells))

        # cells to analyze: only formulas
        pending = [cell for cell in seed_cells if cell.formula]

        logger.debug("%s cells on the todo list" % len(pending))

        # map of all cells, keyed by address
        cellmap = OrderedDict((cell.address(), cell) for cell in seed_cells)

        # directed graph mirroring the content of cellmap
        graph = DiGraph()
        for known_cell in itervalues(cellmap):
            self.add_node_to_graph(graph, known_cell)

        while pending:
            current = pending.pop()

            logger.debug("Handling {}".format(current.address()))

            # set the current sheet so relative addresses resolve properly
            if current.sheet != cursheet:
                cursheet = current.sheet
                self.excel.set_sheet(cursheet)

            # parse the formula into code, then set and compile it right away
            # (will flag problems sooner rather than later)
            expression, formula_ast = self.cell2code(current)
            current.python_expression = expression
            current.compile()

            # all the cells/ranges this formula refers to, without dupes
            references = uniqueify([node.tvalue.replace('$', '')
                                    for node in formula_ast.nodes()
                                    if isinstance(node, RangeNode)])

            for ref in references:
                if is_range(ref):
                    # multi-cell range: the range object makes sure we always
                    # have an absolute address
                    rng = CellRange(ref, sheet=cursheet)

                    if rng.address() in cellmap:
                        # already dealt with this range: just link it to the
                        # parent
                        graph.add_edge(cellmap[rng.address()],
                                       cellmap[current.address()])
                        continue

                    # turn into cell objects
                    cells, nrows, ncols = Cell.make_cells(self.excel, ref, sheet=cursheet)

                    # get the values so we can set the range value
                    if nrows == 1 or ncols == 1:
                        rng.value = [cell.value for cell in cells]
                    else:
                        rng.value = [[cell.value for cell in row] for row in cells]

                    # save the range and link it to the parent
                    cellmap[rng.address()] = rng
                    self.add_node_to_graph(graph, rng)
                    graph.add_edge(rng, cellmap[current.address()])
                    # cells in the range should point to the range as their parent
                    target = rng
                else:
                    # not a range, create the cell object
                    cells = [Cell.resolve_cell(self.excel, ref, sheet=cursheet)]
                    target = cellmap[current.address()]

                # process each cell
                for member in flatten(cells):
                    if member.address() not in cellmap:
                        # first time we meet this cell
                        if member.formula:
                            # formula cells are analysed later on
                            pending.append(member)
                        else:
                            # constant cell, no need for further processing,
                            # just remember to set the code
                            constant_code, _ = self.cell2code(member)
                            member.python_expression = constant_code
                            if not isinstance(constant_code, (int, float)):
                                member.compile()

                        # save in the cellmap and add to the graph
                        cellmap[member.address()] = member
                        self.add_node_to_graph(graph, member)

                    # add an edge from the cell to the parent (range or cell)
                    graph.add_edge(cellmap[member.address()], target)

        logger.info("Graph construction done, %s nodes, %s edges, %s cellmap entries" % (len(graph.nodes()),
                                                                                         len(graph.edges()),
                                                                                         len(cellmap)))

        return Spreadsheet(graph=graph, cellmap=cellmap, filename=self.filename)
Beispiel #51
0
class GVMAnalysis:
    """Directed graph of methods built from a VM analysis object.

    Every node is a ``NodeF`` registered under the key
    ``"<class_name> <method_name> <descriptor>"``.  ``self.G`` only stores the
    integer node ids and the edges between them; the ``NodeF`` objects are
    kept in two private lookup tables (by key and by id).
    """

    # (title, GEXF type, default or None) of every node attribute declared in
    # the GEXF header, in the order they are written.
    _GEXF_ATTRIBUTES = (
        ("type", "string", "normal"),
        ("class_name", "string", None),
        ("method_name", "string", None),
        ("descriptor", "string", None),
        ("permissions", "integer", "0"),
        ("permissions_level", "string", "normal"),
        ("android_api", "float", "0.0"),
        ("java_api", "float", "0.0"),
        ("dynamic_code", "boolean", "false"),
    )

    def __init__(self, vmx, apk):
        """Build the graph.

        Args:
            vmx: analysis object; must provide ``get_vm()``,
                ``tainted_packages``, ``get_method_signature()`` and
                ``get_permissions()``.
            apk: object providing ``get_activities()``, ``get_services()``
                and ``get_receivers()``, or ``None`` to skip the entry-point
                markers.
        """
        self.vmx = vmx
        self.vm = self.vmx.get_vm()

        self.__nodes = {}
        self.__nodes_id = {}
        self.G = DiGraph()

        # The order of these steps matters: node ids are handed out
        # sequentially as nodes are created.
        self._add_internal_calls()

        if apk is not None:
            self._add_entry_points(apk.get_activities(), "onCreate", "(Landroid/os/Bundle;)V",
                                   "activity", ACTIVITY_COLOR, "ACTIVITY")
            self._add_entry_points(apk.get_services(), "onCreate", "()V",
                                   "service", SERVICE_COLOR, "SERVICE")
            self._add_entry_points(apk.get_receivers(), "onReceive",
                                   "(Landroid/content/Context; Landroid/content/Intent;)V",
                                   "receiver", RECEIVER_COLOR, "RECEIVER")

        self._add_thread_links()
        self._add_permission_nodes()
        self._add_dynamic_code_nodes()

    def _api_entropy(self, method, prefix):
        """Return the entropy of the "L4" signature of `method` restricted to `prefix`."""
        signature = self.vmx.get_method_signature(method, "L4", {"L4": {"arguments": [prefix]}})
        return libsign.entropy(signature.get_string())

    def _add_internal_calls(self):
        """Add one caller -> callee edge per internal package reference."""
        for j in self.vmx.tainted_packages.get_internal_packages():
            caller = j.get_method()
            caller_ident = (caller.get_class_name(), caller.get_name(), caller.get_descriptor())

            n1 = self._get_node(*caller_ident)
            n2 = self._get_node(j.get_class_name(), j.get_name(), j.get_descriptor())

            # NOTE(review): both m1 and m2 are derived from the *caller*
            # (j.get_method()), so the callee node n2 is scored with the
            # caller's signature. Kept as-is; confirm whether that is intended.
            m1 = self.vm.get_method_descriptor(*caller_ident)
            m2 = caller

            for attribute, prefix in (("android_api", "Landroid"), ("java_api", "Ljava")):
                n1.set_attributes({attribute: self._api_entropy(m1, prefix)})
                n2.set_attributes({attribute: self._api_entropy(m2, prefix)})

            self.G.add_edge(n1.id, n2.id)

    def _add_entry_points(self, class_names, method_name, descriptor, node_type, color, label):
        """Tag existing entry-point methods and hang a labelled marker node on each."""
        for name in class_names:
            n1 = self._get_exist_node(bytecode.FormatClassToJava(name), method_name, descriptor)
            if n1 is None:
                continue

            n1.set_attributes({"type": node_type})
            n1.set_attributes({"color": color})
            n2 = self._get_new_node_from(n1, label)
            n2.set_attributes({"color": color})
            self.G.add_edge(n2.id, n1.id)

    def _add_thread_links(self):
        """Link start() to run() for every class whose superclass is java.lang.Thread."""
        for c in self.vm.get_classes():
            if c.get_superclassname() != "Ljava/lang/Thread;":
                continue

            for i in self.vm.get_method("run"):
                if i.get_class_name() != c.get_name():
                    continue

                run_node = self._get_node(i.get_class_name(), i.get_name(), i.get_descriptor())
                start_node = self._get_node(i.get_class_name(), "start", i.get_descriptor())

                # Edge goes start -> run, matching the caller -> callee
                # direction of every other edge in this graph (it used to be
                # added the other way round).
                self.G.add_edge(start_node.id, run_node.id)

    def _add_permission_nodes(self):
        """Flag methods that use a permission and attach one node per known risk."""
        list_permissions = self.vmx.get_permissions([])
        for x in list_permissions:
            for j in list_permissions[x]:
                method = j.get_method()
                n1 = self._get_exist_node(method.get_class_name(), method.get_name(), method.get_descriptor())
                if n1 is None:
                    continue

                n1.set_attributes({"permissions": 1})
                n1.set_attributes({"permissions_level": DVM_PERMISSIONS["MANIFEST_PERMISSION"][x][0]})

                # Only a permission without a risk entry is skipped; any other
                # KeyError is a real error and is no longer swallowed.
                try:
                    risks = PERMISSIONS_RISK[x]
                except KeyError:
                    continue

                for tmp_perm in risks:
                    if tmp_perm not in DEFAULT_RISKS:
                        continue
                    risk_label, risk_color = DEFAULT_RISKS[tmp_perm][0], DEFAULT_RISKS[tmp_perm][1]
                    n2 = self._get_new_node(method.get_class_name(), method.get_name(),
                                            method.get_descriptor() + " " + risk_label,
                                            risk_label)
                    n2.set_attributes({"color": risk_color})
                    self.G.add_edge(n2.id, n1.id)

    def _add_dynamic_code_nodes(self):
        """Mark methods that create a DexClassLoader as loading dynamic code."""
        for m, _ in self.vmx.tainted_packages.get_packages():
            if m.get_info() != "Ldalvik/system/DexClassLoader;":
                continue

            for path in m.get_paths():
                if path.get_access_flag() != TAINTED_PACKAGE_CREATE:
                    continue

                method = path.get_method()
                n1 = self._get_exist_node(method.get_class_name(), method.get_name(), method.get_descriptor())
                if n1 is None:
                    # No node was registered for this method; previously this
                    # crashed with AttributeError on None.
                    continue

                n2 = self._get_new_node(method.get_class_name(), method.get_name(),
                                        method.get_descriptor() + " " + "DEXCLASSLOADER",
                                        "DEXCLASSLOADER")

                n1.set_attributes({"dynamic_code": "true"})
                n2.set_attributes({"color": DEXCLASSLOADER_COLOR})
                self.G.add_edge(n2.id, n1.id)

    @staticmethod
    def _node_key(class_name, method_name, descriptor):
        """Return the lookup key used for a method in the node table."""
        return "%s %s %s" % (class_name, method_name, descriptor)

    def _register(self, key, *node_args):
        """Return the node stored under `key`, creating it from `node_args` if absent."""
        if key not in self.__nodes:
            # The id is the number of nodes registered so far.
            node = NodeF(len(self.__nodes), *node_args)
            self.__nodes[key] = node
            self.__nodes_id[node.id] = node

        return self.__nodes[key]

    def _get_exist_node(self, class_name, method_name, descriptor):
        """Return the node already registered for this method, or None."""
        return self.__nodes.get(self._node_key(class_name, method_name, descriptor))

    def _get_node(self, class_name, method_name, descriptor):
        """Return the node for this method, registering a new one if needed."""
        key = self._node_key(class_name, method_name, descriptor)
        return self._register(key, class_name, method_name, descriptor)

    def _get_new_node_from(self, n, label):
        """Return a labelled companion node of `n` (descriptor suffixed with `label`)."""
        return self._get_new_node(n.class_name, n.method_name, n.descriptor + label, label)

    def _get_new_node(self, class_name, method_name, descriptor, label):
        """Return the labelled node for this method, registering a new one if needed."""
        key = self._node_key(class_name, method_name, descriptor)
        return self._register(key, class_name, method_name, descriptor, label)

    def export_to_gexf(self, output):
        """Write the graph to the file `output` in GEXF format.

        Args:
            output: path of the file to create or overwrite.
        """
        # `with` guarantees the file is closed even if a write fails.
        with open(output, "w") as fd:
            fd.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")
            fd.write("<gexf xmlns=\"http://www.gephi.org/gexf\" xmlns:viz=\"http://www.gephi.org/gexf/viz\">\n")
            fd.write("<graph type=\"static\">\n")

            fd.write("<attributes class=\"node\" type=\"static\">\n")
            for title, kind, default in self._GEXF_ATTRIBUTES:
                default_part = "" if default is None else "default=\"%s\" " % default
                fd.write("<attribute %sid=\"%d\" title=\"%s\" type=\"%s\"/>\n"
                         % (default_part, ID_ATTRIBUTES[title], title, kind))
            fd.write("</attributes>\n")

            fd.write("<nodes>")
            # nodes()/edges() exist in every networkx release, unlike the
            # `G.node` / `G.edge` attributes that were dropped in 2.x.
            for node in self.G.nodes():
                fd.write("<node id=\"%d\" label=\"%s\">\n" % (node, escape(self.__nodes_id[node].label)))
                fd.write(self.__nodes_id[node].get_attributes())
                fd.write("</node>\n")
            fd.write("</nodes>\n")

            fd.write("<edges>\n")
            for nb, link in enumerate(self.G.edges()):
                fd.write("<edge id=\"%d\" source=\"%d\" target=\"%d\"/>\n" % (nb, link[0], link[1]))
            fd.write("</edges>\n")

            fd.write("</graph>\n")
            fd.write("</gexf>\n")
Beispiel #52
0
  Purpose: 
  Created: 2018/7/25
"""

import unittest
from lpp import *
from networkx import DiGraph

def iter_links(lines):
    """Yield ``(start, end)`` node pairs chaining the rows of each group.

    Each non-blank line is split on whitespace.  Column 0 is the group name
    and the node name is column 7 concatenated with column 8.  The first row
    seen for a group only sets the current node; every later row of an
    already-seen group yields ``(previous node, this node)``.

    Args:
        lines: iterable of text lines (for example an open file).

    Yields:
        Tuples ``(start, end)`` of node names.

    Raises:
        IndexError: if a non-blank line has fewer than nine columns.
    """
    seen = set()
    previous = None
    for line in lines:
        fields = line.split()
        if not fields:
            # Blank lines carry no row; they used to raise IndexError.
            continue

        node = fields[7] + fields[8]
        if fields[0] in seen:
            # `previous` is simply the last row read, so rows of one group
            # are assumed to be contiguous in the input.
            yield previous, node
        else:
            seen.add(fields[0])
        previous = node


if __name__ == '__main__':
    # sys is not imported explicitly at the top of this file.
    import sys

    network = DiGraph()
    # Plain read mode: the removed 'U' flag is unnecessary because every line
    # is split on whitespace anyway. Both files are closed on exit.
    with open(sys.argv[1]) as RAW, open(sys.argv[2], 'w') as END:
        for start, end in iter_links(RAW):
            network.add_edge(start, end)
        for start, end in network.edges():
            END.write(start + '\t' + end + '\n')