Esempio n. 1
0
def test_sampling_graph1():
    """Sample paths from a graph that used to break pre-CFPG sampling.

    In this graph, the operation on (1, 3) would prune (3, 3), the node
    responsible for the cycle, but (3, 3) is kept because a non-cyclic path
    through it still exists via (1, 1). Later steps prune downstream nodes
    (i.e., (2, 4)), which removes every acyclic path through (1, 3). Sampling
    the resulting graph could therefore step into (1, 3) and find no
    successor permitted by the tags.

    The remedy was to repeat the process iteratively until convergence; this
    test only checks that sampling runs to completion.
    """
    edge_list = [(0, 1), (0, 3), (0, 4), (0, 5), (1, 4), (2, 4), (2, 5),
                 (3, 0), (3, 2), (3, 4), (3, 5), (4, 2), (4, 3), (4, 5)]
    g = nx.DiGraph()
    g.add_edges_from(edge_list)
    source = 0
    target = 5
    length = 5
    reach_sets = pg.get_reachable_sets(g, source, target, max_depth=length)
    f_level, b_level = reach_sets
    pre_cfpg = pg.PreCFPG.from_graph(g, source, target, length, f_level,
                                     b_level)
    # No assertion on the result: the test passes if sampling does not raise
    pre_cfpg.sample_paths(100)
Esempio n. 2
0
def scaling_random_graphs(num_samples, min_size, max_size, edge_prob=0.5):
    """Time three path computations on random directed graphs of growing size.

    For every node count from `min_size` to `max_size` (inclusive),
    `num_samples` Erdos-Renyi graphs are generated with edge probability
    `edge_prob`. The source is node 0 and the target is the last node.

    Returns
    -------
    tuple of three arrays, each of shape
    (max_size - min_size + 1, num_samples), holding elapsed seconds for:
    the networkx simple-path enumeration plus PathsTree probabilities, the
    paths graph construction, and the CFPG construction. The CFPG time is
    measured from the same start as the paths graph time, so it includes it.
    """
    data_shape = (max_size - min_size + 1, num_samples)
    times_nx_paths = np.empty(data_shape)
    times_pg = np.empty(data_shape)
    times_cfpg = np.empty(data_shape)

    # Outer loop: graph size; inner loop: independent random graphs
    for i, num_nodes in enumerate(range(min_size, max_size + 1)):
        print(f'Number of nodes in network: {num_nodes}')

        for j in range(num_samples):
            print(f'Sample {j}')
            rg = nx.erdos_renyi_graph(num_nodes, edge_prob, directed=True)
            source, target = 0, num_nodes - 1

            # Baseline: enumerate all simple paths, then build the path tree
            # and compute the path probabilities
            nx_start = time.time()
            simple_paths = [tuple(p)
                            for p in nx.all_simple_paths(rg, source, target)]
            tree = PathsTree(simple_paths)
            tree.path_probabilities()
            times_nx_paths[i, j] = time.time() - nx_start

            # Paths graphs for every length, merged into one combined graph
            pg_start = time.time()
            f_level, b_level = get_reachable_sets(rg, source, target,
                                                  num_nodes)
            pg_list = [
                PathsGraph.from_graph(rg, source, target, length, f_level,
                                      b_level)
                for length in range(1, num_nodes)
            ]
            combined_pg = CombinedPathsGraph(pg_list)
            # NOTE: no count_paths method
            total_paths = combined_pg.count_paths()
            print(f'Total paths (with cycles): {total_paths}')
            times_pg[i, j] = time.time() - pg_start

            # Cycle-free paths graphs; the clock keeps running from pg_start
            cfpg_list = [CFPG.from_pg(paths_graph) for paths_graph in pg_list]
            times_cfpg[i, j] = time.time() - pg_start
    return times_nx_paths, times_pg, times_cfpg
def run_pg_vs_nx(graph, source, target, depth, num_samples):
    """Sample paths from a combined paths graph and time the procedure.

    A PathsGraph is built for every path length from 1 to `depth` and the
    results are merged into a CombinedPathsGraph, from which paths are drawn
    in chunks of 100 until at least `num_samples` have been collected (the
    result may hold up to one chunk more than requested). Sampling also
    stops if the combined graph yields no paths, so the loop always ends.

    The networkx enumeration is currently disabled (it is kept below inside
    a string literal), so the networkx entries of the result are empty and
    'nx_time' is 0.

    Parameters
    ----------
    graph : graph passed to get_reachable_sets and PathsGraph.from_graph
    source, target : endpoints of the paths
    depth : maximum path length
    num_samples : minimum number of sampled paths to collect

    Returns
    -------
    dict with keys 'pg_list', 'pg_paths', 'nx_paths', 'nx_paths_sampled',
    'pg_time' and 'nx_time'.
    """
    chunk_size = 100

    # PG sampling
    start = time.time()
    f_level, b_level = get_reachable_sets(graph, source, target, depth)
    pg_list = []
    for i in range(1, depth + 1):
        pg = PathsGraph.from_graph(graph, source, target, i, f_level, b_level)
        pg_list.append(pg)
    combined_pg = CombinedPathsGraph(pg_list)
    print("Sampling from PG")
    cf_paths = []
    while len(cf_paths) < num_samples:
        print(f'{len(cf_paths)} / {num_samples}')
        cf_path_chunk = combined_pg.sample_paths(chunk_size)
        if not cf_path_chunk:
            # Nothing can be sampled (e.g. no paths exist); looping again
            # would never make progress
            break
        # Accumulate the chunk; without this the loop condition never changes
        cf_paths.extend(cf_path_chunk)
    end = time.time()
    print("Done generating PGs")
    pg_elapsed = end - start

    # Networkx enumeration (disabled; kept in the string literal below)
    index = 0
    start = time.time()
    nx_paths = []
    nx_sampled_paths = []
    """
    for p in nx.all_simple_paths(graph, source, target, cutoff=depth):
        nx_paths.append(tuple(p))
        if index % 10000 == 0:
            print(index)
        index += 1
    #print("Making PathsTree")
    #paths_tree = PathsTree(nx_paths)
    #print("Sampling PathsTree")
    #nx_sampled_paths = paths_tree.sample(num_samples)
    end = time.time()
    nx_elapsed = end - start
    #assert set(cf_paths) <= set(nx_paths)
    print("all_simple_paths done")
    print("Total paths (nx):", len(nx_paths))
    print("Unique sampled paths (pg):", len(set(cf_paths)))
    #print("Unique sampled_paths (tree):", len(set(nx_sampled_paths)))
    print("NX time", nx_elapsed)
    print("PG time", pg_elapsed)

    nx_sampled_paths = []
    """
    nx_elapsed = 0
    return {
        'pg_list': pg_list,
        'pg_paths': cf_paths,
        'nx_paths': nx_paths,
        'nx_paths_sampled': nx_sampled_paths,
        'pg_time': pg_elapsed,
        'nx_time': nx_elapsed
    }
Esempio n. 4
0
def test_from_graph_with_levels_bad_depth():
    """Check that CFPG.from_graph gives an empty graph when the reach sets
    were only computed to depth 2 (the module-level `length` is presumably
    larger than that -- TODO confirm)."""
    reach_sets = pg.get_reachable_sets(g_uns, source, target, max_depth=2)
    fwd, back = reach_sets
    result = pg.CFPG.from_graph(g_uns, source, target, length,
                                fwd_reachset=fwd, back_reachset=back)
    assert not result.graph
Esempio n. 5
0
def test_from_pg():
    """Build a CFPG from a PathsGraph and check its paths and node count."""
    reach_sets = pg.get_reachable_sets(g_uns, source, target,
                                       max_depth=length)
    f_reach, b_reach = reach_sets
    raw_pg = pg.PathsGraph.from_graph(g_uns, source, target, length, f_reach,
                                      b_reach)
    cfpg = pg.CFPG.from_pg(raw_pg)

    found = cfpg.enumerate_paths()
    expected_paths = (('A', 'B', 'D', 'C', 'E'), ('A', 'C', 'D', 'B', 'E'))
    assert len(found) == 2
    for expected in expected_paths:
        assert expected in found
    assert len(cfpg.graph) == 8

    # D must appear as two distinct nodes in the cycle-free graph
    d_count = sum(1 for node in cfpg.graph.nodes() if node[1] == 'D')
    assert d_count == 2
Esempio n. 6
0
def test_from_graph_with_levels_bad_depth():
    """Check that PreCFPG.from_graph gives an empty graph when the requested
    path length (2) exceeds the depth of the supplied reach sets (1)."""
    edge_list = ((0, 1), (1, 0), (0, 2), (2, 0), (1, 2), (2, 1))
    g4_uns = nx.DiGraph()
    g4_uns.add_edges_from(edge_list)
    source = 0
    target = 2
    length = 2
    reach_sets = pg.get_reachable_sets(g4_uns, source, target, max_depth=1)
    f_reach, b_reach = reach_sets
    result = pg.PreCFPG.from_graph(g4_uns, source, target, length,
                                   fwd_reachset=f_reach,
                                   back_reachset=b_reach)
    assert not result.graph
Esempio n. 7
0
def test_prune():
    """Check that pcf.prune removes the requested node and leaves its inputs
    untouched.

    Pruning (2, 'S') from the length-4 paths graph should leave only the
    chain S -> B -> C -> D -> T.
    """
    g = nx.DiGraph()
    g.add_edges_from((('S', 'A'), ('S', 'B'), ('A', 'S'), ('B', 'C'),
                      ('C', 'D'), ('D', 'T'), ('B', 'T')))
    length = 4
    (f_level, b_level) = pg.get_reachable_sets(g, 'S', 'T', max_depth=length)
    pg_raw = pg.PathsGraph.from_graph(g, 'S', 'T', length, f_level, b_level)
    # Take a snapshot of the edges: in networkx 2.x edges() returns a live
    # view of the graph, so comparing the view against itself later could
    # never detect a mutation
    pg_raw_edges = set(pg_raw.graph.edges())
    nodes_to_prune = [(2, 'S')]
    # Prune the graph
    pg_pruned = pcf.prune(pg_raw.graph, nodes_to_prune, (0, 'S'),
                          (length, 'T'))
    # Make sure we didn't change the original graphs or node lists
    assert nodes_to_prune == [(2, 'S')]
    assert set(pg_raw.graph.edges()) == pg_raw_edges
    # The correctly pruned structure
    assert set(pg_pruned.edges()) == \
           set([((0, 'S'), (1, 'B')), ((1, 'B'), (2, 'C')),
                ((2, 'C'), (3, 'D')), ((3, 'D'), (4, 'T'))])
Esempio n. 8
0
def test_from_pg():
    """Build a PreCFPG from a PathsGraph on a fully connected 3-node digraph
    and check the resulting edges and tags for the length-2 path 0 -> 1 -> 2.
    """
    edge_list = ((0, 1), (1, 0), (0, 2), (2, 0), (1, 2), (2, 1))
    g4_uns = nx.DiGraph()
    g4_uns.add_edges_from(edge_list)
    source = 0
    target = 2
    length = 2
    reach_sets = pg.get_reachable_sets(g4_uns, source, target,
                                       max_depth=length)
    f_level, b_level = reach_sets
    pg_raw = pg.PathsGraph.from_graph(g4_uns, source, target, length, f_level,
                                      b_level)
    pre_cfpg = pg.PreCFPG.from_pg(pg_raw)

    expected_edges = {((0, 0), (1, 1)), ((1, 1), (2, 2))}
    expected_tags = {
        (0, 0): [(0, 0)],
        (1, 1): [(0, 0), (1, 1)],
        (2, 2): [(0, 0), (1, 1), (2, 2)],
    }
    assert isinstance(pre_cfpg, pg.PreCFPG)
    assert pre_cfpg.graph
    assert set(pre_cfpg.graph.edges()) == expected_edges
    assert pre_cfpg.tags == expected_tags
Esempio n. 9
0
def test_from_graph_with_levels():
    """Build a PreCFPG directly from a graph, passing reach sets computed to
    a depth (5) greater than the requested path length (2), and check the
    resulting edges and tags."""
    edge_list = ((0, 1), (1, 0), (0, 2), (2, 0), (1, 2), (2, 1))
    g4_uns = nx.DiGraph()
    g4_uns.add_edges_from(edge_list)
    source = 0
    target = 2
    length = 2
    reach_sets = pg.get_reachable_sets(g4_uns, source, target, max_depth=5)
    f_reach, b_reach = reach_sets
    pre_cfpg = pg.PreCFPG.from_graph(g4_uns, source, target, length,
                                     fwd_reachset=f_reach,
                                     back_reachset=b_reach)

    expected_edges = {((0, 0), (1, 1)), ((1, 1), (2, 2))}
    expected_tags = {
        (0, 0): [(0, 0)],
        (1, 1): [(0, 0), (1, 1)],
        (2, 2): [(0, 0), (1, 1), (2, 2)],
    }
    assert isinstance(pre_cfpg, pg.PreCFPG)
    assert pre_cfpg.graph
    assert set(pre_cfpg.graph.edges()) == expected_edges
    assert pre_cfpg.tags == expected_tags
Esempio n. 10
0
# Load a pickled graph from the local cache directory.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only use it on dumps from a trusted source.
print("Loading network")
with open('_cache/nx_dir_graph_db_dump_20190417.pkl', 'rb') as f:
    g = pickle.load(f)

print("Done loading network")

# Endpoints and parameters for the paths graph construction
source = 'NCKAP1'
target = 'TEAD1'
max_depth = 5
num_samples = 20000

# Reach sets are computed once, up to max_depth, and shared by every
# per-length paths graph built below
print("Getting reachable sets")
fwd_reach, back_reach = get_reachable_sets(g,
                                           source,
                                           target,
                                           max_depth,
                                           signed=False)

# Build one unsigned paths graph per path length from 1 to max_depth.
# NOTE(review): in the lines visible here `pg` is never appended to
# `pg_list` and `num_samples` is unused -- the script may continue beyond
# this excerpt; confirm.
print("Building PG")
pg_list = []
for cur_length in range(1, max_depth + 1):
    print("Building paths graph for length %d" % cur_length)
    pg = PathsGraph.from_graph(g,
                               source,
                               target,
                               cur_length,
                               fwd_reach,
                               back_reach,
                               signed=False,
                               target_polarity=0)
Esempio n. 11
0
def run_pg_cfpg(rg, source, target):
    """Run a sequential hypothesis test on paths sampled two ways and time it.

    First, cycle-free paths are sampled from the combined paths graph; then
    paths are sampled from the combined CFPG. In both cases batches of 10
    paths are drawn and tested with `exists_property(p, 5)` until the
    HypothesisTester returns a verdict, or until no paths can be sampled (in
    which case the verdict is 0).

    Returns
    -------
    (pg_elapsed, cfpg_elapsed) in seconds. Both are measured from the same
    starting point, so the CFPG time includes the paths graph time.
    """

    def sequential_test(draw_batch, batch=10):
        # Keep drawing batches until the tester reaches a decision
        tester = HypothesisTester(0.5, 0.1, 0.1, 0.05)
        outcomes = []
        drawn = 0
        while True:
            sampled = draw_batch(batch)
            if not sampled:
                # Nothing to sample: report 0 with the count drawn so far
                return 0, drawn
            outcomes += [exists_property(p, 5) for p in sampled]
            drawn += batch
            verdict = tester.test(outcomes)
            if verdict is not None:
                return verdict, drawn

    num_nodes = len(rg)

    # Build the paths graph for every length and combine them
    pg_start = time.time()
    f_level, b_level = get_reachable_sets(rg, source, target, num_nodes)
    pg_list = [
        PathsGraph.from_graph(rg, source, target, length, f_level, b_level)
        for length in range(1, num_nodes)
    ]
    combined_pg = CombinedPathsGraph(pg_list)

    tf, nsamples = sequential_test(combined_pg.sample_cf_paths)
    print(f'PG: {tf} based on {nsamples} samples')

    pg_elapsed = time.time() - pg_start
    print(f'PG: {pg_elapsed:.2f}s')

    # Convert every paths graph to its cycle-free version and combine them
    cfpg_list = [CFPG.from_pg(paths_graph) for paths_graph in pg_list]
    ccfpg = CombinedCFPG(cfpg_list)

    print('Sampling CFPG')
    tf, nsamples = sequential_test(ccfpg.sample_paths)
    print(f'CFPG: {tf} based on {nsamples} samples')

    cfpg_elapsed = time.time() - pg_start
    print(f'CFPG: {cfpg_elapsed:.2f}s')
    return pg_elapsed, cfpg_elapsed
Esempio n. 12
0
    def _sample_paths(self,
                      input_rule_set,
                      obs_name,
                      target_polarity,
                      max_paths=1,
                      max_path_length=5):
        """Sample paths to an observable from a weighted combined CFPG.

        A signed digraph is derived from `self.get_im()`, a dummy source node
        is connected to every input rule, and one CFPG is built per path
        length from 1 to `max_path_length`. Edge weights in the combined
        graph are set from the model's monomer initial conditions before
        sampling.

        Parameters
        ----------
        input_rule_set : iterable
            Nodes that receive an incoming edge (sign 0) from the dummy
            source node.
        obs_name :
            Target node passed to the paths graph functions.
        target_polarity :
            Values > 0 are passed to the paths graph as polarity 0, anything
            else as polarity 1.
        max_paths : int
            Number of paths requested from `sample_paths`; must not be 0.
        max_path_length : int
            Largest path length for which a CFPG is built.

        Returns
        -------
        PathResult
            'PATHS_FOUND' with the sampled paths (dummy source stripped), or
            'NO_PATHS_FOUND' with an empty path list when the combined graph
            is empty.

        Raises
        ------
        ValueError
            If `max_paths` is 0.
        ImportError
            If `has_pg` is false.
        """
        if max_paths == 0:
            raise ValueError("max_paths cannot be 0 for path sampling.")
        if not has_pg:
            raise ImportError("Paths Graph is not imported")
        # Helper that re-encodes the second element of every path node:
        # values > 0 become 0 and everything else becomes 1.
        # NOTE(review): the original comment described this as a conversion
        # "from 0/1 to 1/-1", which does not match the expression -- confirm
        # the intended encoding.

        def convert_polarities(path_list):
            return [
                tuple((n[0], 0 if n[1] > 0 else 1) for n in path)
                for path in path_list
            ]

        pg_polarity = 0 if target_polarity > 0 else 1
        nx_graph = self._im_to_signed_digraph(self.get_im())
        # Add edges from dummy node to input rules
        source_node = 'SOURCE_NODE'
        for rule in input_rule_set:
            nx_graph.add_edge(source_node, rule, sign=0)
        # -------------------------------------------------
        # Create combined paths_graph
        f_level, b_level = pg.get_reachable_sets(nx_graph,
                                                 source_node,
                                                 obs_name,
                                                 max_path_length,
                                                 signed=True)
        # One CFPG per path length, all sharing the reach sets computed above
        pg_list = []
        for path_length in range(1, max_path_length + 1):
            cfpg = pg.CFPG.from_graph(nx_graph,
                                      source_node,
                                      obs_name,
                                      path_length,
                                      f_level,
                                      b_level,
                                      signed=True,
                                      target_polarity=pg_polarity)
            pg_list.append(cfpg)
        combined_pg = pg.CombinedCFPG(pg_list)
        # Make sure the combined paths graph is not empty
        if not combined_pg.graph:
            pr = PathResult(False, 'NO_PATHS_FOUND', max_paths,
                            max_path_length)
            pr.path_metrics = None
            pr.paths = []
            return pr

        # Get a dict of rule objects
        rule_obj_dict = {}
        for ann in self.model.annotations:
            if ann.predicate == 'rule_has_object':
                rule_obj_dict[ann.subject] = ann.object

        # Get monomer initial conditions
        ic_dict = {}
        for mon in self.model.monomers:
            # FIXME: A hack that depends on the _0 convention
            ic_name = '%s_0' % mon.name
            # TODO: Wrap this in try/except?
            ic_param = self.model.parameters[ic_name]
            ic_value = ic_param.value
            ic_dict[mon.name] = ic_value

        # Set weights in PG based on model initial conditions
        for cur_node in combined_pg.graph.nodes():
            edge_weights = {}
            rule_obj_list = []
            edge_weights_by_gene = {}
            for u, v in combined_pg.graph.out_edges(cur_node):
                # The rule name is read from the nested node tuple of the
                # edge's head
                v_rule = v[1][0]
                # Get the object of the rule (a monomer name)
                rule_obj = rule_obj_dict.get(v_rule)
                if rule_obj:
                    # Add to list so we can count instances by gene
                    rule_obj_list.append(rule_obj)
                    # Get the abundance of rule object from the initial
                    # conditions
                    # TODO: Wrap in try/except?
                    ic_value = ic_dict[rule_obj]
                else:
                    # Rules without a known object get a default weight
                    ic_value = 1.0
                edge_weights[(u, v)] = ic_value
                # Keyed by rule object, so each distinct object contributes
                # once to the sum below; rules without an object share the
                # key None
                edge_weights_by_gene[rule_obj] = ic_value
            # Get frequency of different rule objects
            rule_obj_ctr = Counter(rule_obj_list)
            # Normalize results by weight sum and gene frequency at this level
            edge_weight_sum = sum(edge_weights_by_gene.values())
            edge_weights_norm = {}
            for e, v in edge_weights.items():
                # e is (u, v), so e[1][1][0] is the same rule name as v_rule
                # in the loop above
                v_rule = e[1][1][0]
                rule_obj = rule_obj_dict.get(v_rule)
                if rule_obj:
                    rule_obj_count = rule_obj_ctr[rule_obj]
                else:
                    rule_obj_count = 1
                # NOTE(review): this divides by edge_weight_sum; if every
                # initial condition on the outgoing edges is 0 this would
                # raise ZeroDivisionError -- confirm that cannot happen
                edge_weights_norm[e] = ((v / float(edge_weight_sum)) /
                                        float(rule_obj_count))
            # Add edge weights to paths graph
            nx.set_edge_attributes(combined_pg.graph,
                                   name='weight',
                                   values=edge_weights_norm)

        # Sample from the combined CFPG
        paths = combined_pg.sample_paths(max_paths)
        # -------------------------------------------------
        if paths:
            pr = PathResult(True, 'PATHS_FOUND', max_paths, max_path_length)
            pr.path_metrics = None
            # Re-encode the polarity element of every node (see the helper
            # defined at the top of this method)
            pr.paths = convert_polarities(paths)
            # Strip off the SOURCE_NODE prefix
            pr.paths = [p[1:] for p in pr.paths]
        else:
            # NOTE(review): a non-empty graph is expected to always yield
            # paths. With this assert the fallback below only runs when
            # assertions are disabled (python -O) -- confirm which behavior
            # is intended.
            assert False
            pr = PathResult(False, 'NO_PATHS_FOUND', max_paths,
                            max_path_length)
            pr.path_metrics = None
            pr.paths = []
        return pr
Esempio n. 13
0
print(edge_count)
# Build the signed directed graph from the edge list prepared earlier in the
# script
pb_signed = nx.DiGraph()
pb_signed.add_edges_from(pb_sign_edges)

# Edges from a dummy 'root' node to every source node, each with sign 0
src_edges = list(itertools.product(['root'], src_nodes, [{'sign': 0}]))

# `graph` is the same object as pb_signed, so the dummy edges are added to
# pb_signed as well
graph = pb_signed
graph.add_edges_from(src_edges)
source = 'root'
target = chek2_node
depth = 6
num_samples = 1000

# Reach sets are computed once, up to `depth`, and shared by every
# per-length paths graph built below
f_level, b_level = get_reachable_sets(graph,
                                      source,
                                      target,
                                      depth,
                                      signed=True)
pg_list = []

# One signed paths graph per path length from 1 to depth, all with target
# polarity 1, merged into a single combined paths graph
for i in range(1, depth + 1):
    pg = PathsGraph.from_graph(graph,
                               source,
                               target,
                               i,
                               f_level,
                               b_level,
                               signed=True,
                               target_polarity=1)
    pg_list.append(pg)
combined_pg = CombinedPathsGraph(pg_list)
Esempio n. 14
0
        #if ag_ns == 'HGNC':
        #    ag_id = hgnc_client.get_hgnc_id(ag_id)
        source_list.append((ag_ns, ag_id))

    # Add a dummy source
    graph_file = '../input/july_2018_pa_HGNC_FPLX_typed_directional_pairs.tsv'
    graph = load_stmt_graph(graph_file)
    dummy_edges = [('SOURCE', src[1]) for src in source_list]
    dummy_edges += [(tgt[1], 'TARGET') for tgt in target_list]
    graph.add_edges_from(dummy_edges)

    max_depth = 8
    pg_list = []
    lengths = []
    stmt_counts = []
    f_level, b_level = get_reachable_sets(graph, 'SOURCE', 'TARGET', max_depth)
    for length in range(3, max_depth + 1):
        pg = PathsGraph.from_graph(graph,
                                   'SOURCE',
                                   'TARGET',
                                   length,
                                   fwd_reachset=f_level,
                                   back_reachset=b_level)

        stmt_hashes = get_stmt_hashes_from_pg(graph, pg)
        print("%d stmts for paths of length %d" %
              (len(stmt_hashes), length - 2))
        pg_list.append(pg)
        lengths.append(length - 2)
        stmt_counts.append(len(stmt_hashes))
    plt.ion()
Esempio n. 15
0
def test_initialize():
    """Check pcf._initialize_pre_cfpg on four graphs: one without cycles
    through the source or target, one with a cycle through the source and no
    acyclic paths, one with a cycle through the source and a single acyclic
    path, and a small fully connected graph taken from a random test case."""

    def initialize(graph, src, tgt, path_length):
        # Build the raw paths graph and run the initialization step on it
        f_level, b_level = pg.get_reachable_sets(graph, src, tgt,
                                                 max_depth=path_length)
        raw = pg.PathsGraph.from_graph(graph, src, tgt, path_length, f_level,
                                       b_level)
        initialized, node_tags = pcf._initialize_pre_cfpg(raw)
        return raw, initialized, node_tags

    source = 'A'
    target = 'D'
    length = 3
    chain_tags = {
        (0, 'A'): [(0, 'A')],
        (1, 'B'): [(0, 'A')],
        (2, 'C'): [(0, 'A')],
        (3, 'D'): [(0, 'A')],
    }

    # Simple graph with no cycles involving the source or target: nothing is
    # pruned, so the initialized graph equals the raw paths graph
    pg_raw, pg_0, tags = initialize(g1_uns, source, target, length)
    assert pg_0 == pg_raw.graph
    assert tags == chain_tags

    # Cycle passing through the source node, A, and no acyclic paths
    pg_raw, pg_0, tags = initialize(g2_uns, source, target, length)
    assert not pg_0
    assert not tags

    # Cycle passing through the source node, A, with one acyclic path
    pg_raw, pg_0, tags = initialize(g3_uns, source, target, length)
    expected_edges = {((0, 'A'), (1, 'B')),
                      ((1, 'B'), (2, 'C')),
                      ((2, 'C'), (3, 'D'))}
    assert set(pg_0.edges()) == expected_edges
    assert tags == chain_tags

    # From a randomly-generated network where no paths were found: guarantees
    # that the problem is NOT that pg_0 is empty
    g4_uns = nx.DiGraph()
    g4_uns.add_edges_from(((0, 1), (1, 0), (0, 2), (2, 0), (1, 2), (2, 1)))
    pg_raw, pg_0, tags = initialize(g4_uns, 0, 2, 2)
    assert pg_0
    assert tags
Esempio n. 16
0
    graph = nx.DiGraph()
    graph.add_edges_from(edges)
    draw(graph, join(output_dir, 'toy_%s_graph.pdf' % direction))

# Script entry point: draws the toy graph, its reach sets, a single paths
# graph and the combined paths graph into the directory given as the first
# command-line argument.
if __name__ == '__main__':

    # Directory that receives the generated PDF files
    output_dir = sys.argv[1]

    # Draw G
    draw(g, join(output_dir, 'toy_g.pdf'))

    depth = 4
    source = 'S'
    target = 'T'

    # Forward and backward reach sets, each drawn to its own file
    f_level, b_level = get_reachable_sets(g, source, target, depth)
    draw_reachset(g, f_level, 'forward', depth, output_dir)
    draw_reachset(g, b_level, 'backward', depth, output_dir)

    print("f_level", f_level)
    print("b_level", b_level)

    # Paths graph for the full depth; the reach sets computed above are not
    # passed in here
    pg = PathsGraph.from_graph(g, source, target, depth)
    draw(pg.graph, join(output_dir, 'toy_pg_%d.pdf' % depth))

    # Combined paths graph
    # NOTE(review): the upper bound 4 is hard-coded and happens to equal
    # `depth` -- presumably it should follow `depth`; confirm.
    pg_list = []
    for i in range(1, 4+1):
        pg_list.append(PathsGraph.from_graph(g, source, target, i))
    cpg = CombinedPathsGraph(pg_list)
    draw(cpg.graph, join(output_dir, 'toy_combined_pg.pdf'))
Esempio n. 17
0
def test_on_random_graphs():
    """Check CFPG paths against networkx all_simple_paths on random graphs.

    For a given source/target pair and every path length from 5 to 10, the
    cycle-free paths found through the CFPG must match the simple paths of
    that length enumerated by networkx.

    NOTE(review): the original description mentions 25 random graphs, but the
    loop below only covers the first one (`range(1)`) -- confirm whether that
    restriction is intended.
    """
    # We use 25 randomly generated graphs for testing the algorithm
    with open(random_graph_pkl, 'rb') as f:
        rg_dict = pickle.load(f)

    min_depth = 5
    max_depth = 10
    for i in range(1):
        edges, source, target = rg_dict[i]
        G_i = nx.DiGraph()
        G_i.add_edges_from(edges)
        print("graph# %d, %d nodes, %d edges" %
              (i, len(G_i.nodes()), len(G_i.edges())))
        # Reach sets are computed once, to the largest depth tested
        (f_reach, b_reach) = pg.get_reachable_sets(G_i,
                                                   source,
                                                   target,
                                                   max_depth=max_depth,
                                                   signed=False)
        # Try different path lengths
        for length in range(min_depth, max_depth + 1):
            print("Checking paths of length %d" % length)
            # For validation, we compute explicitly the set of paths in the
            # original graph of a fixed length
            P = list(nx.all_simple_paths(G_i, source, target, length + 1))
            # Filter to paths of this length (length edges = length + 1 nodes)
            P_correct = [tuple(p) for p in P if len(p) == length + 1]
            # Build the cycle-free paths graph for this length
            G_cf = pg.CFPG.from_graph(G_i, source, target, length, f_reach,
                                      b_reach)
            # Check the path count
            path_count = G_cf.count_paths()
            assert len(P_correct) == path_count
            # Enumerate paths using node tuples
            P_cf_pruned = G_cf.enumerate_paths(names_only=False)
            # Next we extract the paths by projecting down to second
            # component (node names)
            P_cf_pruned_names = G_cf.enumerate_paths(names_only=True)
            print("# of paths: %d" % len(P_cf_pruned_names))

            # We verify the three required properties.
            # Recall:
            # CF1: Every source-to-target path in G_cf is cycle free.
            # CF2: Every cycle free path in the original graph appears as a
            #      source-to-target path in G_cf.
            # CF3: There is a 1-1 correspondence between the paths in G_cf and
            # the paths in the original graph. This means there is no
            # redundancy in the representation. For every path in the original
            # graph there is a unique path in G_cf that corresponds to it.

            # We first verify CF1: a path with a repeated node has fewer
            # distinct nodes than its length.
            for p in P_cf_pruned_names:
                if len(p) != len(list(set(p))):
                    print("cycle!")
                    print(p)
                    assert False
            # Next we verify CF2. We will in fact check if the set of paths in
            # P_cf_pruned_names is exactly the set of paths in the original
            # graph.
            if set(P_correct) != set(P_cf_pruned_names):
                print("Paths do not match reference set from networkx")
                print("graph, length", (i, length))
                assert False
            # Finally we verify CF3
            if len(P_cf_pruned) != len(list(set(P_cf_pruned_names))):
                print("redundant representation!")
                print("graph, length", (i, length))
                assert False
            # NOTE(review): the statements from here to the end use names
            # that are not defined anywhere in this function (genes, depth,
            # rep_ix, depth_ix, results, graph, MAX_DEPTH) and also rebind
            # `target` and `i`. They look like a timing benchmark from a
            # different source that ended up appended to this test --
            # confirm and separate.
            target = random.choice(genes)
            print("depth", depth, "rep", rep_ix + 1, "source", source,
                  "target", target)
            # Time the networkx enumeration of simple paths up to `depth`;
            # `index` counts the paths found
            index = 0
            start = time.time()
            for p in nx.all_simple_paths(graph, source, target, cutoff=depth):
                if index % 10000 == 0:
                    print(index)
                index += 1
            end = time.time()
            nx_elapsed = end - start
            results[0, depth_ix, rep_ix] = nx_elapsed
            print("done")

            # Time the CFPG construction and path counting for every length
            # from 1 to `depth`
            start = time.time()
            f_level, b_level = get_reachable_sets(graph, source, target,
                                                  MAX_DEPTH)
            total_paths = 0
            for i in range(1, depth + 1):
                print(i)
                cfpg = CFPG.from_graph(graph, source, target, i, f_level,
                                       b_level)
                path_count = cfpg.count_paths()
                print(path_count, "paths")
                total_paths += path_count
            print("total paths", total_paths)
            print("nx paths", index)
            end = time.time()
            pg_elapsed = end - start
            results[1, depth_ix, rep_ix] = pg_elapsed
            print("NX time", nx_elapsed)
            print("CFPG time", pg_elapsed)