예제 #1
0
def test_pg_hypothesis_checking_batch():
    """Decide a hypothesis about sampled paths by sampling one path at a time.

    The toy graph has exactly four A -> E paths of length 4:

        ('A', 'B', 'D', 'B', 'E')
        ('A', 'B', 'D', 'C', 'E')
        ('A', 'C', 'D', 'B', 'E')
        ('A', 'C', 'D', 'C', 'E')

    Three of them pass through 'B' and one does not.  The formula
    'F([B])' expresses "B occurs somewhere on the path".
    """
    graph = nx.DiGraph()
    edges = [('A', 'B'), ('A', 'C'), ('C', 'D'), ('B', 'D'),
             ('D', 'B'), ('D', 'C'), ('B', 'E'), ('C', 'E')]
    graph.add_edges_from(edges)
    paths_graph = PathsGraph.from_graph(graph, 'A', 'E', 4)

    # Hypothesis test: the formula holds for a randomly sampled path with
    # probability at least 0.2, allowing a Type-I error rate of 0.1, a
    # Type-II error rate of 0.1 and an indifference region of 0.01 around
    # the 0.2 threshold.
    tester = HypothesisTester(0.2, 0.1, 0.1, 0.01)

    # Draw a single path per iteration, record whether it satisfies the
    # formula, and stop as soon as the tester can reach a decision (it
    # returns None while the evidence is still insufficient).
    outcomes = []
    decision = None
    while decision is None:
        sampled_path = paths_graph.sample_paths(1)[0]
        outcomes.append(PathChecker('F([B])', sampled_path).truth)
        decision = tester.test(outcomes)

    # NOTE(review): the meaning of the decision code 0 is defined by
    # HypothesisTester, which is not visible here -- confirm.
    assert decision == 0
예제 #2
0
def scaling_random_graphs(num_samples, min_size, max_size, edge_prob=0.5):
    """Time path computations on random directed graphs of growing size.

    For every node count from `min_size` to `max_size` (inclusive),
    `num_samples` directed Erdos-Renyi graphs are generated with edge
    probability `edge_prob`.  Node 0 is used as the source and the last
    node as the target.  Three wall-clock timings are recorded per graph.

    Parameters
    ----------
    num_samples : int
        Number of random graphs generated for each graph size.
    min_size : int
        Smallest number of nodes.
    max_size : int
        Largest number of nodes (inclusive).
    edge_prob : float
        Edge probability passed to `nx.erdos_renyi_graph`.

    Returns
    -------
    tuple of numpy.ndarray
        `(times_nx_paths, times_pg, times_cfpg)`, each of shape
        `(max_size - min_size + 1, num_samples)`.  `times_nx_paths` covers
        enumerating all simple paths plus building a PathsTree and its
        path probabilities; `times_pg` covers building the paths graphs
        and the combined paths graph; `times_cfpg` is measured from the
        same start as `times_pg` and therefore also includes the CFPG
        construction.
    """
    shape = (max_size - min_size + 1, num_samples)
    times_nx_paths = np.empty(shape)
    times_pg = np.empty(shape)
    times_cfpg = np.empty(shape)

    # One row per graph size, one column per random sample of that size
    for row, num_nodes in enumerate(range(min_size, max_size + 1)):
        print(f'Number of nodes in network: {num_nodes}')

        for col in range(num_samples):
            print(f'Sample {col}')
            rg = nx.erdos_renyi_graph(num_nodes, edge_prob, directed=True)
            source, target = 0, num_nodes - 1

            # --- NetworkX: enumerate simple paths, then build a paths
            # tree and compute its path probabilities (result unused; only
            # the time taken matters here).
            nx_start = time.time()
            paths = list(map(tuple, nx.all_simple_paths(rg, source, target)))
            PathsTree(paths).path_probabilities()
            times_nx_paths[row, col] = time.time() - nx_start

            # --- Paths graphs for every length, merged into one combined
            # paths graph.
            pg_start = time.time()
            f_level, b_level = get_reachable_sets(rg, source, target,
                                                  num_nodes)
            pg_list = [
                PathsGraph.from_graph(rg, source, target, length,
                                      f_level, b_level)
                for length in range(1, num_nodes)
            ]
            combined_pg = CombinedPathsGraph(pg_list)
            # NOTE: the original author flagged that there is no
            # count_paths method.
            total_paths = combined_pg.count_paths()
            print(f'Total paths (with cycles): {total_paths}')
            times_pg[row, col] = time.time() - pg_start

            # --- CFPGs derived from each paths graph.  The elapsed time
            # is deliberately taken from pg_start, as in the PG timing.
            cfpg_list = [CFPG.from_pg(pg) for pg in pg_list]
            times_cfpg[row, col] = time.time() - pg_start

    return times_nx_paths, times_pg, times_cfpg
예제 #3
0
def run_pg_vs_nx(graph, source, target, depth, num_samples):
    """Build paths graphs up to `depth`, sample paths from them and time it.

    Parameters
    ----------
    graph : networkx.DiGraph
        Graph on which paths are generated.
    source, target :
        Source and target nodes of the paths.
    depth : int
        Maximum path length; one PathsGraph is built for every length from
        1 to `depth` inclusive.
    num_samples : int
        Number of paths to sample from the combined paths graph.

    Returns
    -------
    dict
        'pg_list': the per-length PathsGraph objects;
        'pg_paths': the sampled paths (at most `num_samples`; fewer only if
        the sampler returns no paths);
        'pg_time': seconds spent building the paths graphs and sampling;
        'nx_paths', 'nx_paths_sampled': always empty lists and
        'nx_time': always 0, because the NetworkX enumeration used for
        comparison is currently disabled.
    """
    # PG sampling
    start = time.time()
    f_level, b_level = get_reachable_sets(graph, source, target, depth)
    pg_list = []
    for length in range(1, depth + 1):
        pg = PathsGraph.from_graph(graph, source, target, length,
                                   f_level, b_level)
        pg_list.append(pg)
    combined_pg = CombinedPathsGraph(pg_list)

    print("Sampling from PG")
    cf_paths = []
    while len(cf_paths) < num_samples:
        print(f'{len(cf_paths)} / {num_samples}')
        # Sample in chunks of at most 100, never requesting more than is
        # still needed to reach num_samples.
        chunk_size = min(100, num_samples - len(cf_paths))
        cf_path_chunk = combined_pg.sample_paths(chunk_size)
        if not cf_path_chunk:
            # Nothing can be sampled (e.g. no path exists); stop instead
            # of looping forever.
            break
        # Accumulate the chunk; without this the loop never terminates.
        cf_paths.extend(cf_path_chunk)
    end = time.time()
    print("Done generating PGs")
    pg_elapsed = end - start

    # The NetworkX all_simple_paths enumeration used for comparison is
    # disabled, so the nx_* entries are returned as empty placeholders.
    nx_paths = []
    nx_sampled_paths = []
    nx_elapsed = 0
    return {
        'pg_list': pg_list,
        'pg_paths': cf_paths,
        'nx_paths': nx_paths,
        'nx_paths_sampled': nx_sampled_paths,
        'pg_time': pg_elapsed,
        'nx_time': nx_elapsed
    }
예제 #4
0
파일: cfpg.py 프로젝트: kkaris/paths_graph
    def from_graph(klass, *args, **kwargs):
        """Build a CFPG directly from a graph.

        All positional and keyword arguments are forwarded unchanged to
        ``PathsGraph.from_graph``; the resulting paths graph is then
        converted with ``klass.from_pg``.

        Parameters
        ----------
        g : networkx.DiGraph
            The underlying graph on which paths will be generated.
        source : str
            Name of the source node.
        target : str
            Name of the target node.
        length : int
            Length of paths to compute.
        fwd_reachset : Optional[dict]
            Dictionary of sets representing the forward reachset computed
            over the original graph g up to a maximum depth greater than
            the requested path length.  If not provided, the forward reach
            set is calculated up to the requested path length by calling
            paths_graph.get_reachable_sets.
        back_reachset : Optional[dict]
            Dictionary of sets representing the backward reachset computed
            over the original graph g up to a maximum depth greater than
            the requested path length.  If not provided, the backward
            reach set is calculated up to the requested path length by
            calling paths_graph.get_reachable_sets.
        signed : bool
            Specifies whether the underlying graph and the corresponding
            f_level and b_level reachable sets have signed edges.  If
            True, sign information should be encoded in the 'sign' field
            of the edge data, with 0 indicating a positive edge and 1
            indicating a negative edge.
        target_polarity : 0 or 1
            Specifies the polarity of the target node, i.e. whether the
            desired path from source to target is positive: 0 indicates
            positive/activation, 1 indicates negative/inhibition.

        Returns
        -------
        CFPG
            Instance of CFPG class representing cycle-free paths from
            source to target with a given length and overall polarity.
        """
        return klass.from_pg(PathsGraph.from_graph(*args, **kwargs))
예제 #5
0
# Compute the forward/backward reachable sets once, up to max_depth, so the
# same sets can be passed to every per-length paths graph built below.
print("Getting reachable sets")
fwd_reach, back_reach = get_reachable_sets(
    g, source, target, max_depth, signed=False)

# One unsigned paths graph per path length 1..max_depth
print("Building PG")
pg_list = []
for cur_length in range(1, max_depth + 1):
    print("Building paths graph for length %d" % cur_length)
    pg = PathsGraph.from_graph(
        g, source, target, cur_length, fwd_reach, back_reach,
        signed=False, target_polarity=0)
    pg_list.append(pg)

print("Building combined paths graph")
cpg = CombinedPathsGraph(pg_list)

print("Sampling %d paths" % num_samples)
paths = cpg.sample_cf_paths(num_samples)
# (path, count) pairs ordered from the most to the least frequently sampled
path_ctr = Counter(paths).most_common()
예제 #6
0
def run_pg_cfpg(rg, source, target):
    """Run the same sequential hypothesis test via PG and via CFPG sampling.

    Paths graphs are built for every length from 1 to ``len(rg) - 1``.
    Paths are then sampled in batches of 10, first from the combined paths
    graph (``sample_cf_paths``) and then from the combined CFPG
    (``sample_paths``), until a HypothesisTester(0.5, 0.1, 0.1, 0.05) can
    decide on ``exists_property(path, 5)``.  Progress and timings are
    printed.

    Parameters
    ----------
    rg : graph
        Graph to analyse; ``len(rg)`` is used as the number of nodes.
    source, target :
        Source and target nodes of the paths.

    Returns
    -------
    tuple of float
        ``(pg_elapsed, cfpg_elapsed)`` in seconds.  Both are measured from
        the same starting point, so ``cfpg_elapsed`` includes the PG phase.
    """
    def _sequential_test(draw_paths):
        # Sample batches with draw_paths until the tester decides; returns
        # (decision, number of samples counted).  An empty batch yields
        # decision 0 without counting further samples.
        tester = HypothesisTester(0.5, 0.1, 0.1, 0.05)
        batch_size = 10
        outcomes = []
        drawn = 0
        while True:
            batch_paths = draw_paths(batch_size)
            if not batch_paths:
                return 0, drawn
            outcomes += [exists_property(p, 5) for p in batch_paths]
            drawn += batch_size
            verdict = tester.test(outcomes)
            if verdict is not None:
                return verdict, drawn

    num_nodes = len(rg)

    # Phase 1: paths graphs for every length plus the combined paths graph
    pg_start = time.time()
    f_level, b_level = get_reachable_sets(rg, source, target, num_nodes)
    pg_list = [
        PathsGraph.from_graph(rg, source, target, length, f_level, b_level)
        for length in range(1, num_nodes)
    ]
    combined_pg = CombinedPathsGraph(pg_list)

    tf, nsamples = _sequential_test(combined_pg.sample_cf_paths)
    print(f'PG: {tf} based on {nsamples} samples')

    pg_elapsed = time.time() - pg_start
    print(f'PG: {pg_elapsed:.2f}s')

    # Phase 2: derive a CFPG from every paths graph and combine them
    ccfpg = CombinedCFPG([CFPG.from_pg(pg) for pg in pg_list])

    print('Sampling CFPG')
    tf, nsamples = _sequential_test(ccfpg.sample_paths)
    print(f'CFPG: {tf} based on {nsamples} samples')

    # Measured from pg_start, i.e. cumulative over both phases
    cfpg_elapsed = time.time() - pg_start
    print(f'CFPG: {cfpg_elapsed:.2f}s')
    return pg_elapsed, cfpg_elapsed
예제 #7
0
# Search setup: target node, maximum path length and number of paths to
# sample.  `graph`, `source` and `chek2_node` are defined earlier in the
# script (not shown here).
target = chek2_node
depth = 6
num_samples = 1000

# Compute the reachable sets once, up to `depth`, on the signed graph so they
# can be passed to every paths graph built below.
f_level, b_level = get_reachable_sets(graph,
                                      source,
                                      target,
                                      depth,
                                      signed=True)
pg_list = []

# Build one signed paths graph per path length 1..depth.
# target_polarity=1 requests negative/inhibition polarity at the target.
for i in range(1, depth + 1):
    pg = PathsGraph.from_graph(graph,
                               source,
                               target,
                               i,
                               f_level,
                               b_level,
                               signed=True,
                               target_polarity=1)
    pg_list.append(pg)
# Merge the per-length paths graphs and sample paths from the combination
# (presumably cycle-free paths, going by the method name -- confirm).
combined_pg = CombinedPathsGraph(pg_list)
cf_paths = combined_pg.sample_cf_paths(num_samples)
"""
dist = get_node_distribution(cf_paths, None, None)
dist_filt = [(n[0][0][2], n[1]) for n in dist]
dist_filt = [n for n in dist_filt if n[0] not in ['o', 'SRC', 'CHEK2']]

node_dist = dist_filt
str_names, freqs = zip(*node_dist)
num_genes = 30
plt.ion()
예제 #8
0
    # Add dummy 'SOURCE' and 'TARGET' nodes: 'SOURCE' gets an edge to every
    # entry of source_list and every entry of target_list gets an edge to
    # 'TARGET', so a single SOURCE -> TARGET search covers all pairs.
    graph_file = '../input/july_2018_pa_HGNC_FPLX_typed_directional_pairs.tsv'
    graph = load_stmt_graph(graph_file)
    dummy_edges = [('SOURCE', src[1]) for src in source_list]
    dummy_edges += [(tgt[1], 'TARGET') for tgt in target_list]
    graph.add_edges_from(dummy_edges)

    max_depth = 8
    pg_list = []
    lengths = []
    stmt_counts = []
    f_level, b_level = get_reachable_sets(graph, 'SOURCE', 'TARGET', max_depth)
    for length in range(3, max_depth + 1):
        pg = PathsGraph.from_graph(graph,
                                   'SOURCE',
                                   'TARGET',
                                   length,
                                   fwd_reachset=f_level,
                                   back_reachset=b_level)

        stmt_hashes = get_stmt_hashes_from_pg(graph, pg)
        # The two dummy edges (SOURCE -> src and tgt -> TARGET) are not part
        # of the real path, hence the reported length is length - 2.
        print("%d stmts for paths of length %d" %
              (len(stmt_hashes), length - 2))
        pg_list.append(pg)
        lengths.append(length - 2)
        stmt_counts.append(len(stmt_hashes))
    plt.ion()
    # Plot the number of statements per path length.  stmt_counts has one
    # entry per length; stmt_hashes only holds the hashes of the last
    # iteration and must not be used as the y data.
    plt.plot(lengths, stmt_counts)
    ax = plt.gca()
    ax.set_yscale('log')
예제 #9
0
    # Draw G
    draw(g, join(output_dir, 'toy_g.pdf'))

    # Search parameters for the toy example
    depth = 4
    source = 'S'
    target = 'T'

    # Compute the forward and backward reachable sets up to `depth` and draw
    # each of them
    f_level, b_level = get_reachable_sets(g, source, target, depth)
    draw_reachset(g, f_level, 'forward', depth, output_dir)
    draw_reachset(g, b_level, 'backward', depth, output_dir)

    print("f_level", f_level)
    print("b_level", b_level)

    # Paths graph for paths of length `depth`; its file name includes the depth
    pg = PathsGraph.from_graph(g, source, target, depth)
    draw(pg.graph, join(output_dir, 'toy_pg_%d.pdf' % depth))

    # Combined paths graph
    # One paths graph per length 1..4, merged into a single combined graph.
    # NOTE(review): the upper bound 4 is hard-coded and equals `depth` above;
    # presumably it is meant to track `depth` -- confirm.
    pg_list = []
    for i in range(1, 4+1):
        pg_list.append(PathsGraph.from_graph(g, source, target, i))
    cpg = CombinedPathsGraph(pg_list)
    draw(cpg.graph, join(output_dir, 'toy_combined_pg.pdf'))

    # Cycle-free paths graph
    # `pg` is still the length-`depth` paths graph built above; the loop that
    # fills pg_list does not rebind it.
    cfpg = CFPG.from_pg(pg)
    # Remove the frozensets for drawing
    cfpg_edges_fixed = []
    for u, v in cfpg.graph.edges():
        u_set = '{}' if u[2] == 0 else str(set(u[2]))