Example #1
0
def scaling_random_graphs(num_samples, min_size, max_size, edge_prob=0.5):
    """Time three path-analysis strategies on random directed graphs.

    For every node count from ``min_size`` to ``max_size`` (inclusive),
    ``num_samples`` directed Erdos-Renyi graphs are generated. On each
    graph, with node ``0`` as source and node ``num_nodes - 1`` as target,
    three wall-clock timings are recorded:

    1. enumerating all simple paths with NetworkX and computing path
       probabilities through a ``PathsTree``;
    2. building one ``PathsGraph`` per path length, combining them and
       counting the paths;
    3. additionally converting every ``PathsGraph`` into a ``CFPG``.

    Parameters
    ----------
    num_samples : int
        Number of random graphs generated per graph size.
    min_size : int
        Smallest number of nodes (inclusive).
    max_size : int
        Largest number of nodes (inclusive).
    edge_prob : float
        Edge probability passed to ``nx.erdos_renyi_graph``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(times_nx_paths, times_pg, times_cfpg)``; each array has shape
        ``(max_size - min_size + 1, num_samples)`` and holds elapsed
        seconds. ``times_cfpg`` is measured from the start of the
        paths-graph phase, so it includes the time stored in ``times_pg``.
    """
    data_shape = (max_size - min_size + 1, num_samples)
    times_nx_paths = np.empty(data_shape)
    times_pg = np.empty(data_shape)
    times_cfpg = np.empty(data_shape)

    # Iterate over number of nodes in network
    for i, num_nodes in enumerate(range(min_size, max_size + 1)):
        print(f'Number of nodes in network: {num_nodes}')

        # Iterate over num_samples random graphs of this size
        for j in range(num_samples):
            print(f'Sample {j}')
            rg = nx.erdos_renyi_graph(num_nodes, edge_prob, directed=True)

            # First and last node serve as source and target
            source = 0
            target = num_nodes - 1

            # Phase 1: enumerate simple paths and compute probabilities
            start = time.time()
            paths = [tuple(p)
                     for p in nx.all_simple_paths(rg, source, target)]
            # The result is not used; the call is part of the timed work.
            PathsTree(paths).path_probabilities()
            times_nx_paths[i, j] = time.time() - start

            # Phase 2: per-length paths graphs plus the combined graph
            pg_start = time.time()
            f_level, b_level = get_reachable_sets(rg, source, target,
                                                  num_nodes)
            pg_list = [
                PathsGraph.from_graph(rg, source, target, length,
                                      f_level, b_level)
                for length in range(1, num_nodes)
            ]
            # Built only so that its construction cost is timed.
            CombinedPathsGraph(pg_list)
            # CombinedPathsGraph has no count_paths method, so the total
            # is the sum of the per-length PathsGraph counts.
            total_paths = sum(pg.count_paths() for pg in pg_list)
            print(f'Total paths (with cycles): {total_paths}')
            times_pg[i, j] = time.time() - pg_start

            # Phase 3: cycle-free paths graphs. The timer deliberately
            # keeps running from pg_start, so this timing is cumulative.
            for pg in pg_list:
                CFPG.from_pg(pg)
            times_cfpg[i, j] = time.time() - pg_start
    return times_nx_paths, times_pg, times_cfpg
def run_pg_vs_nx(graph, source, target, depth, num_samples):
    """Build paths graphs up to ``depth`` and sample paths from them.

    A ``PathsGraph`` is built for every length from 1 to ``depth``, the
    graphs are merged into a ``CombinedPathsGraph``, and paths are drawn
    from it in chunks of 100 until at least ``num_samples`` paths have been
    collected (the final chunk may push the total slightly above
    ``num_samples``). Sampling stops early if the combined graph yields an
    empty chunk.

    The NetworkX comparison (``nx.all_simple_paths`` with
    ``cutoff=depth``) is currently disabled, so the NetworkX entries of
    the result are empty placeholders.

    Parameters
    ----------
    graph
        Graph handed to ``get_reachable_sets`` and
        ``PathsGraph.from_graph``.
    source, target
        Source and target nodes of the paths.
    depth : int
        Maximum path length for which a paths graph is built.
    num_samples : int
        Minimum number of paths to sample.

    Returns
    -------
    dict
        Keys ``'pg_list'`` (per-length paths graphs), ``'pg_paths'``
        (sampled paths), ``'nx_paths'`` and ``'nx_paths_sampled'`` (empty
        lists), ``'pg_time'`` (seconds spent building and sampling) and
        ``'nx_time'`` (always 0).
    """
    # PG construction and sampling are timed together
    start = time.time()
    f_level, b_level = get_reachable_sets(graph, source, target, depth)
    pg_list = [
        PathsGraph.from_graph(graph, source, target, length,
                              f_level, b_level)
        for length in range(1, depth + 1)
    ]
    combined_pg = CombinedPathsGraph(pg_list)
    print("Sampling from PG")
    cf_paths = []
    while len(cf_paths) < num_samples:
        print(f'{len(cf_paths)} / {num_samples}')
        cf_path_chunk = combined_pg.sample_paths(100)
        if not cf_path_chunk:
            # Nothing can be sampled; stop instead of looping forever.
            break
        # Accumulate the chunk so the loop condition can make progress.
        # NOTE(review): sample_paths is used although the list is named
        # cf_paths; confirm whether cycle-free filtering was intended.
        cf_paths.extend(cf_path_chunk)
    end = time.time()
    print("Done generating PGs")
    pg_elapsed = end - start

    # NetworkX enumeration is disabled; return empty placeholders.
    nx_paths = []
    nx_sampled_paths = []
    nx_elapsed = 0
    return {
        'pg_list': pg_list,
        'pg_paths': cf_paths,
        'nx_paths': nx_paths,
        'nx_paths_sampled': nx_sampled_paths,
        'pg_time': pg_elapsed,
        'nx_time': nx_elapsed
    }
# Forward and backward reachable sets up to max_depth (unsigned graph)
print("Getting reachable sets")
fwd_reach, back_reach = get_reachable_sets(
    g, source, target, max_depth, signed=False)

# One paths graph per path length from 1 to max_depth
print("Building PG")
pg_list = []
for cur_length in range(1, max_depth + 1):
    print("Building paths graph for length %d" % cur_length)
    pg = PathsGraph.from_graph(
        g, source, target, cur_length, fwd_reach, back_reach,
        signed=False, target_polarity=0)
    pg_list.append(pg)

# Merge the per-length graphs into a single combined paths graph
print("Building combined paths graph")
cpg = CombinedPathsGraph(pg_list)

# Sample cycle-free paths and tally them as (path, count) pairs,
# most frequent first
print("Sampling %d paths" % num_samples)
paths = cpg.sample_cf_paths(num_samples)
path_ctr = Counter(paths).most_common()
Example #4
0
def run_pg_cfpg(rg, source, target):
    """Compare sequential sampling on a combined PG and a combined CFPG.

    Paths graphs are built for every length from 1 to ``len(rg) - 1`` and
    combined. Cycle-free paths are then sampled from the combined graph in
    batches of 10, with ``exists_property(path, 5)`` evaluated on each
    path, until a ``HypothesisTester`` returns a non-``None`` decision (or
    the sampler returns nothing, in which case the decision is 0). The
    same procedure is repeated on a ``CombinedCFPG`` built from the same
    paths graphs.

    Parameters
    ----------
    rg
        Graph to analyse; ``len(rg)`` is used as the number of nodes.
    source, target
        Source and target nodes of the paths.

    Returns
    -------
    tuple of float
        ``(pg_elapsed, cfpg_elapsed)`` in seconds. Both are measured from
        the same starting point, so ``cfpg_elapsed`` includes the time
        spent in the paths-graph phase.
    """
    batch = 10

    def _sample_until_decided(draw):
        # Draw batches via ``draw`` until the tester reaches a decision;
        # return (decision, number of samples requested).
        tester = HypothesisTester(0.5, 0.1, 0.1, 0.05)
        outcomes = []
        requested = 0
        decision = None
        while decision is None:
            fresh = draw(batch)
            if not fresh:
                # No paths available: report 0 as the decision
                return 0, requested
            outcomes.extend(exists_property(p, 5) for p in fresh)
            requested += batch
            decision = tester.test(outcomes)
        return decision, requested

    num_nodes = len(rg)

    # Paths-graph phase: build per-length graphs and the combined graph
    pg_start = time.time()
    f_level, b_level = get_reachable_sets(rg, source, target, num_nodes)
    pg_list = [
        PathsGraph.from_graph(rg, source, target, length, f_level, b_level)
        for length in range(1, num_nodes)
    ]
    combined_pg = CombinedPathsGraph(pg_list)

    tf, nsamples = _sample_until_decided(combined_pg.sample_cf_paths)
    print(f'PG: {tf} based on {nsamples} samples')

    pg_elapsed = time.time() - pg_start
    print(f'PG: {pg_elapsed:.2f}s')

    # CFPG phase: convert every paths graph and combine the results
    ccfpg = CombinedCFPG([CFPG.from_pg(pg) for pg in pg_list])

    print('Sampling CFPG')
    tf, nsamples = _sample_until_decided(ccfpg.sample_paths)
    print(f'CFPG: {tf} based on {nsamples} samples')

    # Measured from pg_start, so this is cumulative over both phases
    cfpg_elapsed = time.time() - pg_start
    print(f'CFPG: {cfpg_elapsed:.2f}s')
    return pg_elapsed, cfpg_elapsed
Example #5
0
    f_level, b_level = get_reachable_sets(g, source, target, depth)
    draw_reachset(g, f_level, 'forward', depth, output_dir)
    draw_reachset(g, b_level, 'backward', depth, output_dir)

    print("f_level", f_level)
    print("b_level", b_level)

    pg = PathsGraph.from_graph(g, source, target, depth)
    draw(pg.graph, join(output_dir, 'toy_pg_%d.pdf' % depth))

    # Combined paths graph
    pg_list = []
    for i in range(1, 4+1):
        pg_list.append(PathsGraph.from_graph(g, source, target, i))
    cpg = CombinedPathsGraph(pg_list)
    draw(cpg.graph, join(output_dir, 'toy_combined_pg.pdf'))

    # Cycle-free paths graph
    cfpg = CFPG.from_pg(pg)
    # Remove the frozensets for drawing
    cfpg_edges_fixed = []
    for u, v in cfpg.graph.edges():
        u_set = '{}' if u[2] == 0 else str(set(u[2]))
        v_set = '{}' if v[2] == 0 else str(set(v[2]))
        u_fixed = str((u[0], u[1], u_set))
        v_fixed = str((v[0], v[1], v_set))
        cfpg_edges_fixed.append((u_fixed, v_fixed))
    cfpg_fixed = nx.DiGraph()
    cfpg_fixed.add_edges_from(cfpg_edges_fixed)
    draw(cfpg_fixed, join(output_dir, 'toy_cfpg_%d.pdf' % depth))
    # Node distribution
    source = 'SRC'
    target = 'CHEK2'
    result = run_pg_vs_nx(graph, source, target, MAX_DEPTH, 10000)
    #print("Pickling")
    #with open('pc_egfr_mapk1_max%d.pkl' % MAX_DEPTH, 'wb') as f:
    # pickle.dump(result, f)

    #with open('egfr_mapk1_depth_10_result.pkl', 'rb') as f:
    #    result = pickle.load(f)

    path_counts = []
    for pg in result['pg_list']:
        path_counts.append(pg.count_paths())
    combined_pg = CombinedPathsGraph(result['pg_list'])
    total_paths = np.sum(path_counts)
    print(path_counts)
    print(total_paths)

    # Plot num paths vs length
    plt.show
    plt.figure(figsize=(5, 2), dpi=150)
    ypos = list(range(1, MAX_DEPTH + 1))
    plt.bar(ypos, path_counts, align='center')
    #plt.xticks(ypos, str_names[:num_genes], rotation='vertical')
    ax = plt.gca()
    plt.ylabel('Number of paths')
    plt.xlabel('Path length')
    ax.set_yscale('log')
    #plt.subplots_adjust(bottom=0.3)