# Note: these benchmark scripts rely on time, numpy (np), networkx (nx), and
# the paths-graph machinery (get_reachable_sets, PathsGraph, CombinedPathsGraph,
# CFPG, PathsTree), which are imported elsewhere in the original modules.
def scaling_random_graphs(num_samples, min_size, max_size, edge_prob=0.5):
    data_shape = (max_size - min_size + 1, num_samples)
    times_nx_paths = np.empty(data_shape)
    times_pg = np.empty(data_shape)
    times_cfpg = np.empty(data_shape)
    # Iterate over number of nodes in network
    for i, num_nodes in enumerate(range(min_size, max_size + 1)):
        print(f'Number of nodes in network: {num_nodes}')
        # Iterate over num_samples random graphs of this size
        for j in range(num_samples):
            print(f'Sample {j}')
            # Generate a random graph
            rg = nx.erdos_renyi_graph(num_nodes, edge_prob, directed=True)
            # Select two nodes as source and target
            source = 0
            target = num_nodes - 1
            # Time to compute all simple paths with path probabilities
            start = time.time()
            paths = [tuple(p) for p in nx.all_simple_paths(rg, source, target)]
            #paths2 = [tuple(p) for p in nx.shortest_simple_paths(rg, source, target)]
            #assert(set(paths) == set(paths2))
            # Now build a path tree from the paths and calculate probabilities
            pt = PathsTree(paths)
            path_probs = pt.path_probabilities()
            # Save the time it took to calculate
            end = time.time()
            elapsed = end - start
            times_nx_paths[i, j] = elapsed
            # Time to compute paths_graphs and make combined graph
            pg_start = time.time()
            f_level, b_level = get_reachable_sets(rg, source, target, num_nodes)
            pg_list = []
            for length in range(1, num_nodes):
                pg = PathsGraph.from_graph(rg, source, target, length,
                                           f_level, b_level)
                pg_list.append(pg)
            combined_pg = CombinedPathsGraph(pg_list)
            # CombinedPathsGraph has no count_paths method, so sum the
            # per-length counts from the individual PathsGraphs (as in the
            # plotting script below)
            total_paths = sum(pg.count_paths() for pg in pg_list)
            print(f'Total paths (with cycles): {total_paths}')
            #cf_paths = combined_pg.sample_cf_paths(100000)
            pg_elapsed = time.time() - pg_start
            times_pg[i, j] = pg_elapsed
            # Now compute the CFPG
            cfpg_list = []
            for pg in pg_list:
                cfpg = CFPG.from_pg(pg)
                cfpg_list.append(cfpg)
            # Measured from pg_start, so this includes the PG construction time
            cfpg_elapsed = time.time() - pg_start
            times_cfpg[i, j] = cfpg_elapsed
    return times_nx_paths, times_pg, times_cfpg
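
# Example usage (a minimal sketch): run the scaling benchmark on small random
# graphs and summarize the per-size median runtimes with numpy. The parameter
# values here are illustrative, not taken from the original experiments.
t_nx, t_pg, t_cfpg = scaling_random_graphs(num_samples=5, min_size=4,
                                           max_size=8, edge_prob=0.5)
# Rows index network sizes, columns index the random samples of that size
print('NX median times:  ', np.median(t_nx, axis=1))
print('PG median times:  ', np.median(t_pg, axis=1))
print('CFPG median times:', np.median(t_cfpg, axis=1))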
def run_pg_vs_nx(graph, source, target, depth, num_samples):
    # PG sampling
    start = time.time()
    f_level, b_level = get_reachable_sets(graph, source, target, depth)
    pg_list = []
    for i in range(1, depth + 1):
        pg = PathsGraph.from_graph(graph, source, target, i, f_level, b_level)
        pg_list.append(pg)
    combined_pg = CombinedPathsGraph(pg_list)
    print("Sampling from PG")
    cf_paths = []
    while len(cf_paths) < num_samples:
        print(f'{len(cf_paths)} / {num_samples}')
        cf_path_chunk = combined_pg.sample_paths(100)
        # Accumulate the sampled chunk (without this the loop never terminates)
        cf_paths += cf_path_chunk
    end = time.time()
    print("Done sampling from PG")
    pg_elapsed = end - start
    # Networkx enumeration, currently disabled (kept below as a string literal
    # for reference)
    index = 0
    start = time.time()
    nx_paths = []
    nx_sampled_paths = []
    """
    for p in nx.all_simple_paths(graph, source, target, cutoff=depth):
        nx_paths.append(tuple(p))
        if index % 10000 == 0:
            print(index)
        index += 1
    #print("Making PathsTree")
    #paths_tree = PathsTree(nx_paths)
    #print("Sampling PathsTree")
    #nx_sampled_paths = paths_tree.sample(num_samples)
    end = time.time()
    nx_elapsed = end - start
    #assert set(cf_paths) <= set(nx_paths)
    print("all_simple_paths done")
    print("Total paths (nx):", len(nx_paths))
    print("Unique sampled paths (pg):", len(set(cf_paths)))
    #print("Unique sampled_paths (tree):", len(set(nx_sampled_paths)))
    print("NX time", nx_elapsed)
    print("PG time", pg_elapsed)
    nx_sampled_paths = []
    """
    nx_elapsed = 0
    return {'pg_list': pg_list,
            'pg_paths': cf_paths,
            'nx_paths': nx_paths,
            'nx_paths_sampled': nx_sampled_paths,
            'pg_time': pg_elapsed,
            'nx_time': nx_elapsed}
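
# Example usage (a sketch, assuming networkx is imported as nx): run the
# PG-sampling side of the comparison on a small random graph. The graph size,
# edge probability, depth, and sample count are illustrative values only.
g_demo = nx.erdos_renyi_graph(20, 0.3, directed=True, seed=1)
res = run_pg_vs_nx(g_demo, 0, 19, depth=6, num_samples=1000)
print('Unique PG-sampled paths:', len(set(res['pg_paths'])))
print('PG time: %.2fs' % res['pg_time'])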
print("Getting reachable sets") fwd_reach, back_reach = get_reachable_sets(g, source, target, max_depth, signed=False) print("Building PG") pg_list = [] for cur_length in range(1, max_depth + 1): print("Building paths graph for length %d" % cur_length) pg = PathsGraph.from_graph(g, source, target, cur_length, fwd_reach, back_reach, signed=False, target_polarity=0) pg_list.append(pg) print("Building combined paths graph") cpg = CombinedPathsGraph(pg_list) print("Sampling %d paths" % num_samples) paths = cpg.sample_cf_paths(num_samples) path_ctr = Counter(paths) path_ctr = sorted([(k, v) for k, v in path_ctr.items()], key=lambda x: x[1], reverse=True)
def run_pg_cfpg(rg, source, target):
    num_nodes = len(rg)
    # Time to compute paths_graphs and make combined graph
    pg_start = time.time()
    f_level, b_level = get_reachable_sets(rg, source, target, num_nodes)
    pg_list = []
    for length in range(1, num_nodes):
        pg = PathsGraph.from_graph(rg, source, target, length,
                                   f_level, b_level)
        pg_list.append(pg)
    combined_pg = CombinedPathsGraph(pg_list)
    # Sample paths in batches, feeding the per-path property indicators to the
    # sequential hypothesis test until it reaches a decision (or the sampler
    # returns no more paths)
    ht = HypothesisTester(0.5, 0.1, 0.1, 0.05)
    tf = None
    tfs = []
    nsamples = 0
    batch = 10
    while tf is None:
        new_paths = combined_pg.sample_cf_paths(batch)
        if not new_paths:
            tf = 0
            break
        tfs += [exists_property(p, 5) for p in new_paths]
        nsamples += batch
        tf = ht.test(tfs)
    print(f'PG: {tf} based on {nsamples} samples')
    # cf_paths = combined_pg.sample_cf_paths(10000)
    # print(prob_ascending_path(cf_paths))
    pg_elapsed = time.time() - pg_start
    print(f'PG: {pg_elapsed:.2f}s')
    # Now compute the CFPG and repeat the same sequential test on its sampler
    cfpg_list = []
    for pg in pg_list:
        cfpg = CFPG.from_pg(pg)
        cfpg_list.append(cfpg)
    ccfpg = CombinedCFPG(cfpg_list)
    print('Sampling CFPG')
    ht = HypothesisTester(0.5, 0.1, 0.1, 0.05)
    tf = None
    tfs = []
    nsamples = 0
    batch = 10
    while tf is None:
        new_paths = ccfpg.sample_paths(batch)
        if not new_paths:
            tf = 0
            break
        tfs += [exists_property(p, 5) for p in new_paths]
        nsamples += batch
        tf = ht.test(tfs)
    print(f'CFPG: {tf} based on {nsamples} samples')
    #cfpg_paths = ccfpg.sample_paths(10000)
    #print(prob_ascending_path(cfpg_paths))
    # Measured from pg_start, so this includes the PG phase above
    cfpg_elapsed = time.time() - pg_start
    print(f'CFPG: {cfpg_elapsed:.2f}s')
    return pg_elapsed, cfpg_elapsed
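
# Example usage (a sketch, assuming networkx is imported as nx and that
# HypothesisTester and exists_property are importable from the original
# module): compare the PG and CFPG hypothesis-testing runs on a small random
# graph. Graph parameters and node indices are illustrative.
rg_demo = nx.erdos_renyi_graph(10, 0.4, directed=True, seed=0)
pg_time, cfpg_time = run_pg_cfpg(rg_demo, 0, 9)
# cfpg_time is measured from the same starting point as pg_time, so it is
# cumulative over both phases
print('PG: %.2fs, CFPG (cumulative): %.2fs' % (pg_time, cfpg_time))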
f_level, b_level = get_reachable_sets(g, source, target, depth)
draw_reachset(g, f_level, 'forward', depth, output_dir)
draw_reachset(g, b_level, 'backward', depth, output_dir)
print("f_level", f_level)
print("b_level", b_level)
pg = PathsGraph.from_graph(g, source, target, depth)
draw(pg.graph, join(output_dir, 'toy_pg_%d.pdf' % depth))
# Combined paths graph
pg_list = []
for i in range(1, 4 + 1):
    pg_list.append(PathsGraph.from_graph(g, source, target, i))
cpg = CombinedPathsGraph(pg_list)
draw(cpg.graph, join(output_dir, 'toy_combined_pg.pdf'))
# Cycle-free paths graph
cfpg = CFPG.from_pg(pg)
# Remove the frozensets for drawing
cfpg_edges_fixed = []
for u, v in cfpg.graph.edges():
    u_set = '{}' if u[2] == 0 else str(set(u[2]))
    v_set = '{}' if v[2] == 0 else str(set(v[2]))
    u_fixed = str((u[0], u[1], u_set))
    v_fixed = str((v[0], v[1], v_set))
    cfpg_edges_fixed.append((u_fixed, v_fixed))
cfpg_fixed = nx.DiGraph()
cfpg_fixed.add_edges_from(cfpg_edges_fixed)
draw(cfpg_fixed, join(output_dir, 'toy_cfpg_%d.pdf' % depth))
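
# The node-relabeling loop above can be factored into a small helper; this is
# a sketch restating the same conversion (CFPG nodes are assumed to be
# (level, name, frozenset-or-0) tuples, as in the loop above), not part of the
# paths_graph API.
def cfpg_node_to_str(node):
    # Render the frozenset component as a plain set (or '{}' for the 0 marker)
    tag_set = '{}' if node[2] == 0 else str(set(node[2]))
    return str((node[0], node[1], tag_set))

relabeled = nx.DiGraph()
relabeled.add_edges_from((cfpg_node_to_str(u), cfpg_node_to_str(v))
                         for u, v in cfpg.graph.edges())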
# Node distribution
source = 'SRC'
target = 'CHEK2'
result = run_pg_vs_nx(graph, source, target, MAX_DEPTH, 10000)
#print("Pickling")
#with open('pc_egfr_mapk1_max%d.pkl' % MAX_DEPTH, 'wb') as f:
#    pickle.dump(result, f)
#with open('egfr_mapk1_depth_10_result.pkl', 'rb') as f:
#    result = pickle.load(f)
path_counts = []
for pg in result['pg_list']:
    path_counts.append(pg.count_paths())
combined_pg = CombinedPathsGraph(result['pg_list'])
total_paths = np.sum(path_counts)
print(path_counts)
print(total_paths)
# Plot num paths vs length
plt.figure(figsize=(5, 2), dpi=150)
ypos = list(range(1, MAX_DEPTH + 1))
plt.bar(ypos, path_counts, align='center')
#plt.xticks(ypos, str_names[:num_genes], rotation='vertical')
ax = plt.gca()
plt.ylabel('Number of paths')
plt.xlabel('Path length')
ax.set_yscale('log')
#plt.subplots_adjust(bottom=0.3)
plt.show()
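
# A small follow-up sketch: compare the exhaustive path count per length with
# the number of distinct sampled paths of that length, using only quantities
# computed above (and assuming Counter from collections is available, as in
# the sampling script earlier).
sampled_by_len = Counter(len(p) - 1 for p in result['pg_paths'])
for length, exhaustive in zip(ypos, path_counts):
    print('length %d: %d paths total, %d distinct sampled'
          % (length, exhaustive, sampled_by_len.get(length, 0)))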