def calculate_sab(G, nodes_from, nodes_to):
    """Return the separation s_AB between two node sets on graph ``G``.

    The score is ``d_AB - (d_A + d_B) / 2`` where ``d_A`` and ``d_B`` are
    the within-set values reported by
    ``separation.calc_single_set_distance`` and ``d_AB`` is the between-set
    value reported by ``separation.calc_set_pair_distances``.

    Args:
        G: graph object, passed through unchanged to the ``separation``
            helpers.
        nodes_from: iterable of nodes forming set A.
        nodes_to: iterable of nodes forming set B.

    Returns:
        The separation value ``s_AB``.
    """
    # Materialise each input exactly once: both sets are needed twice, and a
    # one-shot iterable (e.g. a generator) would be empty on a second pass.
    # NOTE(review): the same set objects are now shared between the calls
    # below; this assumes the ``separation`` helpers do not mutate their
    # arguments -- confirm.
    set_a = set(nodes_from)
    set_b = set(nodes_to)

    # distances WITHIN the two gene sets
    d_A = separation.calc_single_set_distance(G, set_a)
    d_B = separation.calc_single_set_distance(G, set_b)

    # distances BETWEEN the two gene sets
    d_AB = separation.calc_set_pair_distances(G, set_a, set_b)

    # separation: between-set distance minus the mean within-set distance
    return d_AB - (d_A + d_B) / 2.
def analyze_proteins(protein_a, protein_b):
    """Return ``(d_AB, s_AB)`` for the gene sets of two proteins.

    If the ``(protein_a, protein_b)`` pair is already present in the
    module-level ``cache``, the stored value is returned as-is. This
    function only reads ``cache``; it does not store newly computed results.

    Otherwise each protein's genes (looked up in ``genes``) are restricted
    to ``all_genes_in_network`` and the distances are computed on ``G``:

    * ``d_AB`` -- result of ``calc_set_pair_distances`` for the two sets
    * ``s_AB`` -- ``d_AB - (d_A + d_B) / 2`` with ``d_A``/``d_B`` coming from
      ``calc_single_set_distance``
    """
    pair = (protein_a, protein_b)
    if pair in cache:
        return cache[pair]

    # keep only the genes that actually occur in the network
    network_genes_a = set(genes[protein_a]) & all_genes_in_network
    network_genes_b = set(genes[protein_b]) & all_genes_in_network

    # within-set distances first, then the between-set distance
    within_a = calc_single_set_distance(G, network_genes_a)
    within_b = calc_single_set_distance(G, network_genes_b)
    between = calc_set_pair_distances(G, network_genes_a, network_genes_b)

    separation_score = between - (within_a + within_b) / 2.
    return between, separation_score
def analyze_proteins(protein_a, protein_b):
    """Compute the pair distance and separation for two proteins' gene sets.

    A hit in the module-level ``cache`` (keyed by the
    ``(protein_a, protein_b)`` tuple) short-circuits the computation and the
    cached value is returned unchanged. Nothing is written to ``cache`` here.

    Returns:
        ``(d_AB, s_AB)`` where ``d_AB`` is what ``calc_set_pair_distances``
        returns for the two network-restricted gene sets and
        ``s_AB = d_AB - (d_A + d_B) / 2``.
    """
    cache_key = (protein_a, protein_b)
    if cache_key in cache:
        return cache[cache_key]

    # restrict both gene lists to genes that are present in the network
    set_a, set_b = (
        set(genes[protein_a]) & all_genes_in_network,
        set(genes[protein_b]) & all_genes_in_network,
    )

    # Perform calculations (within A, within B, then between A and B)
    dist_a = calc_single_set_distance(G, set_a)
    dist_b = calc_single_set_distance(G, set_b)
    dist_ab = calc_set_pair_distances(G, set_a, set_b)

    mean_within = (dist_a + dist_b) / 2.
    return dist_ab, dist_ab - mean_within
def _closest_partners(sources, targets, path_lengths):
    """Return one ``(source, target, distance)`` tuple per source gene.

    For every gene in ``sources`` the nearest gene in ``targets`` is picked.
    A gene present in both sets is its own partner at distance 0. Pairs with
    no entry in ``path_lengths`` are ignored, and a source with no usable
    partner at all contributes nothing to the result.

    ``path_lengths`` is indexed as ``path_lengths[smaller][larger]``.
    """
    closest = []
    for source in sources:
        candidates = []
        for target in targets:
            if source == target:
                candidates.append((source, target, 0))
                continue
            try:
                candidates.append(
                    (source, target,
                     path_lengths[min(source, target)][max(source, target)]))
            except KeyError:
                # no path length recorded for this pair; skip it
                continue
        if candidates:
            # min() keeps the first candidate on ties
            closest.append(min(candidates, key=lambda entry: entry[2]))
    return closest


def analyze_proteins(protein_a, protein_b):
    """Return ``(d_AB, s_AB, closest_pairs)`` for two proteins' gene sets.

    Each protein's genes (looked up in ``genes``) are restricted to
    ``all_genes_in_network``. For every gene of A the closest gene of B is
    found, and vice versa, using the path lengths returned by
    ``get_pathlengths_for_two_sets``.

    Returns:
        d_AB: mean of the closest-partner distances over both directions
            (``nan`` when no pair has a known distance, as ``np.mean`` of an
            empty list).
        s_AB: ``d_AB - (d_A + d_B) / 2`` with ``d_A``/``d_B`` coming from
            ``calc_single_set_distance``.
        closest_pairs: the ``(gene, partner, distance)`` tuples, sorted by
            ascending distance.
    """
    genes_A = set(genes[protein_a]) & all_genes_in_network
    genes_B = set(genes[protein_b]) & all_genes_in_network
    all_path_lengths = get_pathlengths_for_two_sets(G, genes_A, genes_B)

    # within-set distances
    d_A = calc_single_set_distance(G, genes_A)
    d_B = calc_single_set_distance(G, genes_B)

    # closest partner for every gene, A -> B first and then B -> A
    all_distances = _closest_partners(genes_A, genes_B, all_path_lengths)
    all_distances += _closest_partners(genes_B, genes_A, all_path_lengths)

    # Use a real list: on Python 3 ``map`` returns an iterator, which
    # ``np.mean`` cannot average.
    d_AB = np.mean([entry[2] for entry in all_distances])
    s_AB = d_AB - (d_A + d_B) / 2.
    return d_AB, s_AB, sorted(all_distances, key=lambda entry: entry[2])
def compare_SD(disease_nodes, Gint, num_reps=10):
    """Compare a node set's distances with degree-preserving random graphs.

    ``separation.calc_single_set_distance`` is evaluated once on ``Gint`` and
    once on each of ``num_reps`` random graphs drawn with
    ``nx.configuration_model`` from the degree sequence of ``Gint``. The
    random graphs are relabelled with the node names of ``Gint`` so the same
    ``disease_nodes`` are used as seeds in every replicate.

    Args:
        disease_nodes: nodes of interest, passed to
            ``separation.calc_single_set_distance`` unchanged.
        Gint: the reference graph.
        num_reps: number of random graphs to generate.

    Returns:
        ``(SD_disease, SD_rand)``: the result for ``Gint`` and a flat list
        collecting the results of all random graphs.
    """
    SD_disease = separation.calc_single_set_distance(Gint, disease_nodes)

    # dict(...) accepts both the plain dict returned by networkx 1.x and the
    # DegreeView returned by networkx 2.x+.
    degree_by_node = dict(Gint.degree())
    node_labels = list(degree_by_node)
    degree_sequence = [degree_by_node[node] for node in node_labels]

    # configuration_model numbers its nodes 0..n-1 in degree-sequence order.
    # Map every index, not just the first len(G_rand) ones: G_rand is built
    # from edges only, so it can hold fewer than n nodes when degree-0 nodes
    # exist, and the higher indices would otherwise keep integer labels.
    index_to_label = dict(enumerate(node_labels))

    # get random distribution
    SD_rand = []
    for i in range(num_reps):
        print('calculating random set ' + str(i) + ' out of ' + str(num_reps))

        G_temp = nx.configuration_model(degree_sequence)

        # collapse the multigraph into a simple undirected graph
        # (parallel edges merge; self-loops are kept)
        G_rand = nx.Graph()
        G_rand.add_edges_from(G_temp.edges())
        G_rand = nx.relabel_nodes(G_rand, index_to_label)

        # get random shortest distances; extend() requires the helper to
        # return an iterable
        SD_rand.extend(
            separation.calc_single_set_distance(G_rand, disease_nodes))

    return SD_disease, SD_rand
# --------------------------------------------------------
#
# CALCULATE NETWORK QUANTITIES
#
# --------------------------------------------------------

# get lcc size S (computed once and reused for both the console output and
# the results message)
lcc = get_lcc_size(G, gene_set)
print("\n> lcc size = %s" % lcc)

# edge count from get_edges_size and its ratio to the gene-set size
# NOTE(review): raises ZeroDivisionError when gene_set is empty -- confirm
# that an empty set cannot reach this point.
edge_result = get_edges_size(G, gene_set)
mean = edge_result / len(gene_set)
print("> edges = %s " % edge_result)
print("> mean edges = %s" % mean)

# get mean shortest distance
d_s = tools.calc_single_set_distance(G, gene_set)
print("> mean shortest distance = %s" % d_s)

results_message = """
> gene set from \"%s\": %s genes
> lcc size S = %s
> diameter d_s = %s
""" % (gene_file, len(gene_set), lcc, d_s)

# --------------------------------------------------------
#
# CALCULATE RANDOM COMPARISON
#
# --------------------------------------------------------

# append the random-comparison report to the summary text
results_message = results_message + get_random_comparison(G, gene_set, sims)
len(gene_set_full - all_genes_in_network)) print "> remaining number of genes: %s" %(len(gene_set)) # -------------------------------------------------------- # # CALCULATE NETWORK QUANTITIES # # -------------------------------------------------------- # get lcc size S lcc = get_lcc_size(G,gene_set) print "\n> lcc size = %s" %(lcc) # get mean shortest distance d_s = tools.calc_single_set_distance(G,gene_set) print "> mean shortest distance = %s" %(d_s) results_message = """ > gene set from \"%s\": %s genes > lcc size S = %s > diameter d_s = %s """%(gene_file,len(gene_set),lcc,d_s) # -------------------------------------------------------- # # CALCULATE RANDOM COMPARISON # # -------------------------------------------------------- results_message += get_random_comparison(G,gene_set,sims)