# Suffixes naming which pair of node sets a similarity score compares.
_SET_PAIRINGS = (
    "predecessors",
    "successors",
    "transient_in",
    "transient_out",
    "neighbors",
)

# Prefixes naming the similarity score families, in output-column order.
_SCORE_PREFIXES = ("JC", "cos", "PA", "RA", "AA")

# Every per-edge feature column, in the order it appears in the output frame.
_FEATURE_COLUMNS = (
    (
        "source_in_degree",
        "source_out_degree",
        "source_bi_degree",
        "source_neighbors",
        "sink_in_degree",
        "sink_out_degree",
        "sink_bi_degree",
        "sink_neighbors",
        "common_neighbors",
        "total_neighbors",
        "transitive_links",
    )
    + tuple(
        prefix + "_" + suffix
        for prefix in _SCORE_PREFIXES
        for suffix in _SET_PAIRINGS
    )
    + ("hub_promoted_index", "hub_suppressed_index")
)


def _directed_neighborhoods(G, node):
    """Return (predecessors, successors, bidirectional, all neighbours) of node as sets."""
    try:
        preds = set(G.predecessors(node))
        succs = set(G.successors(node))
    except Exception:
        # Best effort, as in the original: a node the graph cannot look up
        # (e.g. one that is absent from G) is treated as having no neighbours.
        return set(), set(), set(), set()
    return preds, succs, preds & succs, preds | succs


def _edge_features(G, source, sink):
    """Compute every per-edge feature for (source, sink) as a name -> value dict."""
    s_in, s_out, s_bi, s_nbrs = _directed_neighborhoods(G, source)
    d_in, d_out, d_bi, d_nbrs = _directed_neighborhoods(G, sink)

    common = len(s_nbrs & d_nbrs)
    features = {
        "source_in_degree": len(s_in),
        "source_out_degree": len(s_out),
        "source_bi_degree": len(s_bi),
        "source_neighbors": len(s_nbrs),
        "sink_in_degree": len(d_in),
        "sink_out_degree": len(d_out),
        "sink_bi_degree": len(d_bi),
        "sink_neighbors": len(d_nbrs),
        "common_neighbors": common,
        "total_neighbors": len(s_nbrs | d_nbrs),
        "transitive_links": len(s_out & d_in),
    }

    # Which (source set, sink set) pair each column suffix compares.
    pairings = {
        "predecessors": (s_in, d_in),
        "successors": (s_out, d_out),
        "transient_in": (s_out, d_in),
        "transient_out": (s_in, d_out),
        "neighbors": (s_nbrs, d_nbrs),
    }
    # The scoring helpers are looked up here (call time) rather than at import
    # time so this block does not depend on where they are defined in the file.
    scorers = {
        "JC": jaccard_coeff,
        "cos": cosine_distance,
        "PA": preferential_attachment,
        "RA": lambda a, b: directed_resource_allocation(a, b, G),
        "AA": lambda a, b: directed_adamic_adar(a, b, G),
    }
    for prefix in _SCORE_PREFIXES:
        score = scorers[prefix]
        for suffix in _SET_PAIRINGS:
            left, right = pairings[suffix]
            features[prefix + "_" + suffix] = score(left, right)

    # Hub indices divide by the smaller / larger neighbourhood size; an empty
    # neighbourhood yields 0.0 instead of a ZeroDivisionError.
    smaller, larger = sorted((len(s_nbrs), len(d_nbrs)))
    features["hub_promoted_index"] = common / smaller if smaller else 0.0
    features["hub_suppressed_index"] = common / larger if larger else 0.0
    return features


def add_degree_features(G: DiGraph, df: pd.DataFrame) -> pd.DataFrame:
    """Build degree and neighbourhood-similarity features for each edge in ``df``.

    Each row's ``"edge"`` value is unpacked into ``(source, sink)``. For both
    endpoints the predecessor, successor, bidirectional and combined neighbour
    sets are read from ``G``; an endpoint that ``G`` cannot look up contributes
    empty sets. From those sets the function derives degree counts, overlap
    counts, and the ``JC_*``, ``cos_*``, ``PA_*``, ``RA_*`` and ``AA_*`` scores
    (delegated to ``jaccard_coeff``, ``cosine_distance``,
    ``preferential_attachment``, ``directed_resource_allocation`` and
    ``directed_adamic_adar``), plus hub promoted/suppressed indices.

    Args:
        G: Directed graph providing ``predecessors`` and ``successors``.
        df: Frame with an ``"edge"`` column of ``(source, sink)`` pairs.

    Returns:
        A new DataFrame holding only ``"edge"`` and the computed feature
        columns; other columns of ``df`` are not carried over. The Sorensen,
        LHN and density columns are plain pandas divisions, so rows whose
        denominator is zero hold ``NaN``/``inf`` rather than raising.
    """
    columns = {name: [] for name in _FEATURE_COLUMNS}
    for _, row in tqdm(df.iterrows()):
        source, sink = row["edge"]
        features = _edge_features(G, source, sink)
        for name in _FEATURE_COLUMNS:
            columns[name].append(features[name])

    frame_data = {"edge": df.edge}
    # "sink_bi_degree" now really holds the bidirectional degree; the original
    # filled it with the sink out-degree list by mistake.
    frame_data.update(columns)
    result = pd.DataFrame(frame_data)

    # Other indices
    result["sorensen_index"] = 2 * (
        result["common_neighbors"]
        / (result["source_neighbors"] + result["sink_neighbors"])
    )
    result["LHN_index"] = result["common_neighbors"] / (
        result["source_neighbors"] * result["sink_neighbors"]
    )

    # Degree densities: each degree as a fraction of the node's neighbour count.
    for endpoint in ("source", "sink"):
        neighbor_count = result[endpoint + "_neighbors"]
        for kind in ("in", "out", "bi"):
            result[endpoint + "_" + kind + "_density"] = (
                result[endpoint + "_" + kind + "_degree"] / neighbor_count
            )

    return result
def tree0(weight_value, startwindow, term):
    """Build a directed graph of "terms" from the cliques of successive windows.

    Windows from ``startwindow`` up to ``data.shape[1]`` (exclusive) are
    processed in order. In the start window every clique of
    ``windowGraph[window]`` with more than ``size_clique`` members becomes a
    term node attached to the root node ``0``. In each later window every such
    clique is intersected with the entries recorded for the previous window
    (``dic_last_time``); an intersection with at least ``size_clique`` members
    either becomes a new term node, replaces the existing node for the same
    member set, or is skipped when it duplicates / is contained in an
    intersection already produced in the current window.

    Reads the module-level names ``data``, ``windowGraph``, ``size_clique``,
    ``find_cliques`` and ``DiGraph``.

    NOTE(review): this is Python 2 code (print statements, ``dict.has_key``),
    and ``len(parent)`` presumes ``predecessors()`` returns a list - confirm
    the interpreter and graph-library version this is meant to run on.

    Args:
        weight_value: stored as the ``weight`` attribute of every term node.
        startwindow: index of the first window to process.
        term: node id given to the first term created; incremented by one for
            each node added.

    Returns:
        Tuple ``(cliqueGraph, dic_term, dic_term_num, term)``: the graph, a
        dict mapping ``frozenset(members)`` to its list of windows, a dict
        mapping ``frozenset(members)`` to its node id, and the next unused
        term id.
    """
    print 'start window:', startwindow
    # windowGraph = {}
    cliqueGraph = DiGraph()
    dic_term = {}  # frozenset(members) -> list of windows
    dic_last_time = {}  # entries produced in the previous window
    dic_temp = {}  # entries produced in the current window
    dic_term_num = {}  # frozenset(members) -> term node id
    dic_intersect_level = {}  # intersections already seen in the current window
    # term = 183
    root = 0
    # NOTE(review): the root stores its weight under 'weight_value' while term
    # nodes use 'weight' - confirm this difference is intended.
    cliqueGraph.add_node(root, annotation='root', windowsize='root', weight_value='root')
    w = data.shape[1]
    i = 0  # counts replaced nodes; only referenced by commented-out debug output
    q = 0  # flag: 1 means the current intersection must be skipped
    for window in range(startwindow, w):
        dic_intersect_level.clear()
        #print window ## mine
        if window == startwindow:
            # First window: every large enough clique becomes a child of root.
            for clique in find_cliques(windowGraph[window]):
                if len(clique) >size_clique:
                    cliqueGraph.add_node(term, annotation=list(clique), windowsize=[window], weight=weight_value)  # generate a term
                    cliqueGraph.add_edge(root, term)
                    dic_term[frozenset(clique)] = [window]  # dic_term records the windows of each clique
                    dic_term_num[frozenset(clique)] = term  # dic_term_num records the term id of each clique
                    dic_last_time[frozenset(clique)] = [window]  # dic_last_time keeps this window's entries for comparison in the next window
                    term = term + 1
                    print 'for start window '
                else:
                    continue
            # print len(dic_last_time), len(dic_term), cliqueGraph.number_of_nodes()
        else:
            for clique in find_cliques(windowGraph[window]):
                if len(clique) > size_clique:
                    #print window, 'clique:', clique ## mine
                    for key, value in dic_last_time.items():  # key is the member set, value is its list of windows
                        intersect = sorted(set(key).intersection(set(clique)))
                        q = 0
                        # if len(intersect) >= size_clique:
                        if len(intersect) >= size_clique:
                            #print 'intersect', intersect
                            # Compare the new intersection with the intersections
                            # already produced in this window so that duplicate or
                            # nested intersections are not added again.
                            # NOTE(review): entries are popped from
                            # dic_intersect_level inside this loop; that is only
                            # safe because Python 2 items() returns a list copy.
                            for ik, iv in dic_intersect_level.items():
                                if set(intersect) == (set(ik)):  # exactly the same intersection was already produced
                                    # Link the two terms only when they have different ids.
                                    if dic_term_num[frozenset(key)] != dic_term_num[frozenset(ik)]:
                                        cliqueGraph.add_edge(dic_term_num[frozenset(key)], dic_term_num[frozenset(ik)])
                                    q = 1
                                    break
                                elif set(intersect).issuperset(set(ik)):  # the new intersection is a superset of ik
                                    cliqueGraph.remove_node(dic_term_num[frozenset(ik)])
                                    # Remove ik's information from all four dictionaries.
                                    # NOTE(review): pop() without a default raises
                                    # KeyError when ik is missing; dic_temp is not
                                    # filled on every path that adds to
                                    # dic_intersect_level - confirm this cannot occur.
                                    dic_term.pop(frozenset(ik))
                                    dic_term_num.pop(frozenset(ik))
                                    dic_intersect_level.pop(frozenset(ik))
                                    dic_temp.pop(frozenset(ik))
                                elif set(intersect).issubset(set(ik)):  # the new intersection is a subset of ik
                                    q = 1
                                    break
                            if q == 1:
                                continue
                            dic_intersect_level[frozenset(intersect)] = 1
                            if dic_term.has_key(frozenset(intersect)):  # this member set already has a term
                                parent = cliqueGraph.predecessors(dic_term_num[frozenset(intersect)])
                                children = cliqueGraph.successors(dic_term_num[frozenset(intersect)])
                                #print 'parent',len(parent)
                                if len(parent) > 0:
                                    # Existing node has predecessors (original note: the
                                    # term was generated by an intersection, so redirect):
                                    # create a replacement node with the extended window
                                    # list and move the old node's edges onto it.
                                    cliqueGraph.add_node(term, annotation=list(intersect), windowsize=value + [window], weight=weight_value)
                                    for p in parent:
                                        cliqueGraph.add_edge(p, term)  # re-attach incoming edges
                                    for c in children:
                                        cliqueGraph.add_edge(term, c)  # re-attach outgoing edges
                                    cliqueGraph.remove_node(dic_term_num[frozenset(intersect)])  # drop the now redundant node from the graph
                                    # print 'deleted intersect nodes:',dic_term_num[frozenset(intersect)]
                                    i = i + 1
                                    dic_term.pop(frozenset(intersect))  # remove the old entries
                                    dic_term_num.pop(frozenset(intersect))
                                    dic_term[frozenset(intersect)] = value + [window]  # register the new node
                                    dic_term_num[frozenset(intersect)] = term
                                    dic_temp[frozenset(intersect)] = value + [window]  # remember it for the next window
                                    term = term + 1
                                    continue
                                else:  # original note: the term was generated by a window
                                    continue
                            else:  # this member set has not appeared before: create a new term
                                # print 'new term intersect never appear:', term
                                cliqueGraph.add_node(term, annotation=list(intersect), windowsize=value + [window], weight=weight_value)  # generate a term
                                cliqueGraph.add_edge(dic_term_num[frozenset(key)], term)  # only the previous-window entry becomes the parent
                                dic_term[frozenset(intersect)] = value + [window]  # register the new node
                                dic_term_num[frozenset(intersect)] = term
                                dic_temp[frozenset(intersect)] = value + [window]  # remember it for the next window
                                term = term + 1
                        else:
                            continue
                else:
                    continue
            # The entries created in this window become the comparison set for
            # the next window.
            dic_last_time.clear()
            for key, value in dic_temp.items():
                dic_last_time[key] = value
            dic_temp.clear()
    print 'window', startwindow, 'size is', cliqueGraph.number_of_nodes(), cliqueGraph.number_of_edges()## mine
    # print 'deleted nodes:', i
    # fw = open('0904edges_remove.txt', 'w')
    # fw2 = open('0904terms_remove.txt', 'w')
    # fw.write('parent' + '\t' + 'child' + '\n')
    # for edge in cliqueGraph.edges():
    #     fw.write(str(edge[0]) + '\t' + str(edge[1]) + '\n')
    # fw.close()
    # fw2.write('term_id' + '\t' + 'anno_genes' + '\t' + 'window' + '\t' + 'gene_size' + '\t' + 'window_size' + '\n')
    # for key, value in dic_term.items():
    #     fw2.write(str(dic_term_num[key]) + '\t' + str(key) + '\t' + str(value) + '\t' + str(len(key)) + '\t' + str(len(value)) + '\n')
    # fw2.close()
    # for nodes in cliqueGraph.nodes():
    #     if cliqueGraph.degree(nodes) == 0:
    #         print nodes
    return cliqueGraph, dic_term, dic_term_num, term