def con_single_layer_net(edges, filter_value=0.1):
    """Fuse neighbour- and path-based similarity into one filtered edge list.

    The first-column nodes of ``edges`` are compared pairwise. For every
    unordered pair the fused score is the equal-weight average of the
    matrices returned by ``cal_nei_sim`` and ``cal_path_sim``.

    :param edges: edge list handed to ``NetUtil.getNodes2HeterNet`` and to
        both similarity functions (presumably ``[first_col, second_col]``
        pairs -- confirm against the caller).
    :param filter_value: a pair is kept only when its fused score is
        strictly greater than this threshold.
    :return: list of ``[node_a, node_b, fused_score]`` for the kept pairs.
        The same pairs are also written to ``./layer_net.txt`` after being
        passed through ``common.sortDict``.
    """
    first_col_nodes, second_col_nodes = NetUtil.getNodes2HeterNet(edges)
    names = list(first_col_nodes)
    print("1st col -> {}\t 2nd col -> {}".format(len(names),
                                                  len(second_col_nodes)))

    neighbour_scores = cal_nei_sim(names, edges, save_nei_sim=True)
    path_scores = cal_path_sim(names, edges, save_path_sim=True)

    kept_edges = []
    kept_scores = {}
    total = len(names)
    for i, left in enumerate(names):
        for j in range(i + 1, total):
            score = 0.5 * neighbour_scores[i][j] + 0.5 * path_scores[i][j]
            if not score > filter_value:
                continue
            right = names[j]
            kept_edges.append([left, right, score])
            kept_scores["{}\t{}".format(left, right)] = score

    print("complete a single layer similarity network...")
    ranked = common.sortDict(kept_scores)
    FileUtil.writeSortedDic2File(ranked, "./layer_net.txt")
    return kept_edges
def cal_path_sim(disease, edges, save_path_sim=False):
    """Compute a pairwise similarity matrix from shared second-column nodes.

    A binary adjacency matrix ``A`` (rows: ``disease``, columns: the nodes
    returned by ``NetUtil.getColNodes(edges, col=1)``) is built from
    ``edges``. With ``W = A . A^T`` the similarity of rows ``i`` and ``j`` is
    ``2 * W[i][j] / (W[i][i] + W[j][j])``.

    :param disease: list of row node names; ``edge[0]`` of every edge must
        appear in it.
    :param edges: iterable of edges, indexed as ``edge[0]`` (row node) and
        ``edge[1]`` (column node).
    :param save_path_sim: when true, the pair scores are sorted in descending
        order and written to ``./path_sim.txt`` via
        ``FileUtil.writeSortedDic2File``.
    :return: symmetric ``len(disease) x len(disease)`` numpy array with a
        zero diagonal.
    :raises ValueError: if an edge refers to a node that is not in
        ``disease`` / the column node list.
    """
    print("begin calculate similarity based on path...")
    pathway = list(NetUtil.getColNodes(edges, col=1))

    # Name -> first index maps. These give the same index ``list.index``
    # would, without rescanning the whole list for every edge.
    row_of = {}
    for idx, name in enumerate(disease):
        row_of.setdefault(name, idx)
    col_of = {}
    for idx, name in enumerate(pathway):
        col_of.setdefault(name, idx)

    ajaMatrix = np.zeros((len(disease), len(pathway)))
    for line in edges:
        try:
            row_index = row_of[line[0]]
            col_index = col_of[line[1]]
        except KeyError as err:
            # Keep the exception type that ``list.index`` used to raise.
            raise ValueError(
                "{!r} is not in list".format(err.args[0])) from err
        ajaMatrix[row_index][col_index] = 1.0

    # W[i][j] counts the column nodes shared by rows i and j; W[i][i] is the
    # number of column nodes attached to row i.
    W = np.dot(ajaMatrix, ajaMatrix.T)

    print("construct similarity matrix...")
    count = len(disease)
    pathSim = {}
    sim_matrix = np.zeros((count, count))
    for i in range(count):
        for j in range(i + 1, count):
            denominator = W[i][i] + W[j][j]
            # Two rows without any column node used to give 0/0 = NaN, which
            # also breaks the descending sort below; treat it as "no
            # similarity".
            value = 2 * W[i][j] / denominator if denominator > 0 else 0.0
            sim_matrix[i][j] = value
            sim_matrix[j][i] = value
            if save_path_sim:
                pathSim["{}\t{}".format(disease[i], disease[j])] = value

    if save_path_sim:
        print("sort the path similarity and save...")
        res = sorted(pathSim.items(), key=lambda x: x[1], reverse=True)
        FileUtil.writeSortedDic2File(res, "./path_sim.txt")
    return sim_matrix
def calculateDisSim(disease_microbe, output_file):
    """Compute disease-disease similarity from a disease-microbe network.

    Every disease gets a weight vector over microbes: the number of rows
    linking the pair, signed by the regulation direction, multiplied by
    ``log2(#diseases / #diseases linked to that microbe)``. Disease pairs are
    then scored with ``common.cosinValue`` on those vectors; non-zero scores
    are passed through ``common.sortDict`` and written to ``output_file``.

    :param disease_microbe: 2-D list representing the disease-microbe
        network. Rows are read as ``row[0]`` = microbe, ``row[1]`` = disease
        and ``row[3]`` = ``"increase"`` / ``"decrease"`` (any other value
        leaves the sign at its current value, initially +1).
    :param output_file: str, path where the result is saved.
    :return: None
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time.
    begin_time = time.perf_counter()

    microbe2disease = defaultdict(set)
    disease2microbe = defaultdict(set)
    for line in disease_microbe:
        microbe2disease[line[0]].add(line[1])
        disease2microbe[line[1]].add(line[0])
    print("there are {} diseases and {} microbes in disease-microbe.".format(
        len(disease2microbe), len(microbe2disease)))

    diseases = list(disease2microbe)
    microbes = list(microbe2disease)
    # Constant-time name -> index lookups instead of list.index per row.
    disease_row = {name: idx for idx, name in enumerate(diseases)}
    microbe_col = {name: idx for idx, name in enumerate(microbes)}

    weight = np.zeros((len(diseases), len(microbes)))
    E = np.ones((len(diseases), len(microbes)))
    for line in disease_microbe:
        indexRow = disease_row[line[1]]
        indexCol = microbe_col[line[0]]
        weight[indexRow][indexCol] += 1
        if line[3] == "increase":
            E[indexRow][indexCol] = 1
        elif line[3] == "decrease":
            E[indexRow][indexCol] = -1

    # The log term depends on the microbe (column) only, so compute it once
    # per column and broadcast it over all disease rows.
    disease_total = float(len(diseases))
    column_factor = np.array([
        math.log2(disease_total / len(microbe2disease[microbe]))
        for microbe in microbes
    ])
    weight *= E * column_factor

    MicrobeSim = {}
    for i in range(len(diseases)):
        for j in range(i + 1, len(diseases)):
            cosine_value = common.cosinValue(weight[i], weight[j])
            if cosine_value != 0:
                MicrobeSim["{}\t{}".format(diseases[i],
                                           diseases[j])] = cosine_value
    MicrobeSim = common.sortDict(MicrobeSim)
    FileUtil.writeSortedDic2File(MicrobeSim, output_file)

    end_time = time.perf_counter()
    print("MicrobeSim costs {}s".format(end_time - begin_time))
def calculateDisSim(seed_list, net, output_path):
    """Compute disease-disease similarity on a PPI network via random walk.

    For every disease the genes that ``getCommonNodes`` returns for the
    network are used as seeds for ``walker.Walker(net).run_exp``. The
    resulting per-node values are summed over the genes of every other
    disease (``FRValueMatrix``). The similarity of a pair is
    ``(FR[i][j] + FR[j][i]) / (len(genes_i) + len(genes_j))``; positive
    scores are sorted in descending order and written to ``output_path``.

    :param seed_list: dict mapping a disease (str) to its set of genes.
    :param net: 2-D list representing a PPI network.
    :param output_path: str, path where the result is saved.
    :return: None
    """
    nodes = NetUtil.getNodes2HomoNet(net)
    print("there are {} diseases.".format(len(seed_list)))
    FRValueMatrix = np.zeros((len(seed_list), len(seed_list)))

    # time.clock() was removed in Python 3.8; perf_counter() replaces it.
    time1 = time.perf_counter()
    for rowOfFR, (disease, genesOfDisease) in enumerate(seed_list.items()):
        leavaList = getCommonNodes(nodes, genesOfDisease)
        # NOTE(review): a fresh Walker is built per disease as in the
        # original; hoist only if Walker is confirmed to be stateless.
        wk = walker.Walker(net)
        if len(leavaList) > 0:
            # Run RWR (random walk with restart), then get the proportion
            # of all nodes.
            temp_time1 = time.perf_counter()
            nodesPercent = wk.run_exp(leavaList, 0.7, 1)
            temp_time = time.perf_counter()
            print("{} - {} -> genes = {}, it cost {}s".format(
                rowOfFR, disease, len(leavaList), temp_time - temp_time1))

            # Accumulate the FR value of this disease against every
            # disease's gene set.
            for colOfFR, genesOfDisease2 in enumerate(seed_list.values()):
                FR = 0
                for gene in genesOfDisease2:
                    if gene in nodes:
                        FR += float(nodesPercent[gene])
                    elif gene in genesOfDisease:
                        # Gene is outside the network but shared by both
                        # diseases: counted as a full match.
                        FR += 1
                FRValueMatrix[rowOfFR][colOfFR] = FR

    print("begin to calculate NetSim value")
    # Only unordered pairs (i < j) reach the output, so the similarity is
    # computed for the upper triangle only. Comparing indices replaces the
    # original identity test (`is not`) between disease name strings.
    NetSimList = list(seed_list.keys())
    geneCounts = [len(genes) for genes in seed_list.values()]

    print("write the 'disease-diesae-value' to a file")
    simiResult = {}
    for i in range(len(NetSimList)):
        for j in range(i + 1, len(NetSimList)):
            totalGenes = geneCounts[i] + geneCounts[j]
            if totalGenes == 0:
                # Two empty gene sets: no score (avoids a 0/0 NaN).
                continue
            netSim = (FRValueMatrix[i][j] + FRValueMatrix[j][i]) / totalGenes
            if netSim > 0:
                simiResult['{}\t{}'.format(NetSimList[i],
                                           NetSimList[j])] = netSim

    sortedSimiResult = sorted(simiResult.items(),
                              key=lambda x: x[1],
                              reverse=True)
    FileUtil.writeSortedDic2File(sortedSimiResult, output_path)
    print("end")
    time2 = time.perf_counter()
    print("NetSim total cost {}s.".format(time2 - time1))
def _pairwise_dtw_matrix(pool, sequences):
    """Return an upper-triangular matrix of ``dtw_distance_fast`` results.

    One task per unordered pair ``(i, j)`` with ``i < j`` is submitted to
    ``pool``; entry ``[i][j]`` receives the result as a float, every other
    entry stays 0.
    """
    size = len(sequences)
    pairs = [(i, j) for i in range(size) for j in range(i + 1, size)]
    pending = [
        pool.apply_async(dtw_distance_fast, (sequences[i], sequences[j]))
        for i, j in pairs
    ]
    distances = np.zeros((size, size))
    for (i, j), task in zip(pairs, pending):
        distances[i][j] = float(task.get())
    return distances


def cal_nei_sim(disease, edges, save_nei_sim=False):
    """Compute a pairwise similarity matrix from neighbourhood degree sequences.

    For every node in ``disease`` the sorted degrees of its first-hop
    neighbours and of its second-hop neighbours (neighbours of neighbours,
    excluding the node itself) are collected. Pairs of nodes are compared
    with ``dtw_distance_fast`` on both sequences, and the similarity is
    ``exp(-(alpha * d1 + alpha**2 * d2))`` with ``alpha = 0.5``.

    :param disease: list of node names to compare; each must be a node of
        the graph built from ``edges``.
    :param edges: edge list passed to ``networkx.Graph.add_edges_from``.
    :param save_nei_sim: when true, the pair scores are sorted in descending
        order and written to ``./nei_Sim.txt`` via
        ``FileUtil.writeSortedDic2File``.
    :return: symmetric ``len(disease) x len(disease)`` numpy array with a
        zero diagonal.
    """
    print("begin to calculate similarity based on neighbours...")
    G = nx.Graph()
    G.add_edges_from(edges)
    # Build one heterogeneous graph from the different kinds of biological
    # information.
    print(
        "step 1: epsilon -> 2, calculate first degree sequence and second degree sequence..."
    )
    DegreeSequence1 = []
    DegreeSequence2 = []
    for di in disease:
        # First-hop neighbours and their sorted degrees.
        firstHop = list(G.neighbors(di))
        DegreeSequence1.append(
            sorted(nx.degree(G, node) for node in firstHop))

        # Second-hop neighbours: neighbours of the first hop, minus the node
        # itself. discard() instead of remove() so that a node without
        # neighbours does not raise KeyError.
        secondHop = set()
        for node in firstHop:
            secondHop.update(G.neighbors(node))
        secondHop.discard(di)
        DegreeSequence2.append(
            sorted(nx.degree(G, node) for node in secondHop))

    cores = multiprocessing.cpu_count()  # number of CPUs on this machine
    # The context manager tears the worker processes down when the block
    # exits; the original never closed the pool and leaked its workers.
    with multiprocessing.Pool(cores) as pool:
        print("step 2: compute neighbour_sim in parallel with {} cpus...".
              format(cores))
        # Pairwise distances of the first-hop degree sequences.
        arrOne = _pairwise_dtw_matrix(pool, DegreeSequence1)
        # Pairwise distances of the second-hop degree sequences.
        arrTwo = _pairwise_dtw_matrix(pool, DegreeSequence2)

    print("step 3: construct similarity matrix...")
    alpha = 0.5  # a decaying weight factor in the range between 0 and 1
    firstWeight = math.pow(alpha, 1)
    secondWeight = math.pow(alpha, 2)
    NeiSim = {}
    sim_matrix = np.zeros((len(disease), len(disease)))
    for i in range(len(disease)):
        for j in range(i + 1, len(disease)):
            distance = firstWeight * arrOne[i][j] + secondWeight * arrTwo[i][j]
            similarity = math.exp(-distance)
            sim_matrix[i][j] = similarity
            sim_matrix[j][i] = similarity
            if save_nei_sim:
                NeiSim["{}\t{}".format(disease[i], disease[j])] = similarity

    if save_nei_sim:
        print("sort the path similarity and save...")
        res = sorted(NeiSim.items(), key=lambda x: x[1], reverse=True)
        FileUtil.writeSortedDic2File(res, "./nei_Sim.txt")
    return sim_matrix