Esempio n. 1
0
def con_single_layer_net(edges, filter_value=0.1):
    """Build a single-layer similarity network from an edge list.

    The neighbour-based and path-based similarity matrices are averaged with
    equal weight. Node pairs whose fused score is strictly greater than
    ``filter_value`` are kept; the kept pairs are also sorted with
    ``common.sortDict`` and written to ``./layer_net.txt``.

    :param edges: edge list handed to ``NetUtil.getNodes2HeterNet``,
        ``cal_nei_sim`` and ``cal_path_sim``
    :param filter_value: exclusive lower bound on the fused similarity
    :return: list of ``[node_a, node_b, fused_similarity]`` entries
    """
    first_col, second_col = NetUtil.getNodes2HeterNet(edges)
    nodes = list(first_col)

    print("1st col -> {}\t 2nd col -> {}".format(len(nodes), len(second_col)))

    neighbour_scores = cal_nei_sim(nodes, edges, save_nei_sim=True)
    path_scores = cal_path_sim(nodes, edges, save_path_sim=True)

    kept_edges = []
    kept_scores = {}
    for a, node_a in enumerate(nodes):
        for b in range(a + 1, len(nodes)):
            node_b = nodes[b]
            fused = 0.5 * neighbour_scores[a][b] + 0.5 * path_scores[a][b]
            # Skip pairs that do not clear the threshold.
            if not fused > filter_value:
                continue
            kept_edges.append([node_a, node_b, fused])
            kept_scores["{}\t{}".format(node_a, node_b)] = fused

    print("complete a single layer similarity network...")
    ranked = common.sortDict(kept_scores)
    FileUtil.writeSortedDic2File(ranked, "./layer_net.txt")

    return kept_edges
Esempio n. 2
0
def cal_path_sim(disease, edges, save_path_sim=False):
    """Compute pairwise path-based similarity between the nodes in ``disease``.

    A binary adjacency matrix ``A`` is built from ``edges`` (rows: ``disease``,
    columns: the nodes returned by ``NetUtil.getColNodes(edges, col=1)``).
    With ``W = A . A^T`` the similarity of rows ``i`` and ``j`` is
    ``2 * W[i][j] / (W[i][i] + W[j][j])``.

    :param disease: list of row nodes; ``line[0]`` of every edge must be in it
    :param edges: iterable of edges whose first two items are the row node
        and the column node
    :param save_path_sim: when true, the pairs sorted by descending
        similarity are written to ``./path_sim.txt``
    :return: symmetric ``len(disease) x len(disease)`` numpy array with a
        zero diagonal
    :raises ValueError: if an edge refers to a node missing from ``disease``
        or from the column-node list
    """
    print("begin calculate similarity based on path...")
    pathway = list(NetUtil.getColNodes(edges, col=1))

    # Position lookup tables replace a linear list.index() scan per edge.
    # setdefault keeps the first occurrence, matching list.index().
    row_of = {}
    for position, node in enumerate(disease):
        row_of.setdefault(node, position)
    col_of = {}
    for position, node in enumerate(pathway):
        col_of.setdefault(node, position)

    ajaMatrix = np.zeros((len(disease), len(pathway)))
    for line in edges:
        try:
            row_index = row_of[line[0]]
            col_index = col_of[line[1]]
        except KeyError as err:
            # Keep the exception type that list.index() used to raise.
            raise ValueError(
                "{!r} is not in list".format(err.args[0])) from None
        ajaMatrix[row_index][col_index] = float(1)

    # W[i][j] counts the column nodes shared by rows i and j.
    W = np.dot(ajaMatrix, ajaMatrix.T)

    print("construct similarity matrix...")
    pathSim = {}
    sim_matrix = np.zeros((len(disease), len(disease)))
    for i in range(len(disease)):
        for j in range(i + 1, len(disease)):
            # NOTE(review): if neither row has any edge the denominator is
            # zero and numpy yields nan here -- confirm callers never pass
            # such nodes.
            similarity = 2 * W[i][j] / (W[i][i] + W[j][j])
            pathSim["{}\t{}".format(disease[i], disease[j])] = similarity
            sim_matrix[i][j] = similarity
            sim_matrix[j][i] = similarity

    if save_path_sim:
        print("sort the path similarity and save...")
        res = sorted(pathSim.items(), key=lambda x: x[1], reverse=True)
        FileUtil.writeSortedDic2File(res, "./path_sim.txt")

    return sim_matrix
Esempio n. 3
0
def calculateDisSim(disease_microbe, output_file):
    '''
    Compute pairwise disease similarity from a disease-microbe network.

    Each disease gets a weight vector over the microbes: the number of
    records linking the two, signed by the recorded direction and scaled by
    ``log2(#diseases / #diseases linked to that microbe)``. Disease pairs
    whose ``common.cosinValue`` is non-zero are sorted with
    ``common.sortDict`` and written to ``output_file``.

    :param disease_microbe: 2-D list describing the disease-microbe network;
        item 0 of a record is read as the microbe, item 1 as the disease and
        item 3 as the direction ("increase" or "decrease")
    :param output_file: str, path where the result is saved
    :return: None
    '''

    # time.clock() was removed in Python 3.8; perf_counter() replaces it.
    begin_time = time.perf_counter()
    microbe2disease = defaultdict(set)
    disease2microbe = defaultdict(set)

    for line in disease_microbe:
        microbe2disease[line[0]].add(line[1])
        disease2microbe[line[1]].add(line[0])

    print("there are {} diseases and {} microbes in disease-microbe.".format(
        len(disease2microbe), len(microbe2disease)))

    diseases = list(disease2microbe)
    microbes = list(microbe2disease)
    # Constant-time position lookups instead of list.index() per record.
    disease_row = {name: row for row, name in enumerate(diseases)}
    microbe_col = {name: col for col, name in enumerate(microbes)}

    weight = np.zeros((len(diseases), len(microbes)))
    # Sign matrix: +1 by default, -1 where the last record says "decrease".
    E = np.ones((len(diseases), len(microbes)))

    for line in disease_microbe:
        indexRow = disease_row[line[1]]
        indexCol = microbe_col[line[0]]

        weight[indexRow][indexCol] += 1
        if line[3] == "increase":
            E[indexRow][indexCol] = 1
        elif line[3] == "decrease":
            E[indexRow][indexCol] = -1

    # The log factor depends only on the microbe, so compute it once per
    # column rather than once per cell.
    for indexCol, microbe in enumerate(microbes):
        scale = math.log2(
            float(len(diseases)) / len(microbe2disease[microbe]))
        for indexRow in range(len(diseases)):
            weight[indexRow][indexCol] *= E[indexRow][indexCol] * scale

    # ------------------------------------------------------------------
    MicrobeSim = {}
    for i in range(len(diseases)):
        for j in range(i + 1, len(diseases)):
            cosine_value = common.cosinValue(weight[i], weight[j])
            if cosine_value != 0:
                MicrobeSim["{}\t{}".format(diseases[i],
                                           diseases[j])] = cosine_value

    MicrobeSim = common.sortDict(MicrobeSim)
    FileUtil.writeSortedDic2File(MicrobeSim, output_file)
    end_time = time.perf_counter()

    print("MicrobeSim costs {}s".format(end_time - begin_time))
Esempio n. 4
0
def calculateDisSim(seed_list, net, output_path):

    '''
    Compute network-based similarity between diseases and save it to a file.

    For every disease whose genes overlap the network nodes,
    ``walker.Walker(net).run_exp`` is run from those genes and the returned
    per-node values are summed over the genes of every disease (the FR
    matrix). The similarity of diseases ``a`` and ``b`` is
    ``(FR[a][b] + FR[b][a]) / (len(genes_a) + len(genes_b))``. Pairs with a
    strictly positive value are written to ``output_path`` in descending
    order.

    :param seed_list: dict of diseases and their genes. key: str, the
        disease; value: set, its genes
    :param net: 2-D list representing a PPI network
    :param output_path: str, path where the result is saved
    :return: None
    '''
    nodes = NetUtil.getNodes2HomoNet(net)

    print("there are {} diseases.".format(len(seed_list)))

    FRValueMatrix = np.zeros((len(seed_list), len(seed_list)))
    # time.clock() was removed in Python 3.8; perf_counter() replaces it.
    time1 = time.perf_counter()
    for rowOfFR, (disease, genesOfDisease) in enumerate(seed_list.items()):

        leavaList = getCommonNodes(nodes, genesOfDisease)
        # NOTE(review): a Walker is rebuilt for every disease; hoisting it is
        # only safe if run_exp keeps no state between runs -- confirm.
        wk = walker.Walker(net)

        if len(leavaList) > 0:
            # run RWR (random walk with restart), then get the proportion of all nodes
            temp_time1 = time.perf_counter()
            nodesPercent = wk.run_exp(leavaList, 0.7, 1)
            temp_time = time.perf_counter()
            print("{} - {} -> genes = {}, it cost {}s".format(rowOfFR, disease, len(leavaList), temp_time - temp_time1))

            # calculate the FR_GeneSet's value
            for colOfFR, genesOfDisease2 in enumerate(seed_list.values()):
                FR = 0
                for gene in genesOfDisease2:
                    if gene in nodes:
                        FR += float(nodesPercent[gene])
                    elif gene in genesOfDisease:
                        # A gene outside the network still counts fully when
                        # it belongs to the seed disease.
                        FR += 1
                FRValueMatrix[rowOfFR][colOfFR] = FR

    print("begin to calculate NetSim value")
    # calculate the NetSim value of a pair of disease
    NetSimMatrix = np.zeros((len(seed_list), len(seed_list)))
    NetSimList = list(seed_list.keys())
    geneCounts = [len(genes) for genes in seed_list.values()]
    for rowOfFP in range(len(NetSimList)):
        for colOfFP in range(len(NetSimList)):
            # Compare positions instead of string identity (`is not`); the
            # diagonal stays zero.
            if rowOfFP != colOfFP:
                NetSimMatrix[rowOfFP][colOfFP] = (
                    FRValueMatrix[rowOfFP][colOfFP] +
                    FRValueMatrix[colOfFP][rowOfFP]
                ) / (geneCounts[rowOfFP] + geneCounts[colOfFP])

    print("write the 'disease-diesae-value' to a file")
    simiResult = {}
    row, col = np.shape(NetSimMatrix)
    for i in range(row):
        for j in range(i + 1, col):
            if NetSimMatrix[i][j] > 0:
                simiResult['{}\t{}'.format(NetSimList[i], NetSimList[j])] = NetSimMatrix[i][j]

    sortedSimiResult = sorted(simiResult.items(), key=lambda x: x[1], reverse=True)
    FileUtil.writeSortedDic2File(sortedSimiResult,  output_path)
    print("end")
    time2 = time.perf_counter()
    print("NetSim total cost {}s.".format(time2-time1))
Esempio n. 5
0
def _pairwise_dtw_matrix(pool, sequences):
    """Return a matrix whose strict upper triangle holds ``dtw_distance_fast`` of each sequence pair, computed on ``pool``."""
    size = len(sequences)
    # Keep (i, j) next to each task so results land in the right cell
    # without having to re-derive the indices from submission order.
    pending = [
        (i, j,
         pool.apply_async(dtw_distance_fast, (sequences[i], sequences[j])))
        for i in range(size)
        for j in range(i + 1, size)
    ]
    distances = np.zeros((size, size))
    for i, j, task in pending:
        distances[i][j] = float(task.get())
    return distances


def cal_nei_sim(disease, edges, save_nei_sim=False):
    """Compute pairwise neighbourhood-based similarity between nodes.

    For every node in ``disease`` two sorted degree sequences are collected
    from the graph built from ``edges``: the degrees of its direct
    neighbours, and the degrees of the distinct nodes adjacent to those
    neighbours (the node itself excluded). For each node pair the two
    sequence distances ``d1`` and ``d2`` returned by ``dtw_distance_fast``
    are combined into ``exp(-(alpha * d1 + alpha**2 * d2))`` with
    ``alpha = 0.5``.

    :param disease: list of nodes; each must be present in ``edges``
    :param edges: edge list passed to ``networkx.Graph.add_edges_from``
    :param save_nei_sim: when true, the pairs sorted by descending
        similarity are written to ``./nei_Sim.txt``
    :return: symmetric ``len(disease) x len(disease)`` numpy array with a
        zero diagonal
    """
    print("begin to calculate similarity based on neighbours...")

    G = nx.Graph()
    G.add_edges_from(edges)  # build the heterogeneous graph from the edge list

    print(
        "step 1: epsilon -> 2, calculate first degree sequence and second degree sequence..."
    )
    DegreeSequence1 = []
    DegreeSequence2 = []

    for di in disease:
        degreeOfOne = []  # degrees of the first-layer neighbours
        neghboursTwo = set()  # neighbours of the first-layer neighbours
        for neighbour in G.neighbors(di):
            degreeOfOne.append(nx.degree(G, neighbour))
            neghboursTwo.update(G.neighbors(neighbour))
        DegreeSequence1.append(sorted(degreeOfOne))

        # Drop the node itself from its second layer; discard() does not
        # fail if it is absent.
        neghboursTwo.discard(di)
        DegreeSequence2.append(
            sorted(nx.degree(G, node) for node in neghboursTwo))

    cores = multiprocessing.cpu_count()  # number of CPUs on this machine
    # The context manager releases the worker processes on exit; every
    # result is fetched inside the block, so no task is cut short.
    with multiprocessing.Pool(cores) as pool:
        print("step 2: compute neighbour_sim in parallel with {} cpus...".format(
            cores))
        # pairwise distances of the first-layer degree sequences
        arrOne = _pairwise_dtw_matrix(pool, DegreeSequence1)
        # pairwise distances of the second-layer degree sequences
        arrTwo = _pairwise_dtw_matrix(pool, DegreeSequence2)

    # ----------------------------------------------------------------------------
    print("step 3: construct similarity matrix...")
    alpha = 0.5  # a decaying weight factor alpha in the range between 0 and 1
    NeiSim = {}
    sim_matrix = np.zeros((len(disease), len(disease)))
    for i in range(len(disease)):
        for j in range(i + 1, len(disease)):
            distance = math.pow(alpha, 1) * arrOne[i][j] + math.pow(
                alpha, 2) * arrTwo[i][j]
            similarity = math.exp(-distance)
            NeiSim["{}\t{}".format(disease[i], disease[j])] = similarity
            sim_matrix[i][j] = similarity
            sim_matrix[j][i] = similarity

    if save_nei_sim:
        print("sort the path similarity and save...")
        res = sorted(NeiSim.items(), key=lambda x: x[1], reverse=True)
        FileUtil.writeSortedDic2File(res, "./nei_Sim.txt")

    return sim_matrix