Esempio n. 1
0
def multiCore_PPILinkPred_shared(samplePPIbr,
                                 scoringMethod,
                                 scoreArgsDict,
                                 coreNo,
                                 topNo=None,
                                 logging=False,
                                 nodePairs=None):
    """Score candidate node pairs on ``coreNo`` worker processes.

    The candidate pairs are cut into ``coreNo`` contiguous chunks that are
    placed on a manager queue. Each pool worker runs
    ``_multiCore_handler_shared`` and is expected to put one
    ``[predictedPPIbr, scores]`` result on a second manager queue. The
    partial results are concatenated and ordered with ``hr.sort_key_val``.

    @param samplePPIbr: sample PPI network, passed to ``ns.toDualBR`` and
        ``ns.BRToNode``
    @param scoringMethod: forwarded unchanged to the worker handler
    @param scoreArgsDict: dict, assign the normalization functions
        (normFunc, uvSpec, xySpec, uvContrib, xyContrib, dualCN, uvJoin);
        tags that are absent are forwarded as the string 'null'
    @param coreNo: number of worker processes and of work chunks (>= 1)
    @param topNo: number of leading predictions to return; None keeps all
    @param logging: when truthy, only worker 0 receives a True logging flag
        and a blank line is printed once the pool has finished
    @param nodePairs: optional sequence of candidate pairs; defaults to every
        2-combination of the nodes of ``samplePPIbr``
    @return: (topPredPPIbrs, topScores), both cut to ``topNo`` entries
    @raise ValueError: if ``coreNo`` is smaller than 1
    """
    if coreNo < 1:
        # Pool() would reject this later with the same exception type; fail
        # before any manager process is started.
        raise ValueError("coreNo must be at least 1")

    normOrder = [
        'normFunc', 'uvSpec', 'xySpec', 'uvContrib', 'xyContrib', 'dualCN',
        'uvJoin'
    ]
    scoreArgs = [scoreArgsDict.get(normTag, 'null') for normTag in normOrder]

    samplePPIr = ns.BRToRelat(ns.toDualBR(samplePPIbr), rSet=True)
    sampleNodes = ns.BRToNode(samplePPIbr)
    if nodePairs is None:
        nodePairs = list(combinations(sampleNodes, 2))

    # Equal-sized contiguous chunks; the last chunk absorbs the remainder.
    pairNo = len(nodePairs)
    chunkSize = pairNo // coreNo
    splitStartIndex = [i * chunkSize for i in range(coreNo)]
    splitEndIndex = [(i + 1) * chunkSize for i in range(coreNo - 1)]
    splitEndIndex.append(pairNo)

    mgr, dataMgr = Manager(), Manager()
    PPIdataQ, PPIresQ = dataMgr.Queue(), mgr.Queue()

    # Per-worker flags live under their own name so that the caller's
    # `logging` value is still available after the pool has run.
    workerLogging = [bool(logging) and i == 0 for i in range(coreNo)]

    for start, end in zip(splitStartIndex, splitEndIndex):
        PPIdataQ.put(nodePairs[start:end])
    nodePairs = None  # drop the local reference to the full pair list

    args = (PPIdataQ, samplePPIr, scoringMethod, scoreArgs, workerLogging,
            PPIresQ)
    func = partial(_multiCore_handler_shared, args)
    with Pool(coreNo) as p:
        p.map(func, range(coreNo))
    if logging:
        print("\n")

    mergedScores, mergedPPIbrs = [], []
    for _ in range(coreNo):
        predictedPPIbr, scores = PPIresQ.get()
        mergedScores += scores
        mergedPPIbrs += predictedPPIbr

    sortedPPIbrs, sortedScores = hr.sort_key_val(mergedPPIbrs, mergedScores)
    if topNo is None:
        topNo = len(sortedPPIbrs)
    topPredPPIbrs = sortedPPIbrs[0:topNo]
    topScores = sortedScores[0:topNo]
    return topPredPPIbrs, topScores
Esempio n. 2
0
def PPILinkPred(samplePPIbr):
    """Score every node pair that is not already linked in the sample network.

    Each 2-combination of the nodes of ``samplePPIbr`` is visited. Pairs
    whose second node already appears in the first node's entry of the
    relation map are skipped; the rest are scored with ``L3_normalization``.

    @param samplePPIbr: sample PPI network, passed to ``ns.toDualBR`` and
        ``ns.BRToNode``
    @return: (sortedPPIbrs, sortedScores) as produced by ``hr.sort_key_val``
    """
    relations = ns.BRToRelat(ns.toDualBR(samplePPIbr), rSet=True)
    nodes = ns.BRToNode(samplePPIbr)

    candidatePairs = []
    candidateScores = []
    for pair in combinations(nodes, 2):
        first, second = pair
        if second not in relations[first]:
            # Not linked yet: this pair is a prediction candidate.
            candidateScores.append(L3_normalization(relations, first, second))
            candidatePairs.append(pair)

    return hr.sort_key_val(candidatePairs, candidateScores)
Esempio n. 3
0
    def STRING_homo_HPC_combine():
        """Merge the per-trial STRING prediction files into trimmed top lists.

        For every result folder whose name contains "STRING" and for trials
        0-9, the PPI JSON files of that trial and their ``*_score.json``
        companions are merged, ordered with ``hr.sort_key_val`` and cut to
        ``dataset_len`` entries. One JSON line per trial is appended to
        ``<prefix>_STRING_homo_trimmedPPIs.json`` and
        ``<prefix>_STRING_homo_trimmedScores.json`` in the same folder, where
        ``<prefix>`` is the first "_"-separated field of the trial's first
        file name. Trials without any matching file are skipped.
        """
        # Half the length of `.index` of the second object returned by the
        # parser (presumably a pandas DataFrame of PPIs - TODO confirm).
        dataset_len = int(
            len([
                *string.parse_STRING(
                    ppiFile='./data/STRING/9606.protein.links.v11.0.txt',
                    typeFile='./data/STRING/9606.protein.actions.v11.0.txt',
                    uniProtMap=
                    './data/UniProt/uniprot-taxonomy_9606_STRING.tab',
                    root='../',
                    wFile_GGI='./data/parsed/STRING_homo_GGI.pkl',
                    wFile_PPI='./data/parsed/STRING_homo_PPI.pkl')
            ][1].index) * 0.5)

        dataset = "STRING"
        datasetName = "STRING_homo"
        # NOTE(review): os.listdir/open do not expand "**"; this is used as a
        # literal directory name - confirm the intended path.
        resultRoot = "./resultData/h**o/"
        for folder in os.listdir(resultRoot):
            if dataset not in folder:
                continue
            folderPath = resultRoot + folder
            # The listing is the same for every trial: the files written
            # below never carry a trial number in their third-to-last field.
            ppiFiles = [
                name for name in os.listdir(folderPath) if "PPI" in name
            ]
            for trial in range(0, 10):
                print("folder: {}, trial: {}".format(folder, trial))
                trialTag = str(trial)
                curTrialFiles = []
                for name in ppiFiles:
                    fields = name.split("_")
                    # Names with fewer than three fields carry no trial tag.
                    if len(fields) >= 3 and fields[-3] == trialTag:
                        curTrialFiles.append(name)
                if not curTrialFiles:
                    # The output file names are derived from the first input
                    # file, so there is nothing to write for this trial.
                    print("folder: {}, trial: {}: no PPI files, skipped".
                          format(folder, trial))
                    continue

                topPPIs, topScores = [], []
                for name in curTrialFiles:
                    scoreName = "_".join(name.split("_")[:-1]) + "_score.json"
                    with open("{}/{}".format(folderPath, name), "r") as f:
                        topPPIs += json.load(f)
                    with open("{}/{}".format(folderPath, scoreName),
                              "r") as f:
                        topScores += json.load(f)
                    # Sort and trim after every file so the running lists
                    # never grow beyond dataset_len plus one file.
                    topPPIs, topScores = hr.sort_key_val(topPPIs, topScores)
                    topPPIs = topPPIs[0:dataset_len]
                    topScores = topScores[0:dataset_len]

                prefix = curTrialFiles[0].split("_")[0]
                # Append mode: each processed trial adds one line per file.
                with open(
                        "{}/{}_{}_trimmedPPIs.json".format(
                            folderPath, prefix, datasetName), "a+") as f:
                    f.write(json.dumps(topPPIs) + "\n")
                with open(
                        "{}/{}_{}_trimmedScores.json".format(
                            folderPath, prefix, datasetName), "a+") as f:
                    f.write(json.dumps(topScores) + "\n")
Esempio n. 4
0
    def bioGRID_homo_HPC_combine():
        """Merge the per-trial bioGRID prediction files into trimmed top lists.

        For every result folder whose name contains "bioGRID" and for trials
        6-9, the PPI JSON files of that trial and their ``*_score.json``
        companions are merged, ordered with ``hr.sort_key_val`` and cut to
        ``dataset_len`` entries. One JSON line per trial is appended to
        ``<prefix>_bioGRID_homo_trimmedPPIs.json`` and
        ``<prefix>_bioGRID_homo_trimmedScores.json`` in the same folder,
        where ``<prefix>`` is the first "_"-separated field of the trial's
        first file name. Trials without any matching file are skipped.
        """
        # Half the length of `.index` of the second object returned by the
        # parser (presumably a pandas DataFrame of PPIs - TODO confirm).
        dataset_len = int(
            len([
                *bg.parse_bioGRID(
                    filename=
                    './data/BioGRID/BIOGRID-ORGANISM-Homo_sapiens-3.5.187.tab2.txt',
                    wFile_GGI='./data/parsed/BioGRID_homo_GGI.pkl',
                    wFile_PPI='./data/parsed/BioGRID_homo_PPI.pkl',
                    root="../")
            ][1].index) * 0.5)

        dataset = "bioGRID"
        datasetName = "bioGRID_homo"
        # NOTE(review): os.listdir/open do not expand "**"; this is used as a
        # literal directory name - confirm the intended path.
        resultRoot = "./resultData/h**o/"
        for folder in os.listdir(resultRoot):
            if dataset not in folder:
                continue
            folderPath = resultRoot + folder
            # The listing is the same for every trial: the files written
            # below never carry a trial number in their third-to-last field.
            ppiFiles = [
                name for name in os.listdir(folderPath) if "PPI" in name
            ]
            # NOTE(review): starts at trial 6, so trials 0-5 are never
            # combined here - confirm this is intentional.
            for trial in range(6, 10):
                print("folder: {}, trial: {}".format(folder, trial))
                trialTag = str(trial)
                curTrialFiles = []
                for name in ppiFiles:
                    fields = name.split("_")
                    # Names with fewer than three fields carry no trial tag.
                    if len(fields) >= 3 and fields[-3] == trialTag:
                        curTrialFiles.append(name)
                if not curTrialFiles:
                    # The output file names are derived from the first input
                    # file, so there is nothing to write for this trial.
                    print("folder: {}, trial: {}: no PPI files, skipped".
                          format(folder, trial))
                    continue

                topPPIs, topScores = [], []
                for name in curTrialFiles:
                    scoreName = "_".join(name.split("_")[:-1]) + "_score.json"
                    with open("{}/{}".format(folderPath, name), "r") as f:
                        topPPIs += json.load(f)
                    with open("{}/{}".format(folderPath, scoreName),
                              "r") as f:
                        topScores += json.load(f)
                    # Sort and trim after every file so the running lists
                    # never grow beyond dataset_len plus one file.
                    topPPIs, topScores = hr.sort_key_val(topPPIs, topScores)
                    topPPIs = topPPIs[0:dataset_len]
                    topScores = topScores[0:dataset_len]

                prefix = curTrialFiles[0].split("_")[0]
                # Append mode: each processed trial adds one line per file.
                with open(
                        "{}/{}_{}_trimmedPPIs.json".format(
                            folderPath, prefix, datasetName), "a+") as f:
                    f.write(json.dumps(topPPIs) + "\n")
                with open(
                        "{}/{}_{}_trimmedScores.json".format(
                            folderPath, prefix, datasetName), "a+") as f:
                    f.write(json.dumps(topScores) + "\n")