def multiCore_PPILinkPred_shared(samplePPIbr, scoringMethod, scoreArgsDict, coreNo, topNo=None, logging=False, nodePairs=None):
    """Score candidate node pairs in parallel and return the top-ranked ones.

    The candidate pairs are cut into ``coreNo`` contiguous chunks that are
    placed on a managed queue. ``coreNo`` pool workers run
    ``_multiCore_handler_shared(args, workerIndex)``; each worker is expected
    to put one ``[predictedPPIbr, scores]`` pair on the result queue. The
    per-worker results are concatenated, ordered with ``hr.sort_key_val``
    and truncated to ``topNo`` entries.

    @param samplePPIbr: the sample network, in the form accepted by
        ``ns.toDualBR`` / ``ns.BRToNode``
    @param scoringMethod: forwarded unchanged to the worker handler
    @param scoreArgsDict: dict, assign the normalization functions
        (normFunc, uvSpec, xySpec, uvContrib, xyContrib, dualCN, uvJoin);
        missing keys are forwarded as the string 'null'
    @param coreNo: number of worker processes, and number of chunks
    @param topNo: number of top predictions to keep; None keeps all
    @param logging: when truthy, only worker 0 is flagged to log and a
        blank line is printed once the pool has finished
    @param nodePairs: candidate pairs to score; None means every
        2-combination of the sample nodes
    @return: (topPredPPIbrs, topScores)
    """
    # Positional order in which the worker receives the normalization settings.
    normOrder = [
        'normFunc', 'uvSpec', 'xySpec', 'uvContrib', 'xyContrib', 'dualCN',
        'uvJoin'
    ]
    scoreArgs = [scoreArgsDict.get(normTag, 'null') for normTag in normOrder]

    samplePPIr = ns.BRToRelat(ns.toDualBR(samplePPIbr), rSet=True)
    sampleNodes = ns.BRToNode(samplePPIbr)
    if nodePairs is None:
        nodePairs = list(combinations(sampleNodes, 2))

    # Equal-sized chunks; the last chunk also takes the remainder.
    pairCount = len(nodePairs)
    chunkSize = pairCount // coreNo
    splitStartIndex = [i * chunkSize for i in range(coreNo)]
    splitEndIndex = [
        (i + 1) * chunkSize if i != coreNo - 1 else pairCount
        for i in range(coreNo)
    ]

    mgr, dataMgr = Manager(), Manager()
    PPIdataQ, PPIresQ = dataMgr.Queue(), mgr.Queue()

    # Keep the caller's flag separate from the per-worker list: the list is
    # non-empty and therefore always truthy, which previously made the
    # trailing newline print even when logging was disabled.
    loggingEnabled = bool(logging)
    workerLogging = [loggingEnabled and i == 0 for i in range(coreNo)]

    for start, end in zip(splitStartIndex, splitEndIndex):
        PPIdataQ.put(nodePairs[start:end])
    # Drop the local reference to the full pair list before the pool starts.
    nodePairs = None

    args = (PPIdataQ, samplePPIr, scoringMethod, scoreArgs, workerLogging,
            PPIresQ)
    func = partial(_multiCore_handler_shared, args)
    with Pool(coreNo) as p:
        p.map(func, range(coreNo))
    if loggingEnabled:
        print("\n")

    mergedScores, mergedPPIbrs = [], []
    PPIresL = [PPIresQ.get() for _ in range(coreNo)]
    for predictedPPIbr, scores in PPIresL:
        mergedScores += scores
        mergedPPIbrs += predictedPPIbr

    sortedPPIbrs, sortedScores = hr.sort_key_val(mergedPPIbrs, mergedScores)
    if topNo is None:
        topNo = len(sortedPPIbrs)
    topPredPPIbrs = sortedPPIbrs[0:topNo]
    topScores = sortedScores[0:topNo]
    return topPredPPIbrs, topScores
def PPILinkPred(samplePPIbr):
    """Score candidate node pairs of the sample network in a single process.

    Every 2-combination of the sample nodes is considered. A pair is skipped
    when its second node already appears in the first node's entry of the
    relation map (presumably an existing interaction); every other pair is
    scored with ``L3_normalization``.

    @param samplePPIbr: the sample network, in the form accepted by
        ``ns.toDualBR`` / ``ns.BRToNode``
    @return: (sortedPPIbrs, sortedScores) as ordered by ``hr.sort_key_val``
    """
    relationMap = ns.BRToRelat(ns.toDualBR(samplePPIbr), rSet=True)
    nodes = ns.BRToNode(samplePPIbr)

    candidatePairs, candidateScores = [], []
    for pair in combinations(nodes, 2):
        first, second = pair
        if second not in relationMap[first]:
            candidateScores.append(
                L3_normalization(relationMap, first, second))
            candidatePairs.append(pair)

    sortedPPIbrs, sortedScores = hr.sort_key_val(candidatePairs,
                                                 candidateScores)
    return sortedPPIbrs, sortedScores
def STRING_homo_HPC_combine():
    """Merge the per-job STRING prediction files of each trial and trim them.

    For every result folder whose name contains "STRING" and for trials 0-9,
    the PPI files of that trial and their matching ``*_score.json`` files are
    loaded and concatenated, ordered with ``hr.sort_key_val`` and cut to
    ``dataset_len`` entries. One JSON line per trial is appended to
    ``<prefix>_STRING_homo_trimmedPPIs.json`` and
    ``<prefix>_STRING_homo_trimmedScores.json`` in the same folder.

    ``dataset_len`` is half the row count of the second object returned by
    ``string.parse_STRING`` (presumably the PPI table -- confirm).

    A trial with no matching PPI files is reported and skipped; it used to
    raise IndexError when the output prefix was taken from the first file.
    """
    parsed = [
        *string.parse_STRING(
            ppiFile='./data/STRING/9606.protein.links.v11.0.txt',
            typeFile='./data/STRING/9606.protein.actions.v11.0.txt',
            uniProtMap='./data/UniProt/uniprot-taxonomy_9606_STRING.tab',
            root='../',
            wFile_GGI='./data/parsed/STRING_homo_GGI.pkl',
            wFile_PPI='./data/parsed/STRING_homo_PPI.pkl')
    ]
    dataset_len = int(len(parsed[1].index) * 0.5)

    dataset = "STRING"
    datasetName = "STRING_homo"
    # NOTE(review): directory name kept verbatim from the original source;
    # confirm "h**o" is the real on-disk name.
    for folder in os.listdir("./resultData/h**o/"):
        if dataset not in folder:
            continue
        # Files written below never carry a trial number in the third-from-
        # last name field, so one listing per folder serves all trials.
        ppiFiles = [
            name for name in os.listdir("./resultData/h**o/" + folder)
            if "PPI" in name
        ]
        for trial in range(0, 10):
            print("folder: {}, trial: {}".format(folder, trial))

            # The trial number is the third-from-last "_"-separated field;
            # names with fewer fields cannot belong to any trial.
            trialTag = str(trial)
            curTrialFiles = []
            for name in ppiFiles:
                fields = name.split("_")
                if len(fields) >= 3 and fields[-3] == trialTag:
                    curTrialFiles.append(name)
            if not curTrialFiles:
                print("folder: {}, trial: {} has no PPI files, skipped".format(
                    folder, trial))
                continue

            topPPIs, topScores = [], []
            for name in curTrialFiles:
                with open("./resultData/h**o/{}/{}".format(folder, name),
                          "r") as f:
                    topPPIs += json.loads(f.read())
                # The score file shares the PPI file's name up to its last
                # "_"-separated field.
                scoreName = "_".join(name.split("_")[:-1]) + "_score.json"
                with open(
                        "./resultData/h**o/{}/{}".format(folder, scoreName),
                        "r") as f:
                    topScores += json.loads(f.read())

            # sort
            topPPIs, topScores = hr.sort_key_val(topPPIs, topScores)
            topPPIs = topPPIs[0:dataset_len]
            topScores = topScores[0:dataset_len]

            prefix = curTrialFiles[0].split("_")[0]
            with open(
                    "./resultData/h**o/{}/{}_{}_trimmedPPIs.json".format(
                        folder, prefix, datasetName), "a+") as f:
                f.write(json.dumps(topPPIs) + "\n")
            with open(
                    "./resultData/h**o/{}/{}_{}_trimmedScores.json".format(
                        folder, prefix, datasetName), "a+") as f:
                f.write(json.dumps(topScores) + "\n")
def bioGRID_homo_HPC_combine():
    """Merge the per-job bioGRID prediction files of each trial and trim them.

    For every result folder whose name contains "bioGRID" and for trials 6-9,
    the PPI files of that trial and their matching ``*_score.json`` files are
    loaded and concatenated, ordered with ``hr.sort_key_val`` and cut to
    ``dataset_len`` entries. One JSON line per trial is appended to
    ``<prefix>_bioGRID_homo_trimmedPPIs.json`` and
    ``<prefix>_bioGRID_homo_trimmedScores.json`` in the same folder.

    ``dataset_len`` is half the row count of the second object returned by
    ``bg.parse_bioGRID`` (presumably the PPI table -- confirm).

    A trial with no matching PPI files is reported and skipped; it used to
    raise IndexError when the output prefix was taken from the first file.
    """
    parsed = [
        *bg.parse_bioGRID(
            filename=
            './data/BioGRID/BIOGRID-ORGANISM-Homo_sapiens-3.5.187.tab2.txt',
            wFile_GGI='./data/parsed/BioGRID_homo_GGI.pkl',
            wFile_PPI='./data/parsed/BioGRID_homo_PPI.pkl',
            root="../")
    ]
    dataset_len = int(len(parsed[1].index) * 0.5)

    dataset = "bioGRID"
    datasetName = "bioGRID_homo"
    # NOTE(review): directory name kept verbatim from the original source;
    # confirm "h**o" is the real on-disk name.
    for folder in os.listdir("./resultData/h**o/"):
        if dataset not in folder:
            continue
        # Files written below never carry a trial number in the third-from-
        # last name field, so one listing per folder serves all trials.
        ppiFiles = [
            name for name in os.listdir("./resultData/h**o/" + folder)
            if "PPI" in name
        ]
        # NOTE(review): only trials 6-9 are combined here, as in the original.
        for trial in range(6, 10):
            print("folder: {}, trial: {}".format(folder, trial))

            # The trial number is the third-from-last "_"-separated field;
            # names with fewer fields cannot belong to any trial.
            trialTag = str(trial)
            curTrialFiles = []
            for name in ppiFiles:
                fields = name.split("_")
                if len(fields) >= 3 and fields[-3] == trialTag:
                    curTrialFiles.append(name)
            if not curTrialFiles:
                print("folder: {}, trial: {} has no PPI files, skipped".format(
                    folder, trial))
                continue

            topPPIs, topScores = [], []
            for name in curTrialFiles:
                with open("./resultData/h**o/{}/{}".format(folder, name),
                          "r") as f:
                    topPPIs += json.loads(f.read())
                # The score file shares the PPI file's name up to its last
                # "_"-separated field.
                scoreName = "_".join(name.split("_")[:-1]) + "_score.json"
                with open(
                        "./resultData/h**o/{}/{}".format(folder, scoreName),
                        "r") as f:
                    topScores += json.loads(f.read())

            # sort
            topPPIs, topScores = hr.sort_key_val(topPPIs, topScores)
            topPPIs = topPPIs[0:dataset_len]
            topScores = topScores[0:dataset_len]

            prefix = curTrialFiles[0].split("_")[0]
            with open(
                    "./resultData/h**o/{}/{}_{}_trimmedPPIs.json".format(
                        folder, prefix, datasetName), "a+") as f:
                f.write(json.dumps(topPPIs) + "\n")
            with open(
                    "./resultData/h**o/{}/{}_{}_trimmedScores.json".format(
                        folder, prefix, datasetName), "a+") as f:
                f.write(json.dumps(topScores) + "\n")