def mergeG2():
    """Merge the result files listed in FSUBTEST/2/GFileMap.txt into G2.txt.

    Each line of ``GFileMap.txt`` is tab-separated; only its first column,
    the name of a result file in the same directory, is used.  Every line of
    a result file is split on ``_``: the first piece is the grouping key
    (``drug``) and the second piece, cut at its first tab, is the collected
    value (``se``).  One line per key is then written to ``G2.txt`` as
    ``<key><TAB><value1>,<value2>,...``.

    All files are closed even when parsing fails, and ``G2.txt`` is only
    (re)created once every input has been read successfully.

    Raises:
        IndexError: if a result-file line contains no ``_``.
        OSError: if one of the files cannot be opened.
    """
    baseDir = "%s/FSUBTEST/2" % params.JADER_OUT
    dDrug2Se = dict()

    with open("%s/GFileMap.txt" % baseDir) as fin:
        for line in fin:
            hashFile = line.strip().split("\t")[0]
            with open("%s/%s" % (baseDir, hashFile)) as f:
                for row in f:
                    parts = row.strip().split("_")
                    drug = parts[0]
                    se = parts[1].split("\t")[0]
                    # NOTE(review): get_insert_key_dict is expected to store
                    # and return the list for `drug` (setdefault-like).
                    utils.get_insert_key_dict(dDrug2Se, drug, []).append(se)

    with open("%s/G2.txt" % baseDir, "w") as fout:
        for k, v in dDrug2Se.items():
            fout.write("%s\t%s\n" % (k, ",".join(v)))
def plotDrugLength2NSEs():
    """Plot the median side-effect count against drug-combination length.

    Reads ``JADER2AllSeList.txt`` under ``params.JADER_OUT``.  Each line is
    split on ``$``: field 0 is ``<drug1>,<drug2>,...:<count>`` (the format
    written by ``exportDrugCom2Side``) and field 1 is an integer number of
    side effects.  The numbers are grouped by how many drugs the combination
    holds, the median of each group is scattered against that length, and the
    figure is saved as ``JADERAvgSEDrugCombLength.png`` under
    ``params.FIG_DIR``.

    Lengths between 1 and the largest observed length that never occur are
    plotted with a median of 0 (the ``np.zeros`` default).

    Raises:
        ValueError: if the input holds no records or field 1 is not an
            integer.
        IndexError: if a line has no ``$`` separator.
    """
    import matplotlib.pyplot as plt
    import numpy as np

    dDrugLength2NSes = dict()
    with open("%s/JADER2AllSeList.txt" % params.JADER_OUT) as fin:
        for line in fin:
            parts = line.strip().split("$")
            # Strip only the trailing ":<count>" so that a colon inside a
            # drug name does not cut the combination short.
            drugCombs = parts[0].rsplit(":", 1)[0]
            nDrug = len(drugCombs.split(","))
            nSe = int(parts[1])
            utils.get_insert_key_dict(dDrugLength2NSes, nDrug, []).append(nSe)

    if not dDrugLength2NSes:
        raise ValueError("JADER2AllSeList.txt contains no records to plot")

    xmax = max(dDrugLength2NSes)
    x = list(range(1, xmax + 1))
    y = np.zeros(xmax)
    for k, v in dDrugLength2NSes.items():
        y[k - 1] = np.median(v)

    plt.scatter(x, y)
    plt.xlabel("DrugComb length")
    plt.ylabel("Median SEs")
    plt.tight_layout()
    plt.savefig("%s/%s.png" % (params.FIG_DIR, "JADERAvgSEDrugCombLength"))
def exportReactionsFile():
    """Group reaction names by id and write them to Reactions1.txt.

    Reads ``reactions.txt`` under ``CAD_FOLDER_INP`` line by line.  Each line
    is stripped, lower-cased and parsed as one ``$``-delimited CSV record;
    column 1 is the grouping id and column 5 the reaction name.  A name that
    contains any entry of the module-level ``invalidSes`` is skipped.  The
    result is written to ``Reactions1.txt`` under ``params.CAD_OUT`` as
    ``<id>$<name1>,<name2>,...``.

    Names are written in sorted order so that repeated runs produce the same
    file (plain set iteration order changes between interpreter runs).

    Raises:
        IndexError: if a line is blank or has fewer than six columns.
    """
    dId2Ses = dict()
    with codecs.open("%s/reactions.txt" % CAD_FOLDER_INP) as fin:
        for line in fin:
            ios = io.StringIO(line.strip().lower())
            vv = list(csv.reader(ios, delimiter='$'))[0]
            sId = vv[1]
            seName = vv[5]
            if any(invalidSe in seName for invalidSe in invalidSes):
                continue
            utils.get_insert_key_dict(dId2Ses, sId, set()).add(seName)

    with open("%s/Reactions1.txt" % params.CAD_OUT, "w") as fout:
        for k, v in dId2Ses.items():
            fout.write("%s$%s\n" % (k, ",".join(sorted(v))))
def exportSubG2():
    """Split JADER.txt by combination size and expand larger ones into pairs.

    Every line of ``JADER.txt`` (under ``params.JADER_OUT``) has the form
    ``<drug1>,<drug2>,...$<se1>,<se2>,...``.  For each line:

    * the drugs are sorted and the record is written to ``SUB/G<n>``, where
      ``n`` is the number of drugs;
    * when ``2 < n <= 20`` every unordered pair of the sorted drugs is also
      written to ``SUB/G2`` with the same side-effect field;
    * each side effect is counted for size ``n``.

    Finally, for every size, the side-effect names are stored in the order
    returned by ``utils.sort_dict`` (presumably most frequent first -- TODO
    confirm) via ``utils.save_obj`` as ``SUB/drugSize2CommonSEs``.

    All file handles are released even when a malformed line aborts the run.

    Raises:
        IndexError: if a line has no ``$`` separator.
    """
    from contextlib import ExitStack
    from itertools import combinations

    dlen2SeCount = dict()

    with ExitStack() as stack:
        fin = stack.enter_context(open("%s/JADER.txt" % params.JADER_OUT))
        foutDict = dict()

        def getOut(size):
            # Output files are opened lazily, once per combination size.
            if size not in foutDict:
                foutDict[size] = stack.enter_context(
                    open("%s/SUB/G%s" % (params.JADER_OUT, size), "w"))
            return foutDict[size]

        print("Reading...")
        for nA, line in enumerate(fin, 1):
            print("\r%s" % nA, end="")
            parts = line.strip().split("$")
            drugCmb = parts[0]
            ses = parts[1]
            drugs = sorted(drugCmb.split(","))
            nD = len(drugs)

            getOut(nD).write("%s$%s\n" % (",".join(drugs), ses))

            if 2 < nD <= 20:
                f2 = getOut(2)
                # combinations() yields pairs in the same i < j order as a
                # nested index loop over the sorted drugs.
                for d1, d2 in combinations(drugs, 2):
                    f2.write("%s,%s$%s\n" % (d1, d2, ses))

            len2SeCount = utils.get_insert_key_dict(dlen2SeCount, nD, dict())
            for se in ses.split(","):
                utils.add_dict_counter(len2SeCount, se)

    d2 = dict()
    for k, v in dlen2SeCount.items():
        d2[k] = [kk for kk, _ in utils.sort_dict(v)]
    utils.save_obj(d2, "%s/SUB/drugSize2CommonSEs" % params.JADER_OUT)
def exportDrugCom2Side():
    """Aggregate JADER.txt per drug combination and write JADER2AllSeList.txt.

    Every input line has the form ``<drug1>,<drug2>,...$<se1>,<se2>,...``.
    The function counts how often each combination string occurs, how often
    each side effect occurs with each combination, and how often each side
    effect and each single drug occurs overall.

    Output (one line per combination, in the order returned by
    ``utils.sort_dict`` on the combination counts)::

        <combination>:<count>$<number of distinct SEs>$<se>:<n>,<se>:<n>,...

    Additionally the global side-effect counts are saved as ``JADERSeCountFX``
    and the combination lengths as ``DrugCombLength`` through
    ``utils.save_obj``; the number of distinct drugs and side effects is
    printed.

    The output file is created only after the input was parsed completely.

    Raises:
        IndexError: if a line has no ``$`` separator.
    """
    dDrugComb2Se = dict()
    dDrugCombCount = dict()
    dDrugCom2Lenght = dict()
    drugCont = dict()
    seCount = dict()

    with open("%s/JADER.txt" % params.JADER_OUT) as fin:
        for line in fin:
            parts = line.strip().split("$")
            drugCom = parts[0]
            drugs = drugCom.split(",")
            dDrugCom2Lenght[drugCom] = len(drugs)
            ses = parts[1].split(",")

            utils.add_dict_counter(dDrugCombCount, drugCom, 1)
            for drug in drugs:
                utils.add_dict_counter(drugCont, drug, 1)

            sesComb = utils.get_insert_key_dict(dDrugComb2Se, drugCom, dict())
            for se in ses:
                utils.add_dict_counter(sesComb, se, 1)
                utils.add_dict_counter(seCount, se)

    with open("%s/JADER2AllSeList.txt" % params.JADER_OUT, "w") as fout:
        for k, v in utils.sort_dict(dDrugCombCount):
            sString = ["%s:%s" % (se, count)
                       for se, count in utils.sort_dict(dDrugComb2Se[k])]
            fout.write("%s:%s$%s$%s\n" % (k, v, len(sString), ",".join(sString)))

    utils.save_obj(seCount, "%s/JADERSeCountFX" % params.JADER_OUT)
    utils.save_obj(dDrugCom2Lenght, "%s/DrugCombLength" % params.JADER_OUT)
    print(len(drugCont), len(seCount))
def exportSub():
    """Split FDrug2SeList_19814.txt into one file per combination size.

    Every input line (under ``params.FADER_OUT``) has the form
    ``<drug1>,<drug2>,...$<se1>,<se2>,...``.  The drugs are sorted and the
    record is written to ``SUB/<n>``, where ``n`` is the number of drugs, and
    each side effect is counted for size ``n``.

    Finally, for every size, the side-effect names are stored in the order
    returned by ``utils.sort_dict`` (presumably most frequent first -- TODO
    confirm) via ``utils.save_obj`` as ``SUB/drugSize2CommonSEs``.

    All file handles are released even when a malformed line aborts the run.

    Raises:
        IndexError: if a line has no ``$`` separator.
    """
    from contextlib import ExitStack

    dlen2SeCount = dict()

    with ExitStack() as stack:
        fin = stack.enter_context(
            open("%s/FDrug2SeList_19814.txt" % params.FADER_OUT))
        foutDict = dict()

        print("Reading...")
        for nA, line in enumerate(fin, 1):
            print("\r%s" % nA, end="")
            parts = line.strip().split("$")
            drugCmb = parts[0]
            ses = parts[1]
            drugs = drugCmb.split(",")
            nD = len(drugs)
            sortNames = ",".join(sorted(drugs))

            # Output files are opened lazily, once per combination size.
            if nD not in foutDict:
                foutDict[nD] = stack.enter_context(
                    open("%s/SUB/%s" % (params.FADER_OUT, nD), "w"))
            foutDict[nD].write("%s$%s\n" % (sortNames, ses))

            len2SeCount = utils.get_insert_key_dict(dlen2SeCount, nD, dict())
            for se in ses.split(","):
                utils.add_dict_counter(len2SeCount, se)

    d2 = dict()
    for k, v in dlen2SeCount.items():
        d2[k] = [kk for kk, _ in utils.sort_dict(v)]
    utils.save_obj(d2, "%s/SUB/drugSize2CommonSEs" % params.FADER_OUT)
def exportPolySEs():
    """Collect side effects per drug combination from the FTest result files.

    ``FTest/FileMap.txt`` (under ``params.FADER_OUT``) is tab-separated; the
    first column of each of its first ``N_FILE`` lines names a result file in
    the same directory.  Every line of a result file is split on ``_``: the
    first piece is a comma-separated drug combination and the second piece,
    cut at its first tab, is a side effect.  A combination is kept only when
    every one of its drugs is a key of the ``DrugMorganDes`` object loaded
    through ``utils.load_obj``.

    The result is written to ``PolySes.txt`` as
    ``<combination><TAB><se1>,<se2>,...``.

    NOTE(review): the column after the side effect (compared against
    ``'inf'`` in an earlier no-op branch) is ignored -- confirm whether such
    rows were meant to be skipped.

    Raises:
        IndexError: if a result-file line contains no ``_``.
    """
    drugDesMap = utils.load_obj("%s/DrugBank/DrugMorganDes" % params.DATA_DIR)
    dComb2Se = dict()

    with open("%s/FTest/FileMap.txt" % params.FADER_OUT) as fin:
        hashFiles = fin.readlines()[:N_FILE]

    for mapLine in hashFiles:
        hashId = mapLine.strip().split("\t")[0]
        path = "%s/FTest/%s" % (params.FADER_OUT, hashId)
        print("Reading... ", path)
        with open(path) as fPoly:
            for row in fPoly:
                polySe = row.strip().split("_")
                drugComb = polySe[0]
                se = polySe[1].split("\t")[0]
                # Keep the combination only if every drug has a descriptor.
                if all(drug in drugDesMap for drug in drugComb.split(",")):
                    utils.get_insert_key_dict(dComb2Se, drugComb, []).append(se)

    with open("%s/PolySes.txt" % params.FADER_OUT, "w") as fout:
        for drugComb, ses in dComb2Se.items():
            fout.write("%s\t%s\n" % (drugComb, ",".join(ses)))
def exportPolySE():
    """Write CPolySE: drug pairs with both InChI strings and their side effects.

    Three passes:

    1. ``ttStatsRe`` (under ``OUT_DIR``) is read as tab-separated lines whose
       first column is a ``<drug1>,<drug2>`` pair and whose second column is a
       side effect; side effects are grouped per pair.
    2. ``DrugBankNames.txt`` is read as ``||``-separated lines; column 0 is
       mapped to column 3 (stored as the InChI).
    3. For every pair where both names have a mapped value of length >= 2, a
       line ``<d1>|<d2>|<i1>|<i2>|<se1>,<se2>,...`` is written to ``CPolySE``.
    """
    # Pass 1: side effects per drug pair.
    pairToEffects = {}
    statsFile = open("%s/%s" % (OUT_DIR, "ttStatsRe"))
    for rawLine in statsFile:
        columns = rawLine.strip().split("\t")
        pairKey, effect = columns[0], columns[1]
        utils.get_insert_key_dict(pairToEffects, pairKey, []).append(effect)
    statsFile.close()

    # Pass 2: drug name -> InChI lookup.
    nameToInchi = {}
    namesFile = open("%s/Data/DrugBank/DrugBankNames.txt" % params.C_DIR)
    for rawLine in namesFile:
        columns = rawLine.strip().split("||")
        nameToInchi[columns[0]] = columns[3]
    namesFile.close()

    # Pass 3: emit only the pairs for which both drugs resolve.
    outFile = open("%s/%s" % (OUT_DIR, "CPolySE"), "w")
    for pairKey, effects in pairToEffects.items():
        first, second = pairKey.split(",")
        inchiFirst = utils.get_dict(nameToInchi, first, -1)
        inchiSecond = utils.get_dict(nameToInchi, second, -1)
        resolved = not (inchiFirst == -1 or inchiSecond == -1)
        if resolved and len(inchiFirst) >= 2 and len(inchiSecond) >= 2:
            record = (first, second, inchiFirst, inchiSecond, ",".join(effects))
            outFile.write("%s|%s|%s|%s|%s\n" % record)
    outFile.close()
def merger():
    """Merge the drug lists of repeated ids in ReportDrug1.txt.

    Every input line (under ``params.CAD_OUT``) has the form
    ``<id>$<drug1>,<drug2>,...``.  All drugs seen for the same id are united
    and written to ``ReportDrug2.txt`` as ``<id>$<drug1>,<drug2>,...`` with
    the drugs sorted.

    The output file is created only after the input was parsed completely.

    Raises:
        IndexError: if a line has no ``$`` separator.
    """
    dId2Drugs = dict()
    with open("%s/ReportDrug1.txt" % params.CAD_OUT) as fin:
        for line in fin:
            parts = line.strip().split("$")
            reportId = parts[0]
            drugs = parts[1].split(",")
            utils.get_insert_key_dict(dId2Drugs, reportId, set()).update(drugs)

    with open("%s/ReportDrug2.txt" % params.CAD_OUT, "w") as fout:
        for k, v in dId2Drugs.items():
            fout.write("%s$%s\n" % (k, ",".join(sorted(v))))
def filterDrugMatching2():
    """Filter the partial matches of MatchingDrug2.txt and split them by count.

    ``rawMatching/Salt.txt`` supplies one salt name per line.  Each line of
    ``rawMatching/MatchingDrug2.txt`` is ``||``-separated: the first field is
    the source drug name and the last field the matched name.  A match is
    dropped when

    * the matched name is a salt and the source name does not start with
      ``hydro``; or
    * the source name contains ``sodium chloride`` and the matched name is
      ``chloride ion`` or ``chlorine``.

    Source names with exactly one remaining match go to
    ``MatchingDrug2_1.txt`` (``<name>||<match>``); all others go to
    ``MatchingDrug2_2.txt`` (``<name>||<m1>,<m2>,...``, matches sorted for a
    reproducible file).

    NOTE(review): a source name whose matches were all dropped is still
    written to ``MatchingDrug2_2.txt`` with an empty match list, as before --
    confirm this is intended.
    """
    with open("%s/rawMatching/Salt.txt" % params.OUTPUT_DIR) as fSalt:
        salts = {line.strip() for line in fSalt}

    d = dict()
    with open("%s/rawMatching/MatchingDrug2.txt" % params.OUTPUT_DIR) as fin:
        for line in fin:
            parts = line.strip().split("||")
            name = parts[0]
            mD = parts[-1]
            # Register the name first so fully filtered names stay in `d`.
            match2 = utils.get_insert_key_dict(d, name, set())
            if mD in salts and not name.startswith('hydro'):
                continue
            if 'sodium chloride' in name and mD in ('chloride ion', 'chlorine'):
                continue
            match2.add(mD)

    path1 = "%s/rawMatching/MatchingDrug2_1.txt" % params.OUTPUT_DIR
    path2 = "%s/rawMatching/MatchingDrug2_2.txt" % params.OUTPUT_DIR
    with open(path1, "w") as fout1, open(path2, "w") as fout2:
        for k, v in d.items():
            v = sorted(v)
            if len(v) == 1:
                fout1.write("%s||%s\n" % (k, v[0]))
            else:
                fout2.write("%s||%s\n" % (k, ",".join(v)))
def exportIndicationFile():
    """Group indication names by id and write them to Indications1.txt.

    Reads ``report_drug_indication.txt`` under ``CAD_FOLDER_INP`` line by
    line.  Each line is stripped, lower-cased and parsed as one
    ``$``-delimited CSV record; column 1 is the grouping id and column 4 the
    indication name.  The result is written to ``Indications1.txt`` under
    ``params.CAD_OUT`` as ``<id>$<name1>,<name2>,...``.

    Names are written in sorted order so that repeated runs produce the same
    file (plain set iteration order changes between interpreter runs).

    Raises:
        IndexError: if a line is blank or has fewer than five columns.
    """
    dId2Ses = dict()
    with codecs.open("%s/report_drug_indication.txt" % CAD_FOLDER_INP) as fin:
        for line in fin:
            ios = io.StringIO(line.strip().lower())
            vv = list(csv.reader(ios, delimiter='$'))[0]
            sId = vv[1]
            indcName = vv[4]
            utils.get_insert_key_dict(dId2Ses, sId, set()).add(indcName)

    with open("%s/Indications1.txt" % params.CAD_OUT, "w") as fout:
        for k, v in dId2Ses.items():
            fout.write("%s$%s\n" % (k, ",".join(sorted(v))))
def exportBySE(seNames, pathIn, dirOut, pathInfo):
    """Build 2x2 contingency tables for the given side effects and pass them to worker processes.

    Args:
        seNames: iterable of side-effect names; converted to a ``set`` when it
            is not one already.
        pathIn: input file whose lines look like
            ``<drugComb>$<se1>,<se2>,...``.
        dirOut: directory in which the output file is created; the file is
            named ``str(hash("__".join(seNames)))``.
        pathInfo: file to which one line ``<hash><TAB><joined names>`` is
            appended.

    For every side effect in ``seNames`` and every combination string that
    occurs together with it at least once, a table keyed
    ``"<drugComb>_<se>"`` is built (counts are numbers of input lines):

        [0, 0] lines with this combination and this side effect
        [1, 0] lines with this combination but without the side effect
        [0, 1] lines with the side effect but another combination
        [1, 1] all remaining lines

    The tables are split into ``params.N_DATA_WORKER`` contiguous slices (the
    last slice takes the remainder), each handed to a ``producer`` process;
    one daemon ``consumer`` process receives the queue and the output file.

    NOTE(review): ``hash()`` of a ``str`` is salted per interpreter process
    unless PYTHONHASHSEED is fixed, and set iteration order varies too, so the
    output file name differs between runs for the same ``seNames``.
    NOTE(review): if ``pathIn`` has no lines, ``dCombCount[se]`` below raises
    ``KeyError`` because that dict is only filled inside the read loop.
    """
    fin = open(pathIn)
    dCombCount = dict()  # se -> {drugComb: number of lines with that combination}
    dCombSe = dict()  # se -> {drugComb: number of lines with combination AND se}
    dSe = dict()  # se -> number of lines that contain se
    nA = 0  # total number of input lines
    print("Reading...")
    if not type(seNames) == set:
        seNames = set(seNames)
    print(seNames)
    while True:
        line = fin.readline()
        if line == "":
            break
        nA += 1
        parts = line.strip().split("$")
        drugCmb = parts[0]
        ses = parts[1]
        ses = set(ses.split(","))
        for se in seNames:
            # The combination is counted for every requested se, whether or
            # not the line contains it, so each per-se dict holds the same
            # per-combination line totals.
            dCombCountx = utils.get_insert_key_dict(dCombCount, se, dict())
            utils.add_dict_counter(dCombCountx, drugCmb)
            if se in ses:
                dComSEx = utils.get_insert_key_dict(dCombSe, se, dict())
                utils.add_dict_counter(dSe, se)
                utils.add_dict_counter(dComSEx, drugCmb)
    fin.close()
    print("Cal Contingency table...")
    dContigenTable = dict()
    for se in seNames:
        dCombCountx = dCombCount[se]
        dComSEx = utils.get_dict(dCombSe, se, dict())
        nSe = utils.get_dict(dSe, se, 0)
        if nSe == 0:
            # Side effect never observed in the input: nothing to tabulate.
            continue
        for drugComb, nComb in dCombCountx.items():
            ar = np.zeros((2, 2))
            nCombSe = utils.get_dict(dComSEx, drugComb, 0)
            if nCombSe == 0:
                # Combination never co-occurs with this side effect: skipped.
                continue
            ar[0, 0] = nCombSe
            ar[1, 0] = nComb - nCombSe
            ar[0, 1] = nSe - nCombSe
            ar[1, 1] = nA - (nComb + nSe - nCombSe)
            nName = "%s_%s" % (drugComb, se)
            dContigenTable[nName] = ar
    producers = []
    consumers = []
    queue = Queue(params.K_FOLD)
    counter = Value('i', 0)
    counter2 = Value('i', 0)
    inputList = list(dContigenTable.items())
    nInputList = len(inputList)
    nDPerWorker = int(nInputList / params.N_DATA_WORKER)
    # assert 'g-csf' in allDrugNames
    for i in range(params.N_DATA_WORKER):
        startInd = i * nDPerWorker
        endInd = (i + 1) * nDPerWorker
        endInd = min(endInd, nInputList)
        if i == params.N_DATA_WORKER - 1:
            # The last worker also takes the items left over by the division.
            endInd = nInputList
        data = inputList[startInd:endInd]
        producers.append(Process(target=producer, args=(queue, data)))
    sname = "__".join(list(seNames))
    seNameString = "%s" % hash(sname)
    # Record which side-effect set the hashed file name stands for.
    fFileNameMap = open(pathInfo, "a")
    fFileNameMap.write("%s\t%s\n" % (seNameString, sname))
    fFileNameMap.close()
    fout = open("%s/%s" % (dirOut, seNameString), "w")
    # NOTE(review): an open file object is passed to the child process; this
    # presumably relies on the "fork" start method -- TODO confirm.
    p = Process(target=consumer, args=(queue, counter, counter2, fout, []))
    p.daemon = True
    consumers.append(p)
    print("Start Producers...")
    for p in producers:
        p.start()
    print("Start Consumers...")
    for p in consumers:
        p.start()
    for p in producers:
        p.join()
    print("Finish Producers")
    # Sentinel placed after all producers have finished.
    queue.put(None)
    # Poll until `counter` becomes non-zero; presumably the consumer sets it
    # once it has handled the sentinel -- confirm against consumer().
    while True:
        if counter.value == 0:
            time.sleep(0.01)
            continue
        else:
            break
    fout.flush()
    fout.close()
def exportAllDict1():
    """Combine all name-matching sources into the final drug maps.

    Sources, all under ``params.OUTPUT_DIR`` and ``||``-separated:

    * ``rawMatching/MatchingDrug1.txt`` -- perfect matches; field 0 maps to
      field 1.
    * ``rawMatching/MatchingDrug2.txt`` -- partial matches; a line is skipped
      when field 1 is listed in ``rawMatching/Salt.txt`` (salt lines
      containing ``#`` are ignored), otherwise field 0 maps to
      ``dHardDrug[field 1]``, where ``dHardDrug`` is the first value returned
      by ``loadDrugBankNames()``.
    * ``typosMatching/MatchingDrugTypos.txt`` -- typo matches; lines
      containing ``#`` are ignored, field 0 maps to the last field.

    Names with exactly one target are written to ``finalMap/DrugMap1.txt``
    (``<name>||<target>``); all others to ``finalMap/DrugMap2.txt``
    (``<name>||<t1>|<t2>|...``, targets sorted for a reproducible file).

    Raises:
        IndexError: if a line of the first two sources has fewer than two
            fields.
        KeyError: if a non-salt partial match is missing from ``dHardDrug``.
    """
    dDict1 = dict()

    # Perfect matching:
    with open("%s/rawMatching/MatchingDrug1.txt" % params.OUTPUT_DIR, "r") as fin:
        for line in fin:
            parts = line.strip().split("||")
            utils.get_insert_key_dict(dDict1, parts[0], set()).add(parts[1])

    # Salt:
    with open("%s/rawMatching/Salt.txt" % params.OUTPUT_DIR) as fin:
        salts = {salt for salt in (raw.strip() for raw in fin)
                 if "#" not in salt}

    # Partial matching:
    dHardDrug, _, _ = loadDrugBankNames()
    with open("%s/rawMatching/MatchingDrug2.txt" % params.OUTPUT_DIR) as fin:
        for line in fin:
            parts = line.strip().split("||")
            if parts[1] in salts:
                continue
            drugBankName = dHardDrug[parts[1]]
            jaderName = parts[0]
            utils.get_insert_key_dict(dDict1, jaderName, set()).add(drugBankName)

    # Typos
    with open("%s/typosMatching/MatchingDrugTypos.txt" % params.OUTPUT_DIR) as fin:
        for line in fin:
            line = line.strip()
            if "#" in line:
                continue
            parts = line.split("||")
            utils.get_insert_key_dict(dDict1, parts[0], set()).add(parts[-1])

    path1 = "%s/finalMap/DrugMap1.txt" % params.OUTPUT_DIR
    path2 = "%s/finalMap/DrugMap2.txt" % params.OUTPUT_DIR
    with open(path1, "w") as fout, open(path2, "w") as fout2:
        for k, v in dDict1.items():
            v = sorted(v)
            if len(v) == 1:
                fout.write("%s||%s\n" % (k, v[0]))
            else:
                fout2.write("%s||%s\n" % (k, "|".join(v)))