def compareTools(reg1, reg2):
    """Summarise how well two copy-number outputs agree as one TSV row.

    Both arguments are region structures as accepted by ``lc.getFragments``;
    each must also expose ``"purity"`` and ``"ploidy"`` entries.

    The returned string holds 16 tab-separated columns, in this order:
    number of regions, base similarity, region similarity, the MCC of the
    "A", "N", "L" and "D" aberrations, the overall Jaccard index, the
    per-aberration Jaccard index ("A", "N", "L", "D"), purity of ``reg1``,
    purity of ``reg2``, ploidy of ``reg1`` and ploidy of ``reg2``.
    No trailing newline is added.
    """
    row_template = "{nr}\t{bs}\t{rs}\t{mcca}\t{mccn}\t{mccl}\t{mccd}\t{jcc}\t{jcca}\t{jccn}\t{jccl}\t{jccd}\t{pur1}\t{pur2}\t{plo1}\t{plo2}"
    # presumably amplification / normal / LOH / deletion -- confirm against
    # the constants used by the comparison library
    aberrations = ("A", "N", "L", "D")

    fragments = lc.getFragments(reg1, reg2)
    counts = lc.doComparison2(fragments, reg1, reg2)
    contingency = ls.doContingency(counts)

    values = {"jcc": ls.jaccardIndex(counts)}
    values["nr"] = ls.regionNumber(fragments)
    values["bs"] = ls.baseSimilarity(fragments, reg1, reg2)
    values["rs"] = ls.regSimilarity(fragments, reg1, reg2)

    # Per-aberration Jaccard index fills the {jcca}..{jccd} placeholders
    for ab in aberrations:
        values["jcc" + ab.lower()] = getJaccard(counts, ab)

    # Per-aberration Matthews correlation fills {mcca}..{mccd}
    for ab in aberrations:
        values["mcc" + ab.lower()] = contingency[ab]["MCC"]

    # Purity and ploidy as reported by each tool
    for suffix, region in (("1", reg1), ("2", reg2)):
        values["pur" + suffix] = region["purity"]
    for suffix, region in (("1", reg1), ("2", reg2)):
        values["plo" + suffix] = region["ploidy"]

    return row_template.format(**values)
def _appendSummaryLine(path, sample, value):
    """Append one ``sample<TAB>value`` line to the text file at ``path``."""
    with open(path, "a") as fi:
        fi.write("{}\t{}\n".format(sample, value))


def _storeToolSummary(tool, sample, region, jaccard, contingency):
    """Append the summary of one tool for one sample to the ``<tool>*.txt`` files.

    Purity and ploidy are always written. The Jaccard index and the
    per-aberration metrics are written only when ``contingency`` is not None,
    i.e. when the comparison against the array was actually computed.
    """
    _appendSummaryLine("{}Purities.txt".format(tool), sample, region["purity"])
    _appendSummaryLine("{}Ploidies.txt".format(tool), sample, region["ploidy"])
    if contingency is None:
        return
    _appendSummaryLine("{}Jaccard.txt".format(tool), sample, jaccard)
    # (file-name tag, key read from the contingency statistics)
    # NOTE(review): the *FDR* files receive the "ACC" value, exactly as the
    # previous code did. That looks like a copy-paste slip, but the keys
    # returned by st.doContingency are not visible here -- confirm an "FDR"
    # entry exists before switching the source key.
    metrics = (("ACC", "ACC"), ("TPR", "TPR"), ("TNR", "TNR"),
               ("PPV", "PPV"), ("FDR", "ACC"))
    for aberration, label in (("A", "amplification"), ("D", "deletion")):
        stats = contingency[aberration]
        for tag, key in metrics:
            _appendSummaryLine("{}{}{}.txt".format(tool, tag, label), sample, stats[key])


def launchAnalysis(folder, array, ascat, facets):
    """Compare the ASCAT and FACETS outputs of one sample against its array.

    Parameters:
        folder: directory of the sample. The process changes into it while the
            regions are loaded and compared; its last path component is used
            as the sample identifier in the summary files.
        array: path of the array file, relative to the parent of ``folder``
            (it is opened as ``../<array>`` from inside ``folder``).
        ascat: path of the ASCAT output, or "" to skip ASCAT.
        facets: path of the FACETS output, or "" to skip FACETS.

    Side effects:
        Appends one line per metric to ``ascat*.txt`` / ``facets*.txt`` files.
        These are opened with relative names after changing into the directory
        that contains this source file.
    """
    pythonpath = os.path.dirname(os.path.realpath(__file__))
    print("INFO: Analysing {}".format(folder))
    # Tolerate a trailing "/" so the identifier is never the empty string
    sample = folder.rstrip("/").split("/")[-1]

    ascatReg = None
    facetsReg = None
    jc1 = cm1 = None  # FACETS vs array: Jaccard index / contingency statistics
    jc2 = cm2 = None  # ASCAT vs array: Jaccard index / contingency statistics

    # Go to the working directory to run the comparison analysis
    os.chdir(folder)
    try:
        if ascat != "":
            ascatReg = comp.convert2region(ascat, "ascat")
        if facets != "":
            facetsReg = comp.convert2region(facets, "facets")
        arrayReg = comp.convert2region("../{}".format(array), "array")

        if facetsReg is not None and arrayReg is not None:
            regAr_F = comp.getFragments(arrayReg, facetsReg)
            jc1 = st.jaccardIndex(
                comp.doComparison2(regAr_F, facetsReg, arrayReg), ["A", "D"])
            cm1 = st.doContingency(
                comp.doComparison(regAr_F, facetsReg, arrayReg), ["A", "D"])

        if ascatReg is not None and arrayReg is not None:
            # Use the fragments computed for ASCAT. The previous code passed
            # the FACETS fragments here, which compared the wrong regions and
            # raised NameError whenever FACETS was not analysed.
            regAr_A = comp.getFragments(arrayReg, ascatReg)
            jc2 = st.jaccardIndex(
                comp.doComparison2(regAr_A, ascatReg, arrayReg), ["A", "D"])
            cm2 = st.doContingency(
                comp.doComparison(regAr_A, ascatReg, arrayReg), ["A", "D"])
    finally:
        # Always return to the script directory, even if the analysis failed
        os.chdir(pythonpath)

    # Store the summary data in the corresponding files
    if ascatReg is not None:
        _storeToolSummary("ascat", sample, ascatReg, jc2, cm2)
    if facetsReg is not None:
        _storeToolSummary("facets", sample, facetsReg, jc1, cm1)
import libcomparison as compi
import libgetters as ge
import libstatistics as sts
import libconstants as cts

# Walk-through test: load one PURPLE output and one FACETS output for the same
# TCGA case, split the genome into common fragments and print the agreement
# statistics. The messages printed at runtime are in Spanish and are kept
# verbatim.
# NOTE(review): the messages and the table header call the first data set
# "Array", but the file loaded below is a PURPLE output -- confirm which label
# is intended.
CASE_PREFIX = "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-25-1315/1948ef01_VS_d57f7ca3"
PURPLE_FILE = CASE_PREFIX + "_PURPLE/TUMOR.purple.cnv.somatic.tsv"
FACETS_FILE = CASE_PREFIX + "_FACETS/facets_comp_cncf.tsv"
ABERRATIONS = ["A", "D", "N"]

print(
    "INFO: Test unitario para comparar el output de un ejemplo de FACETS con los datos del array descargado desde TCGA"
)

# Step 1: parse both tool outputs into region structures
print("TEST 1) Extraer datos")
ar = compi.convert2region(PURPLE_FILE, "PURPLE")
fa = compi.convert2region(FACETS_FILE, "FACETS")

# Step 2: fragments shared by both outputs
print("TEST 2) Buscando las regiones en comun para estudio")
regs = compi.getFragments(ar, fa)

# Step 3: comparison table between the two tools
print("TEST 3) Crear la tabla comparativa 4x4")
dc = compi.doComparison(regs, ar, fa)
comparison_table = sts.printTable(dc, "Array", "FACETS", False)
print(comparison_table)

# Step 4: confusion-matrix statistics for each aberration
print("TEST 4) Resultados de la matriz de confusion para cada aberracion")
c1, c2 = sts.calculateCounts(dc)
dicContingency = sts.doContingency(dc, ABERRATIONS)
contingency_reports = (
    ("\tAmplificacion\n\t{}\n\n", "A"),
    ("\tDelecion\n\t{}\n\n", "D"),
    ("\tNormal\n\t{}\n", "N"),
)
for report_template, aberration in contingency_reports:
    print(report_template.format(dicContingency[aberration]))

# Step 5: Jaccard index computed from the second comparison routine
print("TEST 5) Jaccard index de cada una de las aberraciones")
jci2 = compi.doComparison2(regs, ar, fa)
jaccard = sts.jaccardIndex(jci2, ABERRATIONS)
print("\tJaccard index\n\t{}\n\n".format(jaccard))
def _writeTsvLine(path, mode, columns):
    """Write ``columns`` as one tab-separated line to ``path`` opened with ``mode``."""
    with open(path, mode) as fi:
        fi.write("\t".join("{}".format(col) for col in columns) + "\n")


def _comparisonColumns(first, second):
    """Return the metric columns comparing two region structures.

    Order: region similarity, base similarity, one MCC per entry of
    ``cte.aberrations``, Jaccard index, purity of ``first``, purity of
    ``second``, ploidy of ``first``, ploidy of ``second``.
    """
    regs = lc.getFragments(first, second)
    counts = lc.doComparison2(regs, first, second)
    contingency = ls.doContingency(counts)  # Get the MCC for all the aberrations
    jcc = ls.jaccardIndex(counts)  # Get the Jaccard index for all the aberrations
    columns = [ls.regSimilarity(regs, first, second),
               ls.baseSimilarity(regs, first, second)]
    columns.extend(contingency[ab]["MCC"] for ab in cte.aberrations)
    columns.append(jcc)
    columns.extend([first["purity"], second["purity"],
                    first["ploidy"], second["ploidy"]])
    return columns


def main():
    """Compare FACETS, ascatNGS and Sequenza pairwise for every OV case.

    For each tumor/control sample pair of each patient whose cancer is 'OV',
    the available tool outputs are converted to regions and compared two by
    two. One line per pair is appended to each of ``facetsVSascatngs.tsv``,
    ``facetsVSsequenza.tsv`` and ``ascatVSsequenza.tsv``; these files are
    truncated and given a header first. When one of the two outputs needed by
    a comparison is missing, every metric column of that line is "NA".
    """
    fvaFi = "facetsVSascatngs.tsv"
    fvsFi = "facetsVSsequenza.tsv"
    avsFi = "ascatVSsequenza.tsv"
    common = ["Case", "regSim", "baseSim", "MCCA", "MCCN", "MCCL", "MCCD", "jcc"]
    # Write the output files' header
    _writeTsvLine(fvaFi, "w", common + ["Fpurity", "Apurity", "Fploidy", "Aploidy"])
    _writeTsvLine(fvsFi, "w", common + ["Fpurity", "Spurity", "Fploidy", "Sploidy"])
    _writeTsvLine(avsFi, "w", common + ["Apurity", "Spurity", "Aploidy", "Sploidy"])

    # Placeholder metrics, as wide as a real row. The header assumes
    # cte.aberrations holds four entries (MCCA, MCCN, MCCL, MCCD).
    missing = ["NA"] * (7 + len(cte.aberrations))

    with dbcon:
        cur = dbcon.cursor()
        q = cur.execute("SELECT submitter FROM patient WHERE cancer='OV'")
        cases = q.fetchall()

    for c in cases:
        # Collect the sample identifiers registered for this case
        # NOTE(review): the submitter is interpolated into the SQL text. It
        # comes from the same database, but a bound parameter would be safer;
        # the driver behind `dbcon` is not visible here, so the placeholder
        # style could not be chosen with certainty.
        with dbcon:
            cur = dbcon.cursor()
            q = cur.execute(
                "SELECT uuid FROM sample WHERE submitter='{}' AND tumor LIKE '%Tumor%'"
                .format(c[0]))
            tumors = q.fetchall()
            q = cur.execute(
                "SELECT uuid FROM sample WHERE submitter='{}' AND tumor LIKE '%Normal%'"
                .format(c[0]))
            controls = q.fetchall()

        for tm in tumors:
            for cn in controls:
                # Get the absolute path and the prefix for the tool output
                tf = "{wd}/{sub}/{tm}_VS_{cn}".format(
                    wd=wd, sub=c[0], tm=tm[0].split("-")[0], cn=cn[0].split("-")[0])
                pair = tf.split("/")[-1]
                facets = "{}_FACETS/facets_comp_cncf.tsv".format(tf)
                ascat = mm.findAscatName("{}_ASCAT/".format(tf))
                sequenza = "{}_Sequenza/{}_segments.txt".format(tf, c[0])

                # Reset to None when a file is missing so that a region loaded
                # for a previous pair can never leak into this one.
                outf = lc.convert2region(facets, "facets") if os.path.isfile(facets) else None
                outa = lc.convert2region(ascat, "ascatngs") if os.path.isfile(ascat) else None
                outs = lc.convert2region(sequenza, "sequenza") if os.path.isfile(sequenza) else None

                # Each comparison reports purity/ploidy of exactly the two
                # tools named in its header. Previously the FACETS-vs-ascatNGS
                # row showed Sequenza values and the ascatNGS-vs-Sequenza row
                # showed FACETS values.
                comparisons = ((fvaFi, outf, outa),   # FACETS vs ascatNGS
                               (fvsFi, outf, outs),   # FACETS vs Sequenza
                               (avsFi, outa, outs))   # ascatNGS vs Sequenza
                rows = []
                for path, first, second in comparisons:
                    if first is not None and second is not None:
                        metrics = _comparisonColumns(first, second)
                    else:
                        metrics = missing
                    rows.append((path, [pair] + metrics))

                # Write the output in the corresponding files for each comparison
                for path, row in rows:
                    _writeTsvLine(path, "a", row)
# Continuation of a report script: `brca1`, `brca2`, `ascat`, `array`,
# `ascatngs`, `facets` and `sequenza` are defined before this point.
_separator = "----------------------------------------"

# Copy number each tool reports for BRCA1 (remaining tools of the listing)
_brca1_lines = (
    ("Array : {}", array),
    ("ascatNGS: {}", ascatngs),
    ("FACETS : {}", facets),
    ("Sequenza: {}", sequenza),
)
for _template, _tool_regions in _brca1_lines:
    print(_template.format(lg.getCopyNumber(brca1[1:3], brca1[0], _tool_regions)))
print(_separator)

# Copy number each tool reports for BRCA2
print("INFO: Copy number reported in BRCA2")
_brca2_lines = (
    ("ASCAT2 : {}", ascat),
    ("Array : {}", array),
    ("ascatNGS: {}", ascatngs),
    ("FACETS : {}", facets),
    ("Sequenza: {}", sequenza),
)
for _template, _tool_regions in _brca2_lines:
    print(_template.format(lg.getCopyNumber(brca2[1:3], brca2[0], _tool_regions)))
print(_separator)

# Agreement statistics between the ASCAT and the array regions
print("INFO: Comparing ASCAT vs Array")
regs = lc.getFragments(ascat, array)
_region_count = ls.regionNumber(regs)
print("\tGenome divided in {} regions".format(_region_count))
comp = lc.doComparison2(regs, ascat, array)
sts = ls.doContingency(comp)
print(ls.printTable(comp, "ASCAT2", "Array", False))

print("\n\tMatthews Correlation Coefficient")
for s, _stats in sts.items():
    print("\t\t{} - {}".format(s, _stats["MCC"]))

print("\n\tJaccard Index")
_jaccard = ls.jaccardIndex(comp)
print("\t\t{}".format(_jaccard))

print("\n\tBase ACC")
_base_acc = ls.baseSimilarity(regs, ascat, array)
print("\t\t{}".format(_base_acc))

print("\n\tRegion ACC")
_region_acc = ls.regSimilarity(regs, ascat, array)
print("\t\t{}".format(_region_acc))
print(_separator)

# Raw comparison structure, for inspection
print("INFO: Comparison numbers")
print(comp)