def getLOH(path, program, gene):
    """Return the copy number reported by a tool for the given gene.

    Parameters
    ----------
    path : str
        Output file of the copy-number tool.
    program : str
        Name of the tool that produced ``path``; forwarded unchanged to
        ``lc.convert2region``.
    gene : list
        ``gene[0]`` is passed to ``lg.getCopyNumber`` as its second argument
        and ``gene[1:3]`` as its first (presumably chromosome and
        [start, end] -- confirm against ``lg.getCopyNumber``).

    Returns
    -------
    The value returned by ``lg.getCopyNumber``, or the string "Not found"
    when ``path`` is not an existing file.
    """
    # Guard clause: nothing to parse when the tool did not produce an output.
    if not os.path.isfile(path):
        return "Not found"

    regions = lc.convert2region(path, program, "error")
    coordinates = gene[1:3]
    chromosome = gene[0]
    return lg.getCopyNumber(coordinates, chromosome, regions)
def getLOH(path, program, gene):
    """Look up the copy number (or LOH) of a gene and the sample purity.

    Parameters
    ----------
    path : str
        Output file from the program. The LOH (or CNV) is searched here.
    program : str
        Program that generated ``path``. Documented valid values are:
        facets, ascatngs, sequenza, purple, and ascatarray.
    gene : list
        List holding the chromosome, start and end position of the gene
        where the LOH is searched (``gene[0]`` and ``gene[1:3]`` are used).

    Returns
    -------
    tuple
        ``(sol, pur)`` where ``sol`` is the value returned by
        ``lg.getCopyNumber`` for the region (documented as A, D, L, or N)
        and ``pur`` is the value returned by ``lg.getPurity``.
        Both elements are the string "Not found" when ``path`` is not an
        existing file.
    """
    # Without an output file there is nothing to convert or query.
    if not os.path.isfile(path):
        return ("Not found", "Not found")

    regions = lc.convert2region(path, program, "error")
    purity = lg.getPurity(regions)
    copy_number = lg.getCopyNumber(gene[1:3], gene[0], regions)
    return (copy_number, purity)
diff = [] bed1 = [] bed2 = [] tf = "{wd}/{sub}/{tm}_VS_{cn}".format(wd=wd, sub=c[0], tm=tm[0].split("-")[0], cn=cn[0].split("-")[0]) filename = tf.split("/")[-1] output = "{}.txt".format(filename) output1 = "{}.regsCoin.bed".format(filename) output2 = "{}.regsDiff.bed".format(filename) facets = "{}_FACETS/facets_comp_cncf.tsv".format(tf) ascat = mm.findAscatName("{}_ASCAT/".format(tf)) sequenza = "{}_Sequenza/{}_segments.txt".format(tf, c[0]) if os.path.isfile(facets): outf = lc.convert2region(facets, "facets", "quiet") if os.path.isfile(ascat): outa = lc.convert2region(ascat, "ascatngs", "quiet") if os.path.isfile(sequenza): outs = lc.convert2region(sequenza, "sequenza", "quiet") # Compare FACETS vs ascatNGS # if os.path.isfile(facets) and os.path.isfile(ascat) : # regs = lc.getFragments(outf, outa) # compareRegions(regs, outf, outa, same, diff, bed1, bed2) # # with open(output, "w") as fi : # fi.write(",".join(same)) # fi.write("\n") # fi.write(",".join(diff)) # fi.write("\n") # with open(output1, "w") as fi :
q = cur.execute("SELECT uuid FROM sample WHERE submitter='{}' AND tumor LIKE '%Normal%'".format(c[0])) controls = q.fetchall() for tm in tumors : for cn in controls : # Get the analysis absolute path analysis = "{tm}_VS_{cn}".format(tm = tm[0].split("-")[0], cn = cn[0].split("-")[0]) linea = "{}\t".format(analysis) # Get the variant annotation file name tf = "{wd}/{sub}/{tumor}".format(wd = wd, sub = c[0], tumor = tm[0]) cf = "{wd}/{sub}/{control}".format(wd = wd, sub = c[0], control = cn[0]) platypust = "{}/platypusGerm/platypus.hg38_multianno.txt".format(tf) platypusc = "{}/platypusGerm/platypus.hg38_multianno.txt".format(cf) # Get the FACETS, ascatNGS and sequenza output in REGION format ficFa = "{wd}/{sub}/{folder}_FACETS/facets_comp_cncf.tsv".format(wd = wd, sub = c[0], folder = analysis) if os.path.isfile(ficFa) : regFa = lc.convert2region(ficFa, "facets") else : regFa = "X" ficAs = lib.findAscatName("{wd}/{case}/{folder}_ASCAT/".format(wd = wd, case = c[0], folder = analysis)) if os.path.isfile(ficAs) : regAs = lc.convert2region(ficAs, "ascatngs") else : regAs = "X" ficSe = "{wd}/{case}/{folder}_Sequenza/{case}_segments.txt".format(folder = analysis, case = c[0], wd = wd) if os.path.isfile(ficSe) : regSe = lc.convert2region(ficSe, "sequenza") else : regSe = "X" # Get the information regarding the worst variant in the gene selected found in platypus variant calling variant = lib.getWorst(platypusc, "BRCA1") linea += "{}\t".format(variant)
def main(cancer="OV"):
    """Write a per-analysis summary of every copy-number tool to ``meanCN.tsv``.

    For each submitter of ``cancer`` found in the database, every
    tumor/control sample pair is visited. For each pair the outputs of
    ASCAT2, FACETS, ascatNGS, Sequenza and PURPLE are converted to regions
    with ``lc.convert2region`` and summarised with ``ls.meanCoverage``.
    A tool whose output is missing contributes "NA" in all its columns.

    Parameters
    ----------
    cancer : str
        Cancer code used both to select the submitters in the ``patient``
        table and to build the working directory.

    Notes
    -----
    ``dbcon`` is a module-level connection; the queries use ``?``
    placeholders (sqlite3 "qmark" style) instead of string formatting.
    """
    wd = "/g/strcombio/fsupek_cancer2/TCGA_bam/{}".format(cancer)
    na = "NA"
    outputFile = "meanCN.tsv"
    # Rows are collected in a list and joined once at the end, which avoids
    # re-copying an ever-growing string on each appended row.
    rows = [
        "submitter\tcase\tfac_meanCN\tfac_purity\tfac_ploidy\tfac_aberration\tasc_meanCN\tasc_aberration\tseq_meanCN\tseq_purity\tseq_ploidy\tseq_aberration\tpur_meanCN\tpur_purity\t"
        "pur_ploidy\tpur_aberration\tngs_meanCN\tngs_purity\tngs_ploidy\tngs_aberration\n"
    ]
    row_format = "{sub}\t{an}\t{fmcn}\t{fpu}\t{fpl}\t{fab}\t{acn}\t{aab}\t{scn}\t{spu}\t{spl}\t{sab}\t{pcn}\t{ppu}\t{ppl}\t{pab}\t{ncn}\t{npu}\t{npl}\t{nab}\n"

    def blank_region():
        # Placeholder used when a tool has no output for the analysis.
        return {"purity": na, "ploidy": na}

    def blank_stats():
        # Placeholder for the mean copy number and the percentages of
        # (A)mplification, (L)OH, (D)eletion and (N)ormal copy number.
        return {"meanCN": na, "perA": na, "perL": na, "perD": na, "perN": na}

    def analyse(path, program):
        # Convert a tool output to regions and compute its statistics.
        region = lc.convert2region(path, program, "error")
        return region, ls.meanCoverage(region)

    # Get submitters list
    with dbcon:
        cur = dbcon.cursor()
        q = cur.execute("SELECT submitter FROM patient WHERE cancer=?", (cancer,))
        cases = q.fetchall()

    print("INFO: Analysis done in {} cases".format(len(cases)))
    for count, c in enumerate(cases, 1):
        if count % 100 == 0:
            print("INFO: {} cases done".format(count))
        with dbcon:
            cur = dbcon.cursor()
            q = cur.execute(
                "SELECT uuid, bamName FROM sample WHERE submitter=? AND tumor LIKE '%Tumor%'",
                (c[0],))
            tumors = q.fetchall()
            q = cur.execute(
                "SELECT uuid, bamName FROM sample WHERE submitter=? AND tumor LIKE '%Normal%'",
                (c[0],))
            controls = q.fetchall()

        workindir = "{wd}/{sub}".format(wd=wd, sub=c[0])
        for tm in tumors:
            for cn in controls:
                # The folder format for FACETS, ascatNGS, and Sequenza is
                # "[tumorUUID]_VS_[controlUUID]" (first dash-separated token
                # of each uuid).
                analysisdir = "{}_VS_{}".format(
                    tm[0].split("-")[0], cn[0].split("-")[0])

                # Defaults: every tool starts as "NA" and is overwritten
                # only when its output is found.
                sAscat = blank_stats()
                rFacets, sFacets = blank_region(), blank_stats()
                rNgs, sNgs = blank_region(), blank_stats()
                rSequenza, sSequenza = blank_region(), blank_stats()
                rPurple, sPurple = blank_region(), blank_stats()

                # From ASCAT2: only the first file listed in the folder is used.
                folder = "{}/ASCAT2".format(workindir)
                if os.path.isdir(folder):
                    ascatFiles = os.listdir(folder)
                    if ascatFiles:
                        # TODO: Check all ASCAT files
                        ascat = "{wd}/{fi}".format(wd=folder, fi=ascatFiles[0])
                        _, sAscat = analyse(ascat, "ascatarray")

                # From FACETS
                facets = "{wd}/{folder}_FACETS/facets_comp_cncf.tsv".format(
                    wd=workindir, folder=analysisdir)
                if os.path.isfile(facets):
                    rFacets, sFacets = analyse(facets, "facets")

                # From ascatNGS: the helper returns "Not found" when there
                # is no output to read.
                ascatngs = lib.findAscatName("{wd}/{folder}_ASCAT/".format(
                    wd=workindir, folder=analysisdir))
                if ascatngs != "Not found":
                    rNgs, sNgs = analyse(ascatngs, "ascatngs")

                # From Sequenza
                sequenza = "{wd}/{folder}_Sequenza/{case}_segments.txt".format(
                    folder=analysisdir, case=c[0], wd=workindir)
                if os.path.isfile(sequenza):
                    rSequenza, sSequenza = analyse(sequenza, "sequenza")

                # From PURPLE
                purple = "{wd}/{folder}_PURPLE/TUMOR.purple.cnv.somatic.tsv".format(
                    wd=workindir, folder=analysisdir)
                if os.path.isfile(purple):
                    rPurple, sPurple = analyse(purple, "purple")

                # Keep the row in RAM; everything is written at the end.
                rows.append(row_format.format(
                    sub=c[0], an=analysisdir,
                    fmcn=sFacets["meanCN"], fpu=rFacets["purity"],
                    fpl=rFacets["ploidy"], fab=convertToCSV(sFacets),
                    acn=sAscat["meanCN"], aab=convertToCSV(sAscat),
                    scn=sSequenza["meanCN"], spu=rSequenza["purity"],
                    spl=rSequenza["ploidy"], sab=convertToCSV(sSequenza),
                    pcn=sPurple["meanCN"], ppu=rPurple["purity"],
                    ppl=rPurple["ploidy"], pab=convertToCSV(sPurple),
                    ncn=sNgs["meanCN"], npu=rNgs["purity"],
                    npl=rNgs["ploidy"], nab=convertToCSV(sNgs)))

    with open(outputFile, "w") as fi:
        fi.write("".join(rows))

    print("INFO: Data stored in {} file".format(outputFile))
def _main6_list_files(tool, workindir):
    """Return ``(identifier, path)`` pairs for every output of ``tool`` in ``workindir``.

    ASCAT2 and SNP-array outputs are the entries of the ``ASCAT2`` / ``Array``
    sub-folders and are identified by their file name; a missing folder
    yields an empty list. The remaining tools are located by the
    ``getFACETS`` / ``getAscatNGS`` / ``getSequenza`` / ``getPurple`` helpers
    and are identified by the value those helpers return.
    """
    if tool in ("ascat2", "array"):
        folder = "{}/{}".format(workindir, "ASCAT2" if tool == "ascat2" else "Array")
        if not os.path.isdir(folder):
            return []
        return [(name, "{}/{}".format(folder, name)) for name in os.listdir(folder)]
    if tool == "facets":
        found = getFACETS(workindir)
    elif tool == "ascatngs":
        found = getAscatNGS(workindir)
    elif tool == "sequenza":
        found = getSequenza(workindir)
    elif tool == "purple":
        found = getPurple(workindir)
    else:
        raise ValueError("Unknown tool: {}".format(tool))
    return [(path, path) for path in found]


def _main6_convert(tool, path, folder_mode):
    """Convert one tool output to regions with ``lc.convert2region``.

    ``folder_mode`` is the third argument used for ASCAT2/array files; when
    it is None the call is made with two arguments only, so that
    ``lc.convert2region`` applies its own default. All other tools are
    always converted with "error".
    """
    programs = {
        "ascat2": "ascatarray",
        "array": "array",
        "facets": "facets",
        "ascatngs": "ascatngs",
        "sequenza": "sequenza",
        "purple": "purple",
    }
    if tool in ("ascat2", "array"):
        if folder_mode is None:
            return lc.convert2region(path, programs[tool])
        return lc.convert2region(path, programs[tool], folder_mode)
    return lc.convert2region(path, programs[tool], "error")


def _main6_append_row(stem, sub, id1, id2, reg1, reg2):
    """Append one comparison row to ``<stem>.tsv``, calling ``createFile`` first if it is missing."""
    filename = "{}.tsv".format(stem)
    if not os.path.isfile(filename):
        createFile(filename)
    with open(filename, "a") as fi:
        fi.write("{sub}\t{id1}\t{id2}\t{cmp}\n".format(
            sub=sub, id1=id1, id2=id2, cmp=compareTools(reg1, reg2)))


def _main6_compare_submitter(sub, workindir, truth, folder_mode, partners):
    """Compare every ``truth`` output of one submitter against all partner tools."""
    truth_files = _main6_list_files(truth, workindir)
    if not truth_files:
        return
    # Partner outputs are listed once per submitter instead of once per
    # truth file.
    partner_files = {}
    for tool, _ in partners:
        if tool != truth:
            partner_files[tool] = _main6_list_files(tool, workindir)

    for id1, path1 in truth_files:
        # The truth region is converted once and reused for every partner.
        reg1 = _main6_convert(truth, path1, folder_mode)
        for tool, stem in partners:
            if tool == truth:
                # Self comparison: a single row comparing the region with itself.
                _main6_append_row(stem, sub, id1, id1, reg1, reg1)
                continue
            for id2, path2 in partner_files[tool]:
                reg2 = _main6_convert(tool, path2, folder_mode)
                _main6_append_row(stem, sub, id1, id2, reg1, reg2)


def _main6_move_outputs(stems, dest):
    """Move the ``<stem>.tsv`` files that exist into ``dest``, creating it if needed."""
    if not os.path.isdir(dest):
        os.mkdir(dest)
    for stem in stems:
        filename = "{}.tsv".format(stem)
        # A file is only created when at least one comparison was written,
        # so it can legitimately be absent; renaming it blindly would raise
        # FileNotFoundError and abort the remaining analyses.
        if os.path.isfile(filename):
            os.rename(filename, "{}/{}".format(dest, filename))
        else:
            print("WARNING: {} was not created. Nothing to move".format(filename))


def main():
    """Cross-compare the LOH/CNV outputs of all tools for every OV submitter.

    Six passes are done, each one taking a different tool as reference
    (ASCAT2, SNP-arrays, FACETS, ascatNGS, Sequenza and PURPLE). In every
    pass each reference output is compared (``compareTools``) with itself
    and with every output of the other five tools found for the same
    submitter. One row ``submitter, id1, id2, comparison`` is appended per
    comparison to ``<reference>VS<tool>.tsv``, and at the end of the pass the
    files are moved to the ``main6`` folder.

    The program stops with exit status 1 if ``main6`` already exists.
    """
    # Constants
    dbcon = sqlite3.connect("/g/strcombio/fsupek_cancer2/TCGA_bam/info/info.db")
    cancer = "OV"
    cancerpath = "/g/strcombio/fsupek_cancer2/TCGA_bam/"
    outdir = "main6"

    if os.path.isdir(outdir):
        print("ERROR: Folder for output already exists. Remove it before to continue")
        sys.exit(1)

    # Get the OV submitters from the database
    with dbcon:
        c = dbcon.cursor()
        x = c.execute("SELECT submitter FROM patient WHERE cancer=?", (cancer,))
        submitters = x.fetchall()

    # (reference tool, label printed in the log, third argument used to
    # convert ASCAT2/array files, ((partner tool, output file stem), ...)).
    # The stems are irregular ("array" vs "arrays"), so they are spelled out.
    plan = (
        ("ascat2", "ASCAT", None, (
            ("ascat2", "ascat2VSascat2"),
            ("array", "ascat2VSarray"),
            ("facets", "ascat2VSfacets"),
            ("ascatngs", "ascat2VSascatNGS"),
            ("sequenza", "ascat2VSsequenza"),
            ("purple", "ascat2VSpurple"),
        )),
        ("array", "arrays", None, (
            ("array", "arrayVSarray"),
            ("ascat2", "arrayVSascat2"),
            ("facets", "arrayVSfacets"),
            ("ascatngs", "arrayVSascatNGS"),
            ("sequenza", "arrayVSsequenza"),
            ("purple", "arrayVSpurple"),
        )),
        ("facets", "FACETS", "error", (
            ("facets", "facetsVSfacets"),
            ("ascat2", "facetsVSascat2"),
            ("array", "facetsVSarrays"),
            ("ascatngs", "facetsVSascatNGS"),
            ("sequenza", "facetsVSsequenza"),
            ("purple", "facetsVSpurple"),
        )),
        ("ascatngs", "ascatNGS", "error", (
            ("ascatngs", "ascatNGSVSascatNGS"),
            ("ascat2", "ascatNGSVSascat2"),
            ("array", "ascatNGSVSarrays"),
            ("facets", "ascatNGSVSfacets"),
            ("sequenza", "ascatNGSVSsequenza"),
            ("purple", "ascatNGSVSpurple"),
        )),
        ("sequenza", "Sequenza", "error", (
            ("sequenza", "sequenzaVSsequenza"),
            ("ascat2", "sequenzaVSascat2"),
            ("array", "sequenzaVSarrays"),
            ("facets", "sequenzaVSfacets"),
            ("ascatngs", "sequenzaVSascatNGS"),
            ("purple", "sequenzaVSpurple"),
        )),
        ("purple", "PURPLE", "error", (
            ("purple", "purpleVSpurple"),
            ("ascat2", "purpleVSascat2"),
            ("array", "purpleVSarrays"),
            ("facets", "purpleVSfacets"),
            ("ascatngs", "purpleVSascatNGS"),
            ("sequenza", "purpleVSsequenza"),
        )),
    )

    for truth, label, folder_mode, partners in plan:
        for sub in submitters:
            s = sub[0]
            # cancerpath already ends with "/"; the format is kept as is
            # because the paths found below are written to the output rows.
            workindir = "{}/{}/{}".format(cancerpath, cancer, s)
            print("INFO: Checking {} {}".format(s, label))
            _main6_compare_submitter(s, workindir, truth, folder_mode, partners)
        # Move the output data of this pass to the output folder
        _main6_move_outputs([stem for _, stem in partners], outdir)
def _compareWithArray(toolReg, arrayReg):
    """Return ``(jaccard, contingency)`` of one tool against the array regions, for "A" and "D"."""
    fragments = comp.getFragments(arrayReg, toolReg)
    jaccard = st.jaccardIndex(
        comp.doComparison2(fragments, toolReg, arrayReg), ["A", "D"])
    contingency = st.doContingency(
        comp.doComparison(fragments, toolReg, arrayReg), ["A", "D"])
    return jaccard, contingency


def _writeToolSummary(tool, sample, region, comparison):
    """Append the summary of one tool for ``sample`` to its ``<tool>*.txt`` files.

    Purity and ploidy are always written. The Jaccard index and the
    contingency metrics are written only when ``comparison`` (a
    ``(jaccard, contingency)`` tuple) is available.
    """
    def append(filename, value):
        with open(filename, "a") as fi:
            fi.write("{}\t{}\n".format(sample, value))

    append("{}Purities.txt".format(tool), region["purity"])
    append("{}Ploidies.txt".format(tool), region["ploidy"])
    if comparison is None:
        return

    jaccard, contingency = comparison
    append("{}Jaccard.txt".format(tool), jaccard)
    for code, label in (("A", "amplification"), ("D", "deletion")):
        for metric in ("ACC", "TPR", "TNR", "PPV"):
            append("{}{}{}.txt".format(tool, metric, label),
                   contingency[code][metric])
        # NOTE(review): the FDR file receives the "ACC" value, exactly as the
        # code did before. It looks like a copy-paste slip, but whether the
        # contingency data has an "FDR" key cannot be confirmed from here.
        append("{}FDR{}.txt".format(tool, label), contingency[code]["ACC"])


def launchAnalysis(folder, array, ascat, facets):
    """Compare the ASCAT and FACETS outputs of one sample against its array.

    Parameters
    ----------
    folder : str
        Working directory of the analysis. Its last "/"-separated component
        is used as sample identifier in the output files.
    array : str
        Array file, given relative to the parent of ``folder``.
    ascat : str
        ASCAT output to read from inside ``folder``; "" when not available.
    facets : str
        FACETS output to read from inside ``folder``; "" when not available.

    The process changes to ``folder`` to read the inputs and always returns
    to the directory of this script afterwards, even if the comparison
    fails. The summary files (``ascat*.txt`` / ``facets*.txt``) are appended
    in the script directory.
    """
    pythonpath = os.path.dirname(os.path.realpath(__file__))
    print("INFO: Analysing {}".format(folder))
    sample = folder.split("/")[-1]
    arrayPath = "../{}".format(array)
    ascatReg = None
    facetsReg = None
    ascatCmp = None
    facetsCmp = None

    # Go to the working directory to run the comparison analysis
    os.chdir(folder)
    try:
        if ascat != "":
            ascatReg = comp.convert2region(ascat, "ascat")
        if facets != "":
            facetsReg = comp.convert2region(facets, "facets")
        arrayReg = comp.convert2region(arrayPath, "array")

        if facetsReg is not None and arrayReg is not None:
            facetsCmp = _compareWithArray(facetsReg, arrayReg)
        if ascatReg is not None and arrayReg is not None:
            # The ASCAT comparison uses its own array/ASCAT fragments; it
            # previously reused the fragments computed for FACETS.
            ascatCmp = _compareWithArray(ascatReg, arrayReg)
    finally:
        # Return to the script path whatever happens above, so later
        # relative paths are not resolved against the sample folder.
        os.chdir(pythonpath)

    # Store the summary data in the corresponding files
    if ascatReg is not None:
        _writeToolSummary("ascat", sample, ascatReg, ascatCmp)
    if facetsReg is not None:
        _writeToolSummary("facets", sample, facetsReg, facetsCmp)
def _loadRegion(path, program):
    """Convert the output file to REGION format. Return None if the file does not exist."""
    if os.path.isfile(path):
        return lc.convert2region(path, program)
    return None


def _compareTools(first, second):
    """Build the columns (all but the case name) that compare two REGION outputs.

    When any of the outputs is missing, every column is "NA".
    """
    if first is None or second is None:
        # regSim, baseSim, one MCC per aberration, jcc, two purities and two ploidies
        return ["NA"] * (7 + len(cte.aberrations))
    regs = lc.getFragments(first, second)
    comparison = lc.doComparison2(regs, first, second)
    sts = ls.doContingency(comparison)  # Get the MCC for all the aberrations
    jcc = ls.jaccardIndex(comparison)  # Get the Jaccard index for all the aberrations
    columns = [ls.regSimilarity(regs, first, second), ls.baseSimilarity(regs, first, second)]
    columns.extend(sts[ab]["MCC"] for ab in cte.aberrations)
    # Purity and ploidy come from the two tools that are being compared, in the header order
    columns.extend([jcc, first["purity"], second["purity"], first["ploidy"], second["ploidy"]])
    return columns


def _appendRow(path, columns):
    """Append the columns as one tab-separated line to the file passed as parameter."""
    with open(path, "a") as fi:
        fi.write("\t".join(str(col) for col in columns) + "\n")


def main():
    """Compare FACETS, ascatNGS and Sequenza by pairs in all the OV cases of the database

    For each tumor/control pair of each case, the outputs of the three tools are
    converted to REGION format and compared by pairs. Each comparison is stored in
    its own file (facetsVSascatngs.tsv, facetsVSsequenza.tsv and
    ascatVSsequenza.tsv), one line per pair. If an output file is missing, the
    comparisons that need it are filled with "NA".
    """
    fvaFi = "facetsVSascatngs.tsv"
    fvsFi = "facetsVSsequenza.tsv"
    avsFi = "ascatVSsequenza.tsv"
    # Write the output files' header
    common = "Case\tregSim\tbaseSim\tMCCA\tMCCN\tMCCL\tMCCD\tjcc\t"
    headers = (
        (fvaFi, "Fpurity\tApurity\tFploidy\tAploidy\n"),
        (fvsFi, "Fpurity\tSpurity\tFploidy\tSploidy\n"),
        (avsFi, "Apurity\tSpurity\tAploidy\tSploidy\n"),
    )
    for path, tail in headers:
        with open(path, "w") as fi:
            fi.write(common + tail)

    with dbcon:
        cur = dbcon.cursor()
        q = cur.execute("SELECT submitter FROM patient WHERE cancer='OV'")
        cases = q.fetchall()

    for c in cases:
        # Collect the tumor and control samples registered for the case
        with dbcon:
            cur = dbcon.cursor()
            # Placeholders instead of str.format, so quotes in the submitter cannot break the query
            q = cur.execute(
                "SELECT uuid FROM sample WHERE submitter=? AND tumor LIKE '%Tumor%'",
                (c[0],))
            tumors = q.fetchall()
            q = cur.execute(
                "SELECT uuid FROM sample WHERE submitter=? AND tumor LIKE '%Normal%'",
                (c[0],))
            controls = q.fetchall()

        for tm in tumors:
            for cn in controls:
                # Get the absolute path and the prefix for the tool output
                tf = "{wd}/{sub}/{tm}_VS_{cn}".format(wd=wd,
                                                      sub=c[0],
                                                      tm=tm[0].split("-")[0],
                                                      cn=cn[0].split("-")[0])
                prefix = tf.split("/")[-1]
                facets = "{}_FACETS/facets_comp_cncf.tsv".format(tf)
                ascat = mm.findAscatName("{}_ASCAT/".format(tf))
                sequenza = "{}_Sequenza/{}_segments.txt".format(tf, c[0])
                # Missing outputs become None, so data from a previous pair is never reused
                outf = _loadRegion(facets, "facets")
                outa = _loadRegion(ascat, "ascatngs")
                outs = _loadRegion(sequenza, "sequenza")

                # Write the output in the corresponding files for each comparison
                _appendRow(fvaFi, [prefix] + _compareTools(outf, outa))
                _appendRow(fvsFi, [prefix] + _compareTools(outf, outs))
                _appendRow(avsFi, [prefix] + _compareTools(outa, outs))
#!/usr/bin/python # -*- coding: utf-8 -*- """ MAIN: Testing the function that counts the number of bases that have each aberration """ import libcomparison as lc import libstatistics as ls import libconstants as ct import libgetters as lg print("INFO: Loading example from FACETS") f = lc.convert2region( "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-13-0757/1f1f7441_VS_26fa0e90_FACETS/facets_comp_cncf.tsv", "facets") print("INFO: Loading example from ascatNGS") a = lc.convert2region( "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-13-0757/1f1f7441_VS_26fa0e90_ASCAT/TCGA-04-1331-01A-01W.copynumber.caveman.csv", "ascatngs") print("INFO: Loading example from Sequenza") s = lc.convert2region( "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-13-0757/1f1f7441_VS_26fa0e90_Sequenza/TCGA-04-1331_segments.txt", "sequenza") print("INFO: Loading example from PURPLE") p = lc.convert2region( "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-13-0757/1f1f7441_VS_26fa0e90_PURPLE/TUMOR.purple.cnv.somatic.tsv", "purple") allbases = [] it = 0 current = 0 """
""" MAIN: Example of introduction of sequenza """ """ The test includes: 1. Open the sample outputs from sequenza, facets, ascatNGS, and array respectively 2. Compare all tools against arrays in the whole genome 3. Get the copy number reported by the 4 tools in specific regions: BRCA1, BRCA2, PALB2, and ATM """ import libcomparison as lc import libgetters as lg import libstatistics as ls # Open the output for the sample TCGA-04-1332 output from all the tools sequenza = lc.convert2region( "../90cf56c6_VS_f4b549d0_Sequenza/TCGA-04-1332_segments.txt", "sequenza") facets = lc.convert2region( "../90cf56c6c_VS_f4b549d0_FACETS/facets_comp_cncf.tsv", "facets") ascat = lc.convert2region( "../90cf56c6_VS_f4b549d0_ASCAT/H_GP-04-1332-01A-01W-0488-09-1.copynumber.caveman.csv", "ascatngs") array = lc.convert2region( "../73a3a9bb-7dfc-4fc5-9f31-b2630c82010b_Array/QUANT_p_TCGA_Batch12_AFFX_GenomeWideSNP_6_F05_437768.grch38.seg.v2.txt", "array") print("INFO: Arxius oberts satisfactoriament") # Print the counts in each file print("\nINFO: Resum de les dades obteses en cada eina") car = ls.countsXtool(array) cs = ls.countsXtool(sequenza) cf = ls.countsXtool(facets)
4) Extract confusion matrix for (A)mplification, (D)eletion, and (N)ormal copy number
5) Calculate the Jaccard index for the same aberrations
"""
import libextractfile as exfi
import libcomparison as compi
import libgetters as ge
import libstatistics as sts
import libconstants as cts

print(
    "INFO: Test unitario para comparar el output de un ejemplo de FACETS con los datos del array descargado desde TCGA"
)
# TEST 1: convert both example outputs to REGION format.
# NOTE(review): "ar" is loaded from a PURPLE output, although the messages and the
# table label below call it "Array" - confirm which input is intended.
print("TEST 1) Extraer datos")
ar = compi.convert2region(
    "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-25-1315/1948ef01_VS_d57f7ca3_PURPLE/TUMOR.purple.cnv.somatic.tsv",
    "PURPLE")
fa = compi.convert2region(
    "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-25-1315/1948ef01_VS_d57f7ca3_FACETS/facets_comp_cncf.tsv",
    "FACETS")
# TEST 2: get the fragments that are used to compare both outputs
print("TEST 2) Buscando las regiones en comun para estudio")
regs = compi.getFragments(ar, fa)
# TEST 3: build the comparison table between both outputs and print it
print("TEST 3) Crear la tabla comparativa 4x4")
dc = compi.doComparison(regs, ar, fa)
print(sts.printTable(dc, "Array", "FACETS", False))
# TEST 4: contingency data for amplification (A), deletion (D) and normal (N)
print("TEST 4) Resultados de la matriz de confusion para cada aberracion")
# c1 and c2 are not used in the visible part of this script
c1, c2 = sts.calculateCounts(dc)
dicContingency = sts.doContingency(dc, ["A", "D", "N"])
print("\tAmplificacion\n\t{}\n\n".format(dicContingency["A"]))
print("\tDelecion\n\t{}\n\n".format(dicContingency["D"]))
print("\tNormal\n\t{}\n".format(dicContingency["N"]))
    1. Open example outputs from sequenza, facets, ascatNGS, array, and ASCAT2
    2. Count the aberrations reported by each tool
    3. Get the copy number reported by the all outputs in BRCA1 and BRCA2 genes
"""
import libcomparison as lc
import libgetters as lg
import libstatistics as ls

# BRCA1/2 gene coordinates as reported by bioGPS.
# Each list is [chromosome, start, end].
brca1 = ["17", 43044295, 43170245]
brca2 = ["13", 32315086, 32400266]

# Convert the files to REGION format. All the files belong to the case TCGA-04-1332.
# NOTE(review): the third argument "error" is passed for ascatNGS, Sequenza and
# FACETS but not for the two array-based inputs - presumably a verbosity/error
# mode of convert2region; confirm that the omission is intended.
ascatngs = lc.convert2region(
    "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-04-1332/90cf56c6_VS_f4b549d0_ASCAT/H_GP-04-1332-01A-01W-0488-09-1.copynumber.caveman.csv",
    "ascatngs", "error")
sequenza = lc.convert2region(
    "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-04-1332/90cf56c6_VS_f4b549d0_Sequenza/TCGA-04-1332_segments.txt",
    "sequenza", "error")
facets = lc.convert2region(
    "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-04-1332/90cf56c6_VS_f4b549d0_FACETS/facets_comp_cncf.tsv",
    "facets", "error")
array = lc.convert2region(
    "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-04-1332/Array/QUANT_p_TCGA_Batch12_AFFX_GenomeWideSNP_6_E11_437726.grch38.seg.v2.txt",
    "array")
ascat = lc.convert2region(
    "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-04-1332/ASCAT2/TCGA-OV.79e63073-7d6d-456b-92c7-a3a7f0216ee7.ascat2.allelic_specific.seg.txt",
    "ascatarray")
print("INFO: Files opened successfully")
6) ggplot of minor copy number (lcn) """ import libextractfile as exfi import libcomparison as compi import libgetters as ge import libstatistics as sts import libconstants as cts import os print( "INFO: Test unitario para comprobar graficas comparando un ejemplo de ascatNGS y uno de FACETS" ) print("TEST 1) Extraer los datos") ascat = compi.convert2region( "/home/labs/solelab/ffuster2/Desktop/doctorat/cas_estudi/input_examples/TCGA-09-0369/TCGA-09-0369_40e311a4_VS_f4441d6e/H_GP-09-0369-01A-01W-0372-09-1.copynumber.caveman.csv", "ascat") facets = compi.convert2region( "/home/labs/solelab/ffuster2/Desktop/doctorat/cas_estudi/input_examples/TCGA-09-0369/TCGA-09-0369_40e311a4_VS_f4441d6e/facets_comp_cncf.tsv", "FACETS") print("TEST 2) Dividir las regiones para obtener regiones en comun") regs = compi.getFragments(facets, ascat) print("TEST 3) Dibujar la concordancia entre los logR") try: sts.logRcomp(regs, facets, ascat, "FACETS", "ASCAT") except ValueError: print("ERROR: Cannot create the logR plot") print("TEST 4) Dibujar los copy number counts usando la libreria ggplot") sts.doGGplotFiles(facets, ascat, "FACETS", "ASCAT") print( "TEST 5) Crear un bed con las regiones reportadas por cada archivo y las regiones en comun"
percent = 100 * float(coin) / float(all) return percent if __name__ == "__main__": """ UNIT TEST """ pr1 = "FACETS" pr2 = "ascatngs" print( "\n\n\t\tWELCOME TO libstatistics.py UNIT TEST\n\t\t-------------------------------------\n" ) print("Reading FACETS example") fa = comp.convert2region( "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-13-0887/4d1eb382_VS_5820c55c_FACETS/facets_comp_cncf.tsv", pr1) print("Reading AscatNGS example") s = comp.convert2region( "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-13-0887/4d1eb382_VS_5820c55c_ASCAT/TCGA-13-0887-01A-01W.copynumber.caveman.csv", pr2) print("Read complete. Getting the fragments") regs = comp.getFragments(fa, s) print("Got fragments. Checking the copy number") dc = comp.doComparison(regs, fa, s) print("Copy number done. Preparing some statistics") print("1) Counts") c1, c2 = calculateCounts(dc) print("2) Counts per tool") counts1, count2 = countsXtool(fa, s) print("3) Bases reported in each aberration")