Exemple #1
0
def compareTools(reg1, reg2):
    """Build one tab-separated summary line comparing two tools' regions.

    The columns are, in order: the region count, the base similarity, the
    region similarity, the "MCC" entry of the contingency data for the keys
    "A", "N", "L" and "D", the overall Jaccard index, the getJaccard() value
    for those same four keys, and finally the purity of reg1 and reg2
    followed by the ploidy of reg1 and reg2.

    reg1, reg2 -- region objects as accepted by lc.getFragments(); both must
                  support ["purity"] and ["ploidy"] lookups.

    Returns the formatted line, without a trailing newline.
    """
    kinds = ("A", "N", "L", "D")

    fragments = lc.getFragments(reg1, reg2)
    table = lc.doComparison2(fragments, reg1, reg2)
    contingency = ls.doContingency(table)
    overallJaccard = ls.jaccardIndex(table)
    regionCount = ls.regionNumber(fragments)
    baseSim = ls.baseSimilarity(fragments, reg1, reg2)
    regionSim = ls.regSimilarity(fragments, reg1, reg2)
    kindJaccards = [getJaccard(table, kind) for kind in kinds]
    kindMccs = [contingency[kind]["MCC"] for kind in kinds]

    columns = [regionCount, baseSim, regionSim]
    columns += kindMccs
    columns.append(overallJaccard)
    columns += kindJaccards
    columns += [reg1["purity"], reg2["purity"], reg1["ploidy"], reg2["ploidy"]]

    # One "{}" placeholder per column, joined by tabs.
    template = "\t".join(["{}"] * len(columns))
    return template.format(*columns)
Exemple #2
0
def _appendLine(path, sampleId, value):
    """Append a "<sampleId><TAB><value>" line to the text file at path."""
    with open(path, "a") as fi:
        fi.write("{}\t{}\n".format(sampleId, value))


def _storeSummary(tool, sampleId, region, jaccard, contingency):
    """Append one tool's summary values to its per-metric text files.

    tool        -- file name prefix ("ascat" or "facets")
    sampleId    -- identifier written in the first column of every line
    region      -- the tool's region data; its "purity" and "ploidy" are stored
    jaccard     -- Jaccard index against the array, or None if not computed
    contingency -- contingency data against the array (indexed by "A"/"D" and
                   then by metric name), or None if not computed
    """
    _appendLine("{}Purities.txt".format(tool), sampleId, region["purity"])
    _appendLine("{}Ploidies.txt".format(tool), sampleId, region["ploidy"])
    if jaccard is None or contingency is None:
        # No comparison against the array was possible: nothing else to store.
        return
    _appendLine("{}Jaccard.txt".format(tool), sampleId, jaccard)
    for key, suffix in (("A", "amplification"), ("D", "deletion")):
        for metric in ("ACC", "TPR", "TNR", "PPV"):
            _appendLine("{}{}{}.txt".format(tool, metric, suffix),
                        sampleId, contingency[key][metric])
        # NOTE(review): the FDR files receive the "ACC" value, exactly as the
        # code always did. This looks like a copy-paste slip; switch to the
        # "FDR" entry once it is confirmed that st.doContingency provides one.
        _appendLine("{}FDR{}.txt".format(tool, suffix),
                    sampleId, contingency[key]["ACC"])


def launchAnalysis(folder, array, ascat, facets):
    """Compare the ASCAT and FACETS output of one sample against its array.

    The conversions and comparisons run with `folder` as working directory.
    Afterwards the working directory is set to the directory containing this
    script, and the summary values are appended to text files there
    (ascatPurities.txt, facetsJaccard.txt, ...).

    folder -- sample directory; its last path component is used as sample id
    array  -- array file, given relative to the parent of `folder`
    ascat  -- ASCAT output file, or "" when the sample has none
    facets -- FACETS output file, or "" when the sample has none
    """
    pythonpath = os.path.dirname(os.path.realpath(__file__))
    print("INFO: Analysing {}".format(folder))
    # Strip trailing slashes so "path/to/sample/" does not give an empty id.
    sampleId = folder.rstrip("/").split("/")[-1]
    arrayPath = "../{}".format(array)
    ascatReg = None
    facetsReg = None
    # Comparison results stay None when the matching comparison cannot be run.
    jc1 = cm1 = None
    jc2 = cm2 = None

    # Go to the working directory to run the comparison analysis
    os.chdir(folder)
    try:
        if ascat != "":
            ascatReg = comp.convert2region(ascat, "ascat")
        if facets != "":
            facetsReg = comp.convert2region(facets, "facets")
        arrayReg = comp.convert2region(arrayPath, "array")

        # The logR comparison between AscatNGS and FACETS is currently disabled.

        if facetsReg is not None and arrayReg is not None:
            regAr_F = comp.getFragments(arrayReg, facetsReg)
            mt1 = comp.doComparison2(regAr_F, facetsReg, arrayReg)
            jc1 = st.jaccardIndex(mt1, ["A", "D"])
            mt1 = comp.doComparison(regAr_F, facetsReg, arrayReg)
            cm1 = st.doContingency(mt1, ["A", "D"])

        if ascatReg is not None and arrayReg is not None:
            # Use the array/ASCAT fragments here, not the array/FACETS ones.
            regAr_A = comp.getFragments(arrayReg, ascatReg)
            mt2 = comp.doComparison2(regAr_A, ascatReg, arrayReg)
            jc2 = st.jaccardIndex(mt2, ["A", "D"])
            mt2 = comp.doComparison(regAr_A, ascatReg, arrayReg)
            cm2 = st.doContingency(mt2, ["A", "D"])
    finally:
        # Leave the sample folder even if a conversion or comparison failed.
        os.chdir(pythonpath)

    # Store the summary data in the corresponding files
    if ascatReg is not None:
        _storeSummary("ascat", sampleId, ascatReg, jc2, cm2)
    if facetsReg is not None:
        _storeSummary("facets", sampleId, facetsReg, jc1, cm1)
Exemple #3
0
import libcomparison as compi
import libgetters as ge
import libstatistics as sts
import libconstants as cts

# Walk-through script: loads one PURPLE and one FACETS result, compares them
# and prints every intermediate result. The printed messages are in Spanish.
print(
    "INFO: Test unitario para comparar el output de un ejemplo de FACETS con los datos del array descargado desde TCGA"
)

# Step 1: convert both input files to regions.
print("TEST 1) Extraer datos")
purplePath = "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-25-1315/1948ef01_VS_d57f7ca3_PURPLE/TUMOR.purple.cnv.somatic.tsv"
facetsPath = "/g/strcombio/fsupek_cancer2/TCGA_bam/OV/TCGA-25-1315/1948ef01_VS_d57f7ca3_FACETS/facets_comp_cncf.tsv"
ar = compi.convert2region(purplePath, "PURPLE")
fa = compi.convert2region(facetsPath, "FACETS")

# Step 2: fragments shared by both region sets.
print("TEST 2) Buscando las regiones en comun para estudio")
regs = compi.getFragments(ar, fa)

# Step 3: comparison table, printed with the labels "Array" and "FACETS".
print("TEST 3) Crear la tabla comparativa 4x4")
dc = compi.doComparison(regs, ar, fa)
comparisonTable = sts.printTable(dc, "Array", "FACETS", False)
print(comparisonTable)

# Step 4: contingency data for the aberration keys "A", "D" and "N".
print("TEST 4) Resultados de la matriz de confusion para cada aberracion")
testedAberrations = ("A", "D", "N")
c1, c2 = sts.calculateCounts(dc)
dicContingency = sts.doContingency(dc, list(testedAberrations))
contingencyReports = (
    ("\tAmplificacion\n\t{}\n\n", "A"),
    ("\tDelecion\n\t{}\n\n", "D"),
    ("\tNormal\n\t{}\n", "N"),
)
for reportTemplate, aberrationKey in contingencyReports:
    print(reportTemplate.format(dicContingency[aberrationKey]))

# Step 5: Jaccard index, computed from the doComparison2 result.
print("TEST 5) Jaccard index de cada una de las aberraciones")
jci2 = compi.doComparison2(regs, ar, fa)
jaccard = sts.jaccardIndex(jci2, list(testedAberrations))
print("\tJaccard index\n\t{}\n\n".format(jaccard))
Exemple #4
0
def _comparisonColumns(regA, regB):
    """Return the metric columns for one pairwise tool comparison.

    regA, regB -- the regions of the two tools, or None when a tool's output
                  file was not found.

    The columns are: region similarity, base similarity, the "MCC" value of
    every aberration in cte.aberrations, the Jaccard index, and then the
    purity of regA and regB followed by the ploidy of regA and regB. If either
    region is None, every column is "NA".
    """
    if regA is None or regB is None:
        # 2 similarities + one MCC per aberration + Jaccard + 2 purities + 2 ploidies
        return ["NA"] * (len(cte.aberrations) + 7)
    regs = lc.getFragments(regA, regB)
    c2 = lc.doComparison2(regs, regA, regB)
    sts = ls.doContingency(c2)  # Get the MCC for all the aberrations
    jcc = ls.jaccardIndex(c2)  # Get the Jaccard index for all the aberrations
    columns = [
        ls.regSimilarity(regs, regA, regB),
        ls.baseSimilarity(regs, regA, regB),
    ]
    columns.extend(sts[ab]["MCC"] for ab in cte.aberrations)
    # Purity and ploidy come from the two tools being compared, in the same
    # order as the purity/ploidy columns of the output header.
    columns.extend(
        [jcc, regA["purity"], regB["purity"], regA["ploidy"], regB["ploidy"]])
    return columns


def main():
    """Compare FACETS, ascatNGS and Sequenza pairwise for every OV case.

    For each tumor/control sample pair of every patient with cancer 'OV' in
    the database, the available tool outputs are converted to regions and
    compared two by two. One line per pair is appended to each of
    facetsVSascatngs.tsv, facetsVSsequenza.tsv and ascatVSsequenza.tsv (all
    three are created anew, with a header, at the start). A comparison whose
    input files are missing is reported as "NA" columns.

    The headers name four MCC columns, so cte.aberrations is expected to hold
    four aberrations.
    """
    fvaFi = "facetsVSascatngs.tsv"
    fvsFi = "facetsVSsequenza.tsv"
    avsFi = "ascatVSsequenza.tsv"
    headers = (
        (fvaFi,
         "Case\tregSim\tbaseSim\tMCCA\tMCCN\tMCCL\tMCCD\tjcc\tFpurity\tApurity\tFploidy\tAploidy\n"),
        (fvsFi,
         "Case\tregSim\tbaseSim\tMCCA\tMCCN\tMCCL\tMCCD\tjcc\tFpurity\tSpurity\tFploidy\tSploidy\n"),
        (avsFi,
         "Case\tregSim\tbaseSim\tMCCA\tMCCN\tMCCL\tMCCD\tjcc\tApurity\tSpurity\tAploidy\tSploidy\n"),
    )
    # Write the output files' header
    for path, header in headers:
        with open(path, "w") as fi:
            fi.write(header)

    with dbcon:
        cur = dbcon.cursor()
        q = cur.execute("SELECT submitter FROM patient WHERE cancer='OV'")
        cases = q.fetchall()

    for c in cases:
        # Collect the tumor and control sample identifiers stored for the case
        with dbcon:
            cur = dbcon.cursor()
            q = cur.execute(
                "SELECT uuid FROM sample WHERE submitter='{}' AND tumor LIKE '%Tumor%'"
                .format(c[0]))
            tumors = q.fetchall()
            q = cur.execute(
                "SELECT uuid FROM sample WHERE submitter='{}' AND tumor LIKE '%Normal%'"
                .format(c[0]))
            controls = q.fetchall()
        for tm in tumors:
            for cn in controls:
                # Get the absolute path and the prefix for the tool output
                tf = "{wd}/{sub}/{tm}_VS_{cn}".format(wd=wd,
                                                      sub=c[0],
                                                      tm=tm[0].split("-")[0],
                                                      cn=cn[0].split("-")[0])
                case = tf.split("/")[-1]
                facets = "{}_FACETS/facets_comp_cncf.tsv".format(tf)
                ascat = mm.findAscatName("{}_ASCAT/".format(tf))
                sequenza = "{}_Sequenza/{}_segments.txt".format(tf, c[0])
                # Reset the regions for every pair, so a missing file can never
                # pick up the region loaded for a previous pair.
                outf = outa = outs = None
                if os.path.isfile(facets):
                    outf = lc.convert2region(facets, "facets")
                if os.path.isfile(ascat):
                    outa = lc.convert2region(ascat, "ascatngs")
                if os.path.isfile(sequenza):
                    outs = lc.convert2region(sequenza, "sequenza")
                # Run the three comparisons before writing anything
                rows = (
                    (fvaFi, _comparisonColumns(outf, outa)),  # FACETS vs ascatNGS
                    (fvsFi, _comparisonColumns(outf, outs)),  # FACETS vs Sequenza
                    (avsFi, _comparisonColumns(outa, outs)),  # ascatNGS vs Sequenza
                )
                # Write the output in the corresponding files for each comparison
                for path, columns in rows:
                    with open(path, "a") as fi:
                        fi.write("\t".join(
                            "{}".format(value)
                            for value in [case] + columns) + "\n")
Exemple #5
0
# Copy number each tool reports for brca1 (presumably the remainder of a
# BRCA1 section whose heading precedes this part of the script -- confirm).
for lineTemplate, toolRegions in (
        ("Array   : {}", array),
        ("ascatNGS: {}", ascatngs),
        ("FACETS  : {}", facets),
        ("Sequenza: {}", sequenza),
):
    print(lineTemplate.format(
        lg.getCopyNumber(brca1[1:3], brca1[0], toolRegions)))

sectionSeparator = "----------------------------------------"
print(sectionSeparator)

# Same report for brca2, this time including ASCAT2.
print("INFO: Copy number reported in BRCA2")
for lineTemplate, toolRegions in (
        ("ASCAT2  : {}", ascat),
        ("Array   : {}", array),
        ("ascatNGS: {}", ascatngs),
        ("FACETS  : {}", facets),
        ("Sequenza: {}", sequenza),
):
    print(lineTemplate.format(
        lg.getCopyNumber(brca2[1:3], brca2[0], toolRegions)))
print(sectionSeparator)

# Detailed comparison of the ASCAT regions against the array regions.
print("INFO: Comparing ASCAT vs Array")
regs = lc.getFragments(ascat, array)
regionTotal = ls.regionNumber(regs)
print("\tGenome divided in {} regions".format(regionTotal))
comp = lc.doComparison2(regs, ascat, array)
sts = ls.doContingency(comp)
print(ls.printTable(comp, "ASCAT2", "Array", False))

# One "MCC" line per key of the contingency data.
print("\n\tMatthews Correlation Coefficient")
for s in sts:
    print("\t\t{} - {}".format(s, sts[s]["MCC"]))

# Each metric is computed only after its heading has been printed.
for metricHeading, computeMetric in (
        ("\n\tJaccard Index", lambda: ls.jaccardIndex(comp)),
        ("\n\tBase ACC", lambda: ls.baseSimilarity(regs, ascat, array)),
        ("\n\tRegion ACC", lambda: ls.regSimilarity(regs, ascat, array)),
):
    print(metricHeading)
    print("\t\t{}".format(computeMetric()))
print(sectionSeparator)

# Raw comparison data.
print("INFO: Comparison numbers")
print(comp)