Ejemplo n.º 1
0
def _strip_pileup_markers(bases):
    """Return a read-base string without ``^X`` pairs and indel groups.

    Two kinds of spans are dropped in a single left-to-right pass:

    * ``^`` together with the character that follows it;
    * ``+N<seq>`` / ``-N<seq>``, where ``N`` is a decimal length of any
      number of digits and exactly ``N`` characters after it are dropped.

    A sign that is not followed by a digit is dropped on its own.  Every
    other character is kept in its original order.
    """
    kept = []
    pos = 0
    size = len(bases)
    while pos < size:
        char = bases[pos]
        if char == "^":
            # Skip the caret and the next character so that the latter is
            # never mistaken for a base or for an indel sign.
            pos += 2
        elif char in "+-":
            end = pos + 1
            while end < size and bases[end].isdigit():
                end += 1
            # Read the whole length, not only its first digit, so that
            # indels of 10 or more characters are removed completely.
            length = int(bases[pos + 1:end]) if end > pos + 1 else 0
            pos = end + length
        else:
            kept.append(char)
            pos += 1
    return "".join(kept)


def fromVarfilter2tsvLikeCyrielle(ficVar):
    """Write per-position A/C/G/T counts of a varfilter file to a TSV file.

    ``ficVar`` is passed to ``traitementSamtools.decomposeVarfilter``; the
    path it returns is read line by line as tab-separated rows and the
    result is written to ``<that path>.tsv`` (no header line).

    For each row, the read-base column (index 8) is cleaned of ``^X`` and
    indel groups, then upper- and lower-case A, C, G and T are counted.
    ``.`` and ``,`` are counted as reference hits and that number replaces
    the count of the reference letter (column 2).  A row is written only
    when the most frequent letter does not account for the whole coverage
    (column 7), i.e. ``max(count) / cov != 1``.

    Output columns: columns 0 and 1 unchanged, upper-cased column 2,
    column 3, count for the letter in column 3 (``-`` if it is not one of
    A/C/G/T), column 5, ``nbRef / cov``, nb A, nb C, nb G, nb T, cov,
    ``max(count) / cov``.

    NOTE(review): the column layout looks like the samtools pileup
    consensus format -- confirm against the producer of the input.

    Blank lines are skipped.  Returns the path of the TSV file.

    Raises:
        IndexError: a non-blank row has fewer than 9 columns.
        ValueError: the coverage column is not a number.
        ZeroDivisionError: the coverage column is 0.
    """
    print(ficVar)
    ficSnp = traitementSamtools.decomposeVarfilter(ficVar)
    ficTsv = "%s.tsv" % ficSnp

    # The output is opened first, as before; ``with`` closes both files even
    # when a row raises.
    with open(ficTsv, "w") as tsv_out, open(ficSnp, "r") as snp_in:
        for line in snp_in:
            if not line.strip():
                continue
            el = line.split("\t")

            bases = _strip_pileup_markers(el[8])
            counts = {}
            for letter in "ACGT":
                counts[letter] = bases.count(letter) + bases.count(letter.lower())

            cov = float(el[7])
            nbRef = bases.count(".") + bases.count(",")

            # Reads matching the reference are encoded as "." / ",", so the
            # reference letter's count is the number of those symbols.
            ref = el[2].upper()
            if ref in counts:
                counts[ref] = nbRef

            # Column 3 is matched case-sensitively; anything else gets "-".
            nbCns = counts.get(el[3], "-")

            ratio = nbRef / cov
            ratio2 = max(counts.values()) / cov
            if ratio2 != 1:
                tsv_out.write("%s\t%s\t%s\t%s\t%s\t%s\t%.3f\t%s\t%s\t%s\t%s\t%.0f\t%.3f\n" % (
                    el[0], el[1], ref, el[3], nbCns, el[5], ratio,
                    counts["A"], counts["C"], counts["G"], counts["T"], cov, ratio2))

    return ficTsv
Ejemplo n.º 2
0
def fromVarfilter2tsvLike(ficVar):
    """Write per-position A/C/G/T counts of a varfilter file to a TSV file.

    ``ficVar`` is passed to ``traitementSamtools.decomposeVarfilter``; the
    path it returns is read line by line as tab-separated rows and the
    result is written, after a header line, to ``<that path>.tsv``.

    For each row, every ``^X`` pair is removed from the read-base column
    (index 8), the result is passed through ``removePatternFromLine`` with
    an indel pattern and upper-cased, and A, C, G and T are counted in it.
    ``.`` and ``,`` are counted as reference hits and that number replaces
    the count of the reference letter (column 2).  Every row is written.

    Output columns follow the header: columns 0 and 1 unchanged,
    upper-cased column 2, column 3, count for the letter in column 3
    (``-`` if it is not one of A/C/G/T), column 5, ``nbRef / cov``,
    nb A, nb C, nb G, nb T, cov.

    Blank lines are skipped.  Returns the path of the TSV file.

    Raises:
        IndexError: a non-blank row has fewer than 9 columns.
        ValueError: the coverage column (index 7) is not a number.
        ZeroDivisionError: the coverage column is 0.
    """
    ficSnp = traitementSamtools.decomposeVarfilter(ficVar)
    ficTsv = "%s.tsv" % ficSnp

    # The output is opened first, as before; ``with`` closes both files even
    # when a row raises.
    with open(ficTsv, "w") as tsv_out, open(ficSnp, "r") as snp_in:
        tsv_out.write("Chromosome\tposition\tReference\tResidue\tnb Residue(if A,C,T orG)\tQuality score?\t(nb ref)/cov\tnb A\tnb C\tnb G\tnb T\tcov\n")
        for line in snp_in:
            if not line.strip():
                continue
            el = line.split("\t")

            # Drop each "^" together with the character that follows it, so
            # that character is not counted as a base.
            bases = el[8]
            caret = bases.find("^")
            while caret != -1:
                bases = bases.replace(bases[caret:caret + 2], "")
                caret = bases.find("^")

            # Raw string: same pattern text as before, without relying on
            # the unrecognised "\+" escape.
            cleaned = removePatternFromLine(r"[\+|-]([0-9]+)([ACGTNacgtn]+)", bases)
            cleaned = cleaned.upper()

            counts = {}
            for letter in "ACGT":
                counts[letter] = cleaned.count(letter)

            cov = float(el[7])
            # Reference hits are counted before the indel pattern is removed.
            nbRef = bases.count(".") + bases.count(",")

            ref = el[2].upper()
            if ref in counts:
                counts[ref] = nbRef

            # Column 3 is matched case-sensitively; anything else gets "-".
            nbCns = counts.get(el[3], "-")

            ratio = nbRef / cov

            tsv_out.write("%s\t%s\t%s\t%s\t%s\t%s\t%.3f\t%s\t%s\t%s\t%s\t%.0f\n" % (
                el[0], el[1], ref, el[3], nbCns, el[5], ratio,
                counts["A"], counts["C"], counts["G"], counts["T"], cov))

    return ficTsv
Ejemplo n.º 3
0
def fromVarfilter2tsvLikeCyrielle(ficVar):
    """Count the A, C, G and T reads at each SNP and keep the mixed positions.

    ``ficVar`` is passed to ``traitementSamtools.decomposeVarfilter``; the
    path it returns (a pileup SNP file) is read line by line as
    tab-separated rows and the result is written, after a header line, to
    ``<that path>.bestFilter.tsv``.

    For each row, every ``^X`` pair is removed from the read-base column
    (index 8), the result is passed through ``removePatternFromLine`` with
    an indel pattern and upper-cased, and A, C, G and T are counted in it.
    ``.`` and ``,`` are counted as reference hits and that number replaces
    the count of the reference letter (column 2).  A row is written only
    when the most frequent letter covers at most 95% of the coverage
    (column 7), i.e. ``max(count) / cov <= 0.95``.

    Output columns follow the header: columns 0 and 1 unchanged,
    upper-cased column 2, column 3, count for the letter in column 3
    (``-`` if it is not one of A/C/G/T), column 5, ``nbRef / cov``,
    nb A, nb C, nb G, nb T, cov, ``max(count) / cov``.

    Blank lines are skipped.  Returns the path of the TSV file.

    Raises:
        IndexError: a non-blank row has fewer than 9 columns.
        ValueError: the coverage column is not a number.
        ZeroDivisionError: the coverage column is 0.
    """
    ficSnp = traitementSamtools.decomposeVarfilter(ficVar)
    ficTsv = "%s.bestFilter.tsv" % ficSnp

    # The output is opened first, as before; ``with`` closes both files even
    # when a row raises.
    with open(ficTsv, "w") as tsv_out, open(ficSnp, "r") as snp_in:
        tsv_out.write("Chromosome\tposition\tReference\tResidue\tnb Residue(if A,C,T orG)\tQuality score?\t(nb ref)/cov\tnb A\tnb C\tnb G\tnb T\tcov\t(nb max represented)/cov\n")
        for line in snp_in:
            if not line.strip():
                continue
            el = line.split("\t")

            # Drop each "^" together with the character that follows it, so
            # that character is not counted as a base.
            bases = el[8]
            caret = bases.find("^")
            while caret != -1:
                bases = bases.replace(bases[caret:caret + 2], "")
                caret = bases.find("^")

            # Raw string: same pattern text as before, without relying on
            # the unrecognised "\+" escape.
            cleaned = removePatternFromLine(r"[\+|-]([0-9]+)([ACGTNacgtn]+)", bases)
            cleaned = cleaned.upper()

            counts = {}
            for letter in "ACGT":
                counts[letter] = cleaned.count(letter)

            cov = float(el[7])
            nbRef = cleaned.count(".") + cleaned.count(",")

            ref = el[2].upper()
            if ref in counts:
                counts[ref] = nbRef

            # Column 3 is matched case-sensitively; anything else gets "-".
            nbCns = counts.get(el[3], "-")

            ratio = nbRef / cov
            ratio2 = max(counts.values()) / cov
            if ratio2 <= 0.95:
                tsv_out.write("%s\t%s\t%s\t%s\t%s\t%s\t%.3f\t%s\t%s\t%s\t%s\t%.0f\t%.3f\n" % (
                    el[0], el[1], ref, el[3], nbCns, el[5], ratio,
                    counts["A"], counts["C"], counts["G"], counts["T"], cov, ratio2))

    return ficTsv