Beispiel #1
0
def verifIdent(fic):
    """
    Compute gene and protein identity for each pair of loci listed in fic.

    fic is read as tab-separated text; the first two columns of every
    non-empty line are taken as locus names. When both
    FastaGene/<locus>.tfa files of a pair exist they are aligned with
    alignement.ali_needle and the value returned by
    alignement.extrait_id_needle is kept; the same is done with the
    FastaProt/<locus>.tfa files. One line
    "loc1<TAB>loc2<TAB>idg<TAB>idp" is written to outest.txt (current
    directory) per non-empty input line, with "-" for any value that was
    not computed.
    """
    ficOut = "outest.txt"
    # 'with' guarantees the report is flushed and closed; the former
    # trailing "f.close" lacked parentheses and never closed the file.
    with open(ficOut, "w") as f:
        with open(fic, "r") as src:
            lines = src.read().split("\n")
        for line in lines:
            if line == "":
                continue
            idg = "-"
            idp = "-"
            lis = line.split("\t")
            loc1 = lis[0]
            loc2 = lis[1]
            if loc1 != "" and loc2 != "":
                ficg1 = "FastaGene/%s.tfa" % loc1
                ficg2 = "FastaGene/%s.tfa" % loc2
                ficp1 = "FastaProt/%s.tfa" % loc1
                ficp2 = "FastaProt/%s.tfa" % loc2
                if os.path.isfile(ficg1) and os.path.isfile(ficg2):
                    alignement.ali_needle(ficg1, ficg2)
                    # presumably the default output name used by
                    # ali_needle -- confirm against the alignement module
                    needleG = "FastaGene/%s-%s.needle" % (
                        files.get_name(ficg1).lower(),
                        files.get_name(ficg2).lower())
                    idg = alignement.extrait_id_needle(needleG)
                if os.path.isfile(ficp1) and os.path.isfile(ficp2):
                    alignement.ali_needle(ficp1, ficp2)
                    needleP = "FastaProt/%s-%s.needle" % (
                        files.get_name(ficp1).lower(),
                        files.get_name(ficp2).lower())
                    idp = alignement.extrait_id_needle(needleP)
            f.write("%s\t%s\t%s\t%s\n" % (loc1, loc2, idg, idp))
def lanceBlastxFromScaff():
    """
    Run blastx (via alignement.run_blastxFmt) for every .tfa scaffold of
    the assemblageRef1.0 directory.

    Scaffolds shorter than 2000 residues are searched whole. For longer
    ones, the first and the last 1000 residues are written to
    <name>-deb.tfa and <name>-fin.tfa in the output directory and each of
    these two extremity files is searched separately.
    """
    print("toto")
    allfile = glob.glob("/Users/afutil/Documents/Genolevures/PiFa/AssemblageGenome/AssemblagesFinaux/assemblageRef1.0/*.tfa")
    database = "/Users/afutil/Documents/Genolevures/PiFa/AssemblageGenome/AssemblagesFinaux/SynteniePistPiso/DBblast/pistPisoProtFmt6"
    repout = "/Users/afutil/Documents/Genolevures/PiFa/AssemblageGenome/AssemblagesFinaux/SynteniePistPiso/BlastxFmt6"

    for scaffold in allfile:
        seq = fasta.seqEnVar(scaffold)
        fname = files.get_name(scaffold)
        print("%s\t%s" % (fname, len(seq)))
        if len(seq) < 2000:
            outfile = "%s/%s.blastx" % (repout, fname)
            alignement.run_blastxFmt(scaffold, outfile, database)
            continue

        fic1 = "%s/%s-deb.tfa" % (repout, fname)
        fic2 = "%s/%s-fin.tfa" % (repout, fname)
        with open(fic1, "w") as of1, open(fic2, "w") as of2:
            of1.write(">%s\n%s\n" % (files.get_name(fic1), seq[0:1000]))
            of2.write(">%s\n%s\n" % (files.get_name(fic2), seq[-1000:]))
        for fic in (fic1, fic2):
            outfile = "%s/%s.blastx" % (repout, files.get_name(fic))
            # query the extremity file itself; the previous version passed
            # the whole scaffold here, so both results were identical
            alignement.run_blastxFmt(fic, outfile, database)
Beispiel #3
0
def calcIdent(fic):
    """
    Write identity and similarity for every pair of .tfa files of a
    directory.

    fic -- directory holding the .tfa files. Each unordered pair is
           aligned with alignement.ali_needle into
           <fic>/<name1>-<name2>.needle unless that file already exists.

    The report is written to the file "ficOut" in the current directory:
    a header line, then one "el1<TAB>el2<TAB>id<TAB>sim" line per pair
    whose needle file exists. When fic is not a directory no pair is
    processed and only the header is written (the previous version failed
    on an unbound variable in that case).
    """
    allfile = []
    if os.path.isdir(fic):
        allfile = glob.glob("%s/*.tfa" % fic)
    fout = "ficOut"
    with open(fout, "w") as f:
        f.write("el1\tel2\tid\tsim\n")
        # each file is paired with every file that follows it in the list
        for rang, f1 in enumerate(allfile):
            for f2 in allfile[rang + 1:]:
                name = "%s-%s.needle" % (files.get_name(f1).lower(),
                                         files.get_name(f2).lower())
                outfile = "%s/%s" % (fic, name)
                if not os.path.isfile(outfile):
                    alignement.ali_needle(f1, f2, outfile)
                # re-check: the alignment may not have produced a file
                if os.path.isfile(outfile):
                    ident = alignement.extrait_id_needle(outfile)
                    sim = alignement.extrait_sim_needle(outfile)
                    f.write("%s\t%s\t%s\t%s\n" % (files.get_name(f1),
                                                  files.get_name(f2),
                                                  ident, sim))
def _copieReadsListes(ficIn, ficOut, listeReads):
    """
    Copy from ficIn to ficOut the records whose header matches listeReads.

    A header is a line starting with "@FCC"; it is compared, minus its
    last two characters, with the next expected entry of listeReads, so
    the entries must appear in the same order as in ficIn. Lines that
    follow a matching header are copied until the next header.
    NOTE(review): the "@FCC" prefix and the two stripped characters depend
    on the read naming scheme (earlier variants used "@HWI-" and seven
    characters) -- adapt when the read format changes.
    """
    nbReads = len(listeReads)
    with open(ficIn, "r") as src, open(ficOut, "w") as dst:
        line = src.readline()
        garde = False
        i = 0
        while line and i < nbReads:
            if line[0:4] == "@FCC":
                garde = line[:-2] == listeReads[i]
                if garde:
                    i += 1
            if garde:
                dst.write(line)
                if i == nbReads:
                    # last expected read: the loop stops after this
                    # iteration, so copy the three remaining lines of the
                    # record right away
                    for _ in range(3):
                        dst.write(src.readline())
            line = src.readline()


def recreeFicPEfq(ficPE1, ficPE2, outDir):
    """
    Rewrite two FASTQ files keeping only the reads returned by
    creeListeDesPE(ficPE1, ficPE2).

    ficPE1, ficPE2 -- input read files
    outDir         -- directory receiving <name>.fq for each input, where
                      <name> is files.get_name of the input
    """
    outR1 = "%s/%s.fq" % (outDir, files.get_name(ficPE1))
    outR2 = "%s/%s.fq" % (outDir, files.get_name(ficPE2))

    listeReadPE = creeListeDesPE(ficPE1, ficPE2)
    _copieReadsListes(ficPE1, outR1, listeReadPE)
    _copieReadsListes(ficPE2, outR2, listeReadPE)
Beispiel #5
0
def ali_needleFasta(fic1, fic2, outfile="", gop=10, gep=0.5):
    """
    Run the program referenced by System.NEEDLE on two sequence files,
    requesting FASTA-formatted output (-aformat3 fasta).

    fic1, fic2 -- paths passed as -asequence and -bsequence
    outfile    -- result path; when empty, <name1>-<name2>.needle
                  (lower-cased names) in the directory of fic1
    gop, gep   -- values passed as -gapopen and -gapextend
    """
    if outfile == "":
        # by default the result is placed in the directory of file 1
        repertoire = files.get_filepath(fic1)
        nomAli = "%s-%s.needle" % (files.get_name(fic1).lower(),
                                   files.get_name(fic2).lower())
        outfile = "%s/%s" % (repertoire, nomAli)

    arguments = (System.NEEDLE, fic1, fic2, gop, gep, outfile)
    os.system("%s -asequence %s -bsequence %s -gapopen %s -gapextend %s -outfile %s -aformat3 fasta" % arguments)
Beispiel #6
0
def ali_water(fic1, fic2, outfile="", gop=10, gep=0.5):
    """
    Run the program referenced by System.WATER on two sequence files.

    fic1, fic2 -- paths passed as -asequence and -bsequence
    outfile    -- result path; when empty, <name1>-<name2>.water
                  (lower-cased names) in the directory of fic1
    gop, gep   -- values passed as -gapopen and -gapextend
    """
    if outfile == "":
        # by default the result is placed in the directory of file 1
        repertoire = files.get_filepath(fic1)
        nomAli = "%s-%s.water" % (files.get_name(fic1).lower(),
                                  files.get_name(fic2).lower())
        outfile = "%s/%s" % (repertoire, nomAli)

    arguments = (System.WATER, fic1, fic2, gop, gep, outfile)
    os.system("%s -asequence %s -bsequence %s -gapopen %s -gapextend %s -outfile %s" % arguments)
def lanceAllSoapCoverageSNP():
    """
    Print the soap.coverage command line for every single-end result file
    (resuL1234SE*) of the ResuParScaff directory.

    The commands are only printed, not executed. For each single-end file
    the matching paired-end file is the same path with "resuL1234SE"
    replaced by "resuL1234PE".
    """
    for fileSE in glob.glob("/test/friedric/Pifa/Assemblage/SoapSplite/ResuParScaff/resuL1234SE*"):
        # the scaffold name follows the 12-character prefix of the file name
        scaff = files.get_name(fileSE)[12:]
        filePE = fileSE.replace('resuL1234SE', 'resuL1234PE')
        ref = "/test/friedric/Pifa/Assemblage/SoapSplite/ScaffRef/%s.tfa" % scaff
        nomResu = files.get_name(fileSE.replace('resuL1234SE', 'resuL1234'))
        resu = "/test/friedric/Pifa/Assemblage/SoapSplite/SoapCoverage/%s.cov" % nomResu
        print("/test/friedric/Pifa/SOAPcoverage/2.7.7/soap.coverage -phy -refsingle %s -il_single %s -il_soap %s -o %s" % (ref, fileSE, filePE, resu))
Beispiel #8
0
def ali_stretcherFasta(fic1, fic2, outfile=""):
    """
    Run the program referenced by System.STRETCHER on two sequence files,
    requesting FASTA-formatted output, unless the result already exists.

    fic1, fic2 -- paths passed as -asequence and -bsequence
    outfile    -- result path; when empty, <name1>-<name2>.stretcher
                  (lower-cased names) in the directory of fic1
    """
    if outfile == "":
        # by default the result is placed in the directory of file 1
        repertoire = files.get_filepath(fic1)
        nomAli = "%s-%s.stretcher" % (files.get_name(fic1).lower(),
                                      files.get_name(fic2).lower())
        outfile = "%s/%s" % (repertoire, nomAli)

    if os.path.isfile(outfile):
        print("%s already exists" % outfile)
        return

    arguments = (System.STRETCHER, fic1, fic2, outfile)
    os.system("%s -asequence %s -bsequence %s -outfile %s -aformat3 fasta" % arguments)
def extraitPEUnmappedReads(ficSamX, reads1, reads2, strain):
    """
    Copy the read pairs named in ficSamX from two read files.

    ficSamX        -- tab-separated file whose first column holds the read
                      names to extract (consecutive duplicates collapsed)
    reads1, reads2 -- read files, read line by line in lockstep
    strain         -- sub-directory of the UnmappedReads directory that
                      receives the two output .fq files

    The names must appear in ficSamX in the same order as in reads1.
    Prints the number of distinct names found.
    """
    readsUnmapped1 = "/Volumes/BioSan/Users/friedrich/Gonorrhoeae/BWA/UnmappedReads/%s/%s.fq" % (
        strain, files.get_name(reads1).replace("Cleandata", "unmapped"))
    readsUnmapped2 = "/Volumes/BioSan/Users/friedrich/Gonorrhoeae/BWA/UnmappedReads/%s/%s.fq" % (
        strain, files.get_name(reads2).replace("Cleandata", "unmapped"))

    # list of the pairs where at least one mate is not mapped; an empty
    # input now yields an empty list instead of a single "" entry
    lUnmapped = []
    with open(ficSamX, "r") as fM:
        for line in fM:
            nom = line.split("\t")[0]
            if not lUnmapped or nom != lUnmapped[-1]:
                lUnmapped.append(nom)
    print(len(lUnmapped))

    nbUnmapped = len(lUnmapped)
    # all four handles are released even if a read or write fails (the two
    # input handles were previously never closed)
    with open(reads1, "r") as fr1, open(reads2, "r") as fr2, \
            open(readsUnmapped1, "w") as of1, \
            open(readsUnmapped2, "w") as of2:
        i = 0
        toW = False
        lineR1 = fr1.readline()
        lineR2 = fr2.readline()
        while lineR1:
            # warning: condition to adapt to the format of the reads
            if lineR1[0:5] == "@FCC1":
                # warning: slice to adapt to the format of the reads
                # (drops the leading "@" and the last three characters)
                nomRead = lineR1.split("\t")[0][1:-3]
                toW = i < nbUnmapped and nomRead == lUnmapped[i]
                if toW:
                    i += 1
            if toW:
                of1.write(lineR1)
                of2.write(lineR2)

            lineR1 = fr1.readline()
            lineR2 = fr2.readline()
Beispiel #10
0
def defSimilarite(fic):
    """
    Print similarity figures for the pairs of loci listed in fic.

    fic is read as tab-separated text; columns 1 and 3 of every non-empty
    line are taken as locus names. For each pair, the needle result named
    after the two gene files (lower-cased files.get_name values) is looked
    up in FastaGene/ and in FastaProt/. When it exists, the value of
    alignement.extrait_sim_needle is read, and the value of
    alignement.extrait_nbsim_needle is divided by the length of the
    shorter sequence of the pair and multiplied by 100.

    One tab-separated line is printed per pair: simg, simp, loc1, loc2,
    nsimg, nsimp, with "-" for values that could not be computed. When one
    of the two names is empty only the names are printed.
    """
    with open(fic, "r") as src:
        lines = src.read().split("\n")
    for line in lines:
        if line == "":
            continue
        nsimg = "-"
        nsimp = "-"
        simg = "-"
        simp = "-"
        lis = line.split("\t")
        loc1 = lis[0]
        loc2 = lis[2]

        if loc1 == "" or loc2 == "":
            print("\t\t%s\t%s" % (loc1, loc2))
            continue

        ficg1 = "FastaGene/%s.tfa" % loc1
        ficg2 = "FastaGene/%s.tfa" % loc2
        ficp1 = "FastaProt/%s.tfa" % loc1
        ficp2 = "FastaProt/%s.tfa" % loc2
        outf = "%s-%s.needle" % (files.get_name(ficg1).lower(),
                                 files.get_name(ficg2).lower())
        needleG = "FastaGene/%s" % outf
        needleP = "FastaProt/%s" % outf

        if os.path.isfile(needleG):
            # normalise by the shorter of the two sequences
            sizeg = min(len(fasta.seqEnVar(ficg1)),
                        len(fasta.seqEnVar(ficg2)))
            simg = float(alignement.extrait_sim_needle(needleG))
            nsimg = float(alignement.extrait_nbsim_needle(needleG)) / sizeg * 100
        if os.path.isfile(needleP):
            sizep = min(len(fasta.seqEnVar(ficp1)),
                        len(fasta.seqEnVar(ficp2)))
            simp = float(alignement.extrait_sim_needle(needleP))
            nsimp = float(alignement.extrait_nbsim_needle(needleP)) / sizep * 100

        if simp != "-" and simg != "-":
            print("%.1f\t%.1f\t%s\t%s\t%.1f\t%.1f\t" % (simg, simp, loc1, loc2, nsimg, nsimp))
        else:
            # at least one value is still "-": no float formatting
            print("%s\t%s\t%s\t%s\t%s\t%s" % (simg, simp, loc1, loc2, nsimg, nsimp))
Beispiel #11
0
def defNouvelIdent(fic):
    """
    Print identity figures for the pairs of loci listed in fic.

    fic is read as tab-separated text; columns 1 and 3 of every non-empty
    line are taken as locus names. For each pair, the needle result named
    after the two gene files (lower-cased files.get_name values) is looked
    up in FastaGene/ and in FastaProt/. When it exists, the value of
    alignement.extrait_id_needle is read, and the value of
    alignement.extrait_nbid_needle is divided by the length of the shorter
    sequence of the pair and multiplied by 100.

    One tab-separated line is printed per pair: idg, idp, loc1, loc2,
    nidg, nidp, with "-" for values that could not be computed. When one
    of the two names is empty only the names are printed.
    """
    with open(fic, "r") as src:
        lines = src.read().split("\n")
    for line in lines:
        if line == "":
            continue
        nidg = "-"
        nidp = "-"
        idg = "-"
        idp = "-"
        lis = line.split("\t")
        loc1 = lis[0]
        loc2 = lis[2]

        if loc1 == "" or loc2 == "":
            print("\t\t%s\t%s" % (loc1, loc2))
            continue

        ficg1 = "FastaGene/%s.tfa" % loc1
        ficg2 = "FastaGene/%s.tfa" % loc2
        ficp1 = "FastaProt/%s.tfa" % loc1
        ficp2 = "FastaProt/%s.tfa" % loc2
        outf = "%s-%s.needle" % (files.get_name(ficg1).lower(),
                                 files.get_name(ficg2).lower())
        needleG = "FastaGene/%s" % outf
        needleP = "FastaProt/%s" % outf

        if os.path.isfile(needleG):
            # normalise by the shorter of the two sequences
            sizeg = min(len(fasta.seqEnVar(ficg1)),
                        len(fasta.seqEnVar(ficg2)))
            idg = float(alignement.extrait_id_needle(needleG))
            nidg = float(alignement.extrait_nbid_needle(needleG)) / sizeg * 100
        if os.path.isfile(needleP):
            sizep = min(len(fasta.seqEnVar(ficp1)),
                        len(fasta.seqEnVar(ficp2)))
            idp = float(alignement.extrait_id_needle(needleP))
            nidp = float(alignement.extrait_nbid_needle(needleP)) / sizep * 100

        if idp != "-" and idg != "-":
            print("%.1f\t%.1f\t%s\t%s\t%.1f\t%.1f\t" % (idg, idp, loc1, loc2, nidg, nidp))
        else:
            # at least one value is still "-": no float formatting
            print("%s\t%s\t%s\t%s\t%s\t%s" % (idg, idp, loc1, loc2, nidg, nidp))
Beispiel #12
0
def reverseComplement(fastq, outfile=""):
    """
    Run fastx_reverse_complement on a FASTQ file.

    fastq   -- path passed as -i
    outfile -- path passed as -o; when empty, "<name>_RC.fq" where <name>
               is files.get_name(fastq)
    """
    cible = outfile if outfile != "" else "%s_RC.fq" % files.get_name(fastq)
    os.system("/Volumes/BioSan/opt/fastx_toolkit/fastx_reverse_complement -i %s -o %s" % (fastq, cible))
def lancetBlastnGeneIntron():
    """
    Run alignement.run_tblastnFmt for every .tfa file of the ProtIntron
    directory against the pifaScaffFmt6 database.

    Results go to tBlastnFmt6/<lower-cased name>.tblastn; a file whose
    result already exists is skipped with a message.
    """
    repGenome = "/Users/afutil/Documents/Genolevures/PiFa/Annotation/PourPascal/tBlastNPisoIntron"
    outdir2 = "%s/tBlastnFmt6" % repGenome
    db2 = "%s/DBblast/pifaScaffFmt6" % repGenome

    for file in glob.glob("%s/ProtIntron/*.tfa" % repGenome):
        outfile2 = "%s/%s.tblastn" % (outdir2, files.get_name(file).lower())
        if os.path.isfile(outfile2):
            print("outfile2 exists")
            continue
        print("++%s++" % outfile2)
        alignement.run_tblastnFmt(file, outfile2, db2)
Beispiel #14
0
def renommeFic(dir, suffix):
    """
    Copy every .tfa file of a directory to "<name>-<suffix>.tfa" in that
    same directory, where <name> is files.get_name of the original file.

    dir    -- directory to scan (with or without a trailing slash)
    suffix -- text appended to each file name, after a dash
    """
    for fic in glob.glob("%s/*.tfa" % dir):
        newName = "%s-%s.tfa" % (files.get_name(fic), suffix)
        # join with a separator, as the glob pattern above does: plain
        # concatenation put the copy beside the directory whenever dir was
        # given without a trailing slash
        newFile = "%s/%s" % (dir, newName)
        shutil.copy(fic, newFile)
Beispiel #15
0
def creeFic6Strains():
    """
    Build, for every Sakl0? directory, a .sites file restricted to the
    strains of liStrains.

    The first line of the input .sites file is tab-separated; its second
    and third fields are copied to the output header, whose first field is
    the number of selected strains. Then every ">" record whose name
    (text after ">") is in liStrains is copied with the lines that follow
    it. Output goes to 6souches/<dir name>/<dir name>.sites; the directory
    is created when missing.
    """
    liStrains = ["CBS10367", "CBS5828", "CBS3082a", "NRBC10572", "68917-2", "CBS6546"]
    repO = "/Users/anfutil/Documents/Projets/GB-3G/LDHat/6souches"
    for rep in glob.glob("/Users/anfutil/Documents/Projets/GB-3G/LDHat/Sakl0?"):
        nomRep = files.get_name(rep)
        repOut = "%s/%s" % (repO, nomRep)
        if not os.path.isdir(repOut):
            os.mkdir(repOut)

        ficSites = "%s/%s.sites" % (rep, nomRep)
        ficOut = "%s/%s.sites" % (repOut, nomRep)

        with open(ficSites, "r") as src:
            lines = src.read().split("\n")
        el = lines[0].split("\t")

        with open(ficOut, "w") as of:
            # header count derived from the list so both stay in sync
            of.write("%s\t%s\t%s\n" % (len(liStrains), el[1], el[2]))
            garde = False
            for line in lines[1:]:
                if line == "":
                    continue
                if line[0] == ">":
                    # a new record starts: keep it only for wanted strains
                    garde = line[1:] in liStrains
                if garde:
                    of.write("%s\n" % line)
Beispiel #16
0
def ajoutIncrementalPrefHeader(infile, prefix, outfile=""):
    """
    Copy a FASTA file, prepending a numbered prefix to every header.

    infile  -- FASTA file to read
    prefix  -- text placed right after ">" in each header
    outfile -- file to write; when empty, "<name>-incrPrefix.tfa" where
               <name> is files.get_name(infile) (the chosen name is
               printed)

    The n-th header ">text" becomes ">" + prefix + n padded with zeros to
    at least four digits + " text". Empty lines are dropped; every other
    line is copied unchanged.
    """
    if outfile == "":
        outfile = "%s-incrPrefix.tfa" % (files.get_name(infile))
        print("outfile est %s" % outfile)

    # read everything before opening the output, so both handles are
    # closed deterministically
    with open(infile, "r") as src:
        lines = src.read().split("\n")

    with open(outfile, "w") as f:
        i = 1
        for line in lines:
            if line == "":
                continue
            if line[0] == ">":
                f.write(">%s%04d %s\n" % (prefix, i, line[1:]))
                i += 1
            else:
                f.write("%s\n" % line)
def lanceRechercheSimInc(file, repOut, db):
    """
    Run the two tblastn searches of a query file against db, skipping any
    search whose result file already exists.

    file   -- query file
    repOut -- directory receiving <name>-4strains.tblastn (produced by
              alignement.run_tblastn) and <name>-4strains-Fmt6.tblastn
              (produced by alignement.run_tblastnFmt)
    db     -- database handed to both searches

    Returns the path of the Fmt6 result file.
    """
    nom = files.get_name(file)
    outfile = "%s/%s-4strains.tblastn" % (repOut, nom)
    outfileB = "%s/%s-4strains-Fmt6.tblastn" % (repOut, nom)

    if os.path.isfile(outfile):
        print("%s exists" % outfile)
    else:
        alignement.run_tblastn(file, outfile, db)

    if os.path.isfile(outfileB):
        print("%s exists" % outfileB)
    else:
        alignement.run_tblastnFmt(file, outfileB, db)

    return outfileB
def statsALarracheALaChaine(rep):
    """
    Print a column header, then call statsALarrache(name, "stat.txt") for
    every .tfa file of rep, where name is files.get_name of the file.
    """
    print("GeneRef\tlongRefP\tnbSeqPil01\tlongIL01P\tnbDiffP\tnbDiffPIsolees\tlongRefG\tnbSeqGil01\tlongIL01G\tnbDiffG\tnbDiffGIsolees")
    outfile = "stat.txt"
    for fichier in glob.glob("%s/*.tfa" % rep):
        statsALarrache(files.get_name(fichier), outfile)
def lancementPipeAnalyseComplete(rep):
    """
    Run the whole analysis chain on every .tfa file of rep.

    Working directories tblastn, SeqGene, SeqProt, AliGene and AliProt are
    created under rep when missing. For each query file, in order:
    similarity search (lanceRechercheSimInc), gene creation
    (creeGeneSelonSim), protein creation (creeSeqProtSelonSeqGene), copy
    of the query into SeqProt and of the matching FY gene into SeqGene,
    concatenation of the sequences to align (concatSeqAAligner), protein
    alignment with alignement.run_mafft unless it already exists, then
    alignement.run_tranalign when the protein alignment is present.
    """
    repSim = "%s/tblastn" % rep
    repGene = "%s/SeqGene" % rep
    repProt = "%s/SeqProt" % rep
    repAliG = "%s/AliGene" % rep
    repAliP = "%s/AliProt" % rep
    repGeneFY = "/Users/afutil/Documents/DataJoseph/SaceStrains/FY/Gene"
    db = ssys.DB4STRAINS
    repSeq = ssys.SQ4STRAINS

    for repertoire in (repSim, repGene, repProt, repAliG, repAliP):
        if not os.path.isdir(repertoire):
            os.mkdir(repertoire)

    for file in glob.glob("%s/*.tfa" % rep):
        suff = files.get_name(file)
        print("sim...")
        ficSim = lanceRechercheSimInc(file, repSim, db)
        print("creation gene...")
        creeGeneSelonSim(ficSim, repGene, repSeq)
        print("creation prot...")
        creeSeqProtSelonSeqGene(suff, repGene, repProt)

        # copy the protein sequence into the right directory
        shutil.copyfile(file, "%s/%s.tfa" % (repProt, suff))
        seqToAlign = "%s-4strains.fasta" % suff
        concatSeqAAligner(suff, repProt, seqToAlign)

        # copy the nucleic sequence into the right directory
        shutil.copyfile("%s/%s.tfa" % (repGeneFY, suff),
                        "%s/%s.tfa" % (repGene, suff))
        seqGToAlign = "%s-G-4strains.fasta" % suff
        concatSeqAAligner(suff, repGene, seqGToAlign)

        # build the protein alignment
        print("seqtoalign est: %s" % seqToAlign)
        print("alignement...")
        ficAliP = "%s/%s" % (repAliP, seqToAlign)
        if not os.path.isfile(ficAliP):
            alignement.run_mafft("%s/%s" % (repProt, seqToAlign), ficAliP)

        # derive the gene alignment from the protein alignment, provided
        # the latter exists
        if os.path.isfile(ficAliP):
            ficAliG = "%s/%s" % (repAliG, seqGToAlign)
            alignement.run_tranalign("%s/%s" % (repGene, seqGToAlign), ficAliP, ficAliG)
def stats4StrainsALarracheALaChaine(rep):
    """
    Append a column header to stat.txt, then call
    stats4StrainsALarrache(name, "stat.txt") for every .tfa file of rep,
    where name is files.get_name of the file.
    """
    outfile = "stat.txt"
    entete = open(outfile, 'a')
    entete.write("gene\tposition\tresIL01\tresFY\tresWE372\tresNC02\tresYJM981\tIL=WE?=NC?\tYJM=FY\tProfilOK\n")
    entete.close()
    for fichier in glob.glob("%s/*.tfa" % rep):
        stats4StrainsALarrache(files.get_name(fichier), outfile)
def lanceAlignementsWater(repWork):
    """
    Align every .tfa file of repWork with its FY counterpart through
    alignement.ali_water.

    The counterpart is RegionChr8/<prefix>.tfa, where <prefix> is the part
    of the file name before the first "_". Results are written to
    <repWork>/Alignements/<name>.water.
    """
    repout = "%s/Alignements" % repWork
    for fichier in glob.glob("%s/*.tfa" % repWork):
        fname = files.get_name(fichier)
        prefixe = fname.split("_")[0]
        ficFY = "/Users/afutil/Documents/DataJoseph/Incompatibilites/FY4-IL01/MappedRegions/ProtFY/RegionChr8/%s.tfa" % prefixe
        alignement.ali_water(fichier, ficFY, "%s/%s.water" % (repout, fname))
Beispiel #22
0
def lanceDelly(aln, repOut, ref):
    """
    Run the program referenced by System.DELLY on an alignment file.

    aln    -- alignment file, passed as last argument
    repOut -- directory receiving <name>.delly
    ref    -- reference genome, passed with -g

    Returns the path of the result file.
    """
    # DELLY options used below:
    #   -g  reference genome (the one used for the mapping)
    #   -q  min. paired-end mapping quality (-q 1 calls uniquely mapped reads)
    #   -p  include breakpoint detection
    sortie = "%s/%s.delly" % (repOut, files.get_name(aln))
    os.system("{0:s} -p -g {1:s} -q 1 -o {2:s} {3:s}".format(System.DELLY, ref, sortie, aln))
    return sortie
Beispiel #23
0
def lanceJumpy(aln, repOut, ref):
    """
    Run the program referenced by System.JUMPY on an alignment file.

    aln    -- alignment file, passed as last argument
    repOut -- directory receiving <name>.jumpy
    ref    -- reference genome, passed with -g

    Returns the path of the result file.
    """
    # JUMPY options used below:
    #   -g  reference genome (the one used for the mapping)
    #   -q  min. paired-end mapping quality (-q 1 calls uniquely mapped reads)
    #   -p  include breakpoint detection
    sortie = "%s/%s.jumpy" % (repOut, files.get_name(aln))
    os.system("%s -p -g %s -q 1 -o %s %s" % (System.JUMPY, ref, sortie, aln))
    return sortie
def lanceReverseComplementALaChaine():
    """
    Reverse-complement every .fq file of the CBS4104 CleanData directory.

    Each result is written, under the same file name, to the directory
    obtained by replacing "CleanData/" with "CleanData/ReverseComplement/"
    in the source directory; that directory is created (and chmod 777)
    when missing.
    """
    for fqFile in glob.glob("/Volumes/BioSan/Users/friedrich/Reads/BGI/20130726/CleanData/CBS4104/*.fq"):
        repOut = files.get_filepath(fqFile).replace("CleanData/", "CleanData/ReverseComplement/")
        if not os.path.isdir(repOut):
            os.mkdir(repOut)
            os.system("chmod 777 %s" % repOut)
        reverseComplement(fqFile, "%s/%s.fq" % (repOut, files.get_name(fqFile)))
def creeSeqProtSelonSeqGene(suff, repIn, repOut):
    """
    Call sequences.translateSeq for every file <repIn>/<suff>*.tfa.

    Each input path is printed. The result goes to <repOut>/<name>.tfa; a
    file whose result already exists is skipped with a message.
    """
    for fichier in glob.glob("%s/%s*.tfa" % (repIn, suff)):
        print(fichier)
        outFile = "%s/%s.tfa" % (repOut, files.get_name(fichier))
        if os.path.isfile(outFile):
            print("ouFile : %s already exists" % outFile)
            continue
        sequences.translateSeq(fichier, outFile, 1)
Beispiel #26
0
def verifLongEtExtrGene():
    """
    Print name and length of every sequence of the Combinaison directory
    that starts with ATG, ends with TAA, TGA or TAG, and is shorter than
    900 residues.
    """
    allfile = glob.glob("/Users/anfutil/Documents/Genolevures/Pifa/Annotation/PropositionGenesSelontBlastN/Combinaison/*.tfa")
    print("GeneName\tSeqLen")
    for fichier in allfile:
        seq = fasta.seqEnVar(fichier)
        if seq[0:3] != "ATG":
            continue
        if seq[-3:] not in ["TAA", "TGA", "TAG"]:
            continue
        if len(seq) < 900:
            print("%s\t%s" % (files.get_name(fichier), len(seq)))
Beispiel #27
0
def copieFicInteret(file):
    """
    Copy into Subset/ the .blastp files named in a tab-separated list.

    For every non-empty line of file, the first column gives a name;
    "<lower-cased files.get_name of it>.blastp" is copied from the current
    directory to Subset/ when it exists.
    """
    for line in open(file, "r").read().split("\n"):
        if line == "":
            continue
        fic = line.split("\t")[0]
        ofic1 = "%s.blastp" % files.get_name(fic).lower()
        if os.path.isfile(ofic1):
            shutil.copy(ofic1, "Subset/%s" % ofic1)
def lanceAllSoapSNP():
    """
    Run soapsnp on every *_trie file of the ResuParScaff directory.

    The reference is ScaffRef/<scaffold>.tfa, where <scaffold> is the file
    name without its first 12 and last 5 characters; the result goes to
    SoapSNP/<name>_consensus.
    """
    for fichier in glob.glob("/Users/anfutil/Documents/Genolevures/Pifa/AssemblageGenome/TestSoapAligner/SoapSplite/ResuParScaff/*_trie"):
        nom = files.get_name(fichier)
        scaff = nom[12:-5]
        ref = "/Users/anfutil/Documents/Genolevures/Pifa/AssemblageGenome/TestSoapAligner/SoapSplite/ScaffRef/%s.tfa" % scaff
        resu = "/Users/anfutil/Documents/Genolevures/Pifa/AssemblageGenome/TestSoapAligner/SoapSplite/SoapSNP/%s_consensus" % nom
        os.system("soapsnp -i %s -d %s -o %s -r 0.0001 -t -u -m" % (fichier, ref, resu))
Beispiel #29
0
def analyseBlastPrelim():
    """
    Summarise the first hit of every *-Fmt6.blastp result file.

    For each result file one tab-separated line is printed: query name,
    chromosome (characters 5-9 of the name), gene number (from character
    12), query length, number of 'X' in the query, then either
    "No hits found" or: subject name, subject length, total alignment
    length, identity and e-value.

    Only the lines whose subject (column 2, minus its first 8 characters)
    equals the subject of the first line are used. Identity is the mean of
    column 3 weighted by column 4 (alignment length); the e-value is
    column 11 of the first line. Reading stops at the first empty line or
    at the end of the file, so a file without a trailing newline is
    reported as well.
    NOTE(review): column meanings presumably follow the standard BLAST
    tabular layout -- confirm against the command that produced the files.
    """
    # extracts the relevant information from the blastp Fmt6 results
    repGenome = "/Users/afutil/Documents/Genolevures/Pifa/Annotation/ModelGenes/ProtJigsaw/BlastP/ContrePiso/Eq1n"
    allfile = glob.glob("%s/*-Fmt6.blastp" % repGenome)
    repPiso = "/Users/afutil/Documents/Genolevures/Piso/SeqFinales/FastaProt"
    repPab = "/Users/afutil/Documents/Genolevures/Pifa/Annotation/ModelGenes/ProtJigsaw"
    print("SeqPifa\tChromoPifa\tnumGene\tlgSeqPifa\tnbXSeqPifa\tSeqPist\tlgSeqPist\tlgAli\t%ageId\tBestEvalue")

    for ficBlast in allfile:
        pab = files.get_name(ficBlast).replace("-Fmt6", "")
        chromo = pab[5:10]
        numGen = pab[12:]
        ficPab = "%s/PIFA.%s.tfa" % (repPab, pab[5:])
        seqPab = fasta.seqEnVar(ficPab)
        lgPab = len(seqPab)
        nbXPab = seqPab.count('X')

        with open(ficBlast, "r") as src:
            lgResu = src.read().split("\n")

        piso = ""
        lgAli = []
        pctId = []
        lgSeqPiso = 0
        bestEval = ""
        for resu in lgResu:
            if resu == "":
                # end of the hit list
                break
            elem = resu.split("\t")
            sujet = elem[1][8:]
            if piso == "" or piso == sujet:
                piso = sujet
                if lgSeqPiso == 0:
                    # first line of the retained subject
                    ficPiso = "%s/%s.tfa" % (repPiso, piso)
                    lgSeqPiso = len(fasta.seqEnVar(ficPiso))
                    bestEval = elem[10]
                lgAli.append(elem[3])
                pctId.append(elem[2])

        if piso == "":
            print("%s\t%s\t%s\t%s\t%s\tNo hits found" % (pab, chromo, numGen, lgPab, nbXPab))
            continue

        # identity averaged over the segments, weighted by their length
        idT = 0
        lgAliT = 0
        for pct, lg in zip(pctId, lgAli):
            idT += float(pct) * float(lg)
            lgAliT += int(lg)
        ident = idT / lgAliT
        print("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%.2f\t%s" % (pab, chromo, numGen, lgPab, nbXPab, piso, lgSeqPiso, lgAliT, ident, bestEval))
Beispiel #30
0
def creeFastaSeqPartielle(infile, deb, fin):
    """
    Create a FASTA file holding part of the sequence of infile, from the
    start position deb to the end position fin.

    The output, written through fromSeqToFasta, is
    "<name>_<deb>-<fin>.fasta" in the current directory, with
    "<name>_<deb>-<fin>" as header (<name> being files.get_name(infile)).
    Nothing is written when that file already exists.
    """
    seqPart = extraitSeqPartielle(seqEnVar(infile), deb, fin)
    header = "%s_%s-%s" % (files.get_name(infile), deb, fin)
    outfile = "%s.fasta" % header
    if os.path.isfile(outfile):
        return
    fromSeqToFasta(seqPart, header, outfile)