def verifIdent(fic):
    """Write gene and protein needle identities for the locus pairs of *fic*.

    *fic* is read as tab-separated text; columns 0 and 1 of every non-empty
    line are taken as the two locus names.  When both
    ``FastaGene/<locus>.tfa`` files exist they are aligned with
    ``alignement.ali_needle`` and the identity is read back with
    ``alignement.extrait_id_needle``; the same is done under ``FastaProt``.
    A "-" is reported for an identity that could not be computed.

    The report is written to ``outest.txt`` in the current directory, one
    ``loc1<TAB>loc2<TAB>idGene<TAB>idProt`` row per processed line.
    """
    ficOut = "outest.txt"
    # The original ended with a bare ``f.close`` (attribute access, no call),
    # so the report was never explicitly closed.  ``with`` closes both files,
    # including when an alignment step raises.
    with open(ficOut, "w") as f:
        with open(fic, "r") as src:
            lines = src.read().split("\n")
        for line in lines:
            if line == "":
                continue
            idg = "-"
            idp = "-"
            lis = line.split("\t")
            loc1 = lis[0]
            loc2 = lis[1]
            if loc1 != "" and loc2 != "":
                ficg1 = "FastaGene/%s.tfa" % loc1
                ficg2 = "FastaGene/%s.tfa" % loc2
                ficp1 = "FastaProt/%s.tfa" % loc1
                ficp2 = "FastaProt/%s.tfa" % loc2
                if os.path.isfile(ficg1) and os.path.isfile(ficg2):
                    alignement.ali_needle(ficg1, ficg2)
                    idg = alignement.extrait_id_needle(
                        "FastaGene/%s-%s.needle"
                        % (files.get_name(ficg1).lower(), files.get_name(ficg2).lower()))
                if os.path.isfile(ficp1) and os.path.isfile(ficp2):
                    alignement.ali_needle(ficp1, ficp2)
                    idp = alignement.extrait_id_needle(
                        "FastaProt/%s-%s.needle"
                        % (files.get_name(ficp1).lower(), files.get_name(ficp2).lower()))
            # NOTE(review): the source had lost its indentation; one row is
            # assumed to be written per non-empty input line (so the "-"
            # placeholders also cover rows with a missing locus) -- confirm.
            f.write("%s\t%s\t%s\t%s\n" % (loc1, loc2, idg, idp))
def lanceBlastxFromScaff():
    """Run blastx (tabular output) on every scaffold of the reference assembly.

    Scaffolds shorter than 2000 residues are searched whole.  For longer ones
    only the first and last 1000 residues are searched: each end is written
    to its own ``<name>-deb.tfa`` / ``<name>-fin.tfa`` file in the output
    directory and blasted separately.

    Input directory, BLAST database and output directory are hard-coded.
    """
    print("toto")
    allfile = glob.glob("/Users/afutil/Documents/Genolevures/PiFa/AssemblageGenome/AssemblagesFinaux/assemblageRef1.0/*.tfa")
    database = "/Users/afutil/Documents/Genolevures/PiFa/AssemblageGenome/AssemblagesFinaux/SynteniePistPiso/DBblast/pistPisoProtFmt6"
    repout = "/Users/afutil/Documents/Genolevures/PiFa/AssemblageGenome/AssemblagesFinaux/SynteniePistPiso/BlastxFmt6"
    for file in allfile:
        seq = fasta.seqEnVar(file)
        fname = files.get_name(file)
        print("%s\t%s" % (fname, len(seq)))
        if len(seq) < 2000:
            outfile = "%s/%s.blastx" % (repout, fname)
            alignement.run_blastxFmt(file, outfile, database)
        else:
            fic1 = "%s/%s-deb.tfa" % (repout, fname)
            fic2 = "%s/%s-fin.tfa" % (repout, fname)
            with open(fic1, "w") as of1:
                of1.write(">%s\n%s\n" % (files.get_name(fic1), seq[0:1000]))
            with open(fic2, "w") as of2:
                of2.write(">%s\n%s\n" % (files.get_name(fic2), seq[-1000:]))
            for fic in fic1, fic2:
                outfile = "%s/%s.blastx" % (repout, files.get_name(fic))
                # Fix: the query must be the extracted end (``fic``).  The
                # original passed ``file`` here, blasting the whole scaffold
                # twice and never using the -deb/-fin files it had written.
                alignement.run_blastxFmt(fic, outfile, database)
def calcIdent(fic):
    """Tabulate needle identity and similarity for all .tfa pairs of a directory.

    Does nothing unless *fic* is an existing directory.  Every unordered pair
    of ``*.tfa`` files found in it is aligned with ``alignement.ali_needle``
    (skipped when the ``<name1>-<name2>.needle`` file is already present in
    *fic*), and one ``el1<TAB>el2<TAB>id<TAB>sim`` row per available
    alignment is written to the file ``ficOut`` in the current directory.
    """
    if not os.path.isdir(fic):
        return
    fastas = glob.glob("%s/*.tfa" % fic)
    rapport = open("ficOut", "w")
    rapport.write("el1\tel2\tid\tsim\n")
    for rang, premier in enumerate(fastas):
        # pair each file only with the ones that follow it in the listing
        for second in fastas[rang + 1:]:
            nom1 = files.get_name(premier)
            nom2 = files.get_name(second)
            nomAli = "%s-%s.needle" % (nom1.lower(), nom2.lower())
            ficAli = "%s/%s" % (fic, nomAli)
            if not os.path.isfile(ficAli):
                alignement.ali_needle(premier, second, ficAli)
            if os.path.isfile(ficAli):
                ident = alignement.extrait_id_needle(ficAli)
                simil = alignement.extrait_sim_needle(ficAli)
                rapport.write("%s\t%s\t%s\t%s\n" % (nom1, nom2, ident, simil))
    rapport.close()
def _copieReadsRetenus(ficIn, ficOut, listeReadPE):
    """Copy from *ficIn* to *ficOut* the records whose header is in *listeReadPE*.

    A line starting with "@FCC" is treated as a record header; it is compared
    (minus its last two characters) with the next expected entry of
    *listeReadPE*, which is consumed in order.  Lines following a matching
    header are copied until the next header.
    """
    attendus = len(listeReadPE)
    source = open(ficIn, "r")
    sortie = open(ficOut, "w")
    ligne = source.readline()
    garde = False
    rang = 0
    while ligne and rang < attendus:
        if ligne[0:4] == "@FCC":
            garde = ligne[:-2] == listeReadPE[rang]
            if garde:
                rang += 1
        if garde:
            sortie.write("%s" % ligne)
            if rang == attendus:
                # last expected read: the loop is about to stop, so copy the
                # three remaining lines of its record right away
                for _ in range(3):
                    ligne = source.readline()
                    sortie.write("%s" % ligne)
        ligne = source.readline()
    source.close()
    sortie.close()


def recreeFicPEfq(ficPE1, ficPE2, outDir):
    """Rewrite two paired-end read files keeping only the listed reads.

    The list of reads to keep comes from ``creeListeDesPE(ficPE1, ficPE2)``.
    Each input is filtered independently into ``<outDir>/<name>.fq``.
    """
    outR1 = "%s/%s.fq" % (outDir, files.get_name(ficPE1))
    outR2 = "%s/%s.fq" % (outDir, files.get_name(ficPE2))
    listeReadPE = creeListeDesPE(ficPE1, ficPE2)
    _copieReadsRetenus(ficPE1, outR1, listeReadPE)
    _copieReadsRetenus(ficPE2, outR2, listeReadPE)
def ali_needleFasta(fic1, fic2, outfile="", gop=10, gep=0.5):
    """Run needle on two sequence files, requesting FASTA-formatted output.

    fic1, fic2 -- the two sequence files passed as -asequence / -bsequence
    outfile    -- result file; when left empty it defaults to
                  ``<name1>-<name2>.needle`` (lower-cased names) in the
                  directory of *fic1*
    gop, gep   -- values passed to -gapopen and -gapextend
    """
    if outfile == "":
        # default: the output file goes into the directory of file 1
        dossier = files.get_filepath(fic1)
        nomAli = "%s-%s.needle" % (files.get_name(fic1).lower(), files.get_name(fic2).lower())
        outfile = "%s/%s" % (dossier, nomAli)
    arguments = (System.NEEDLE, fic1, fic2, gop, gep, outfile)
    os.system("%s -asequence %s -bsequence %s -gapopen %s -gapextend %s -outfile %s -aformat3 fasta" % arguments)
def ali_water(fic1, fic2, outfile="", gop=10, gep=0.5):
    """Run water on two sequence files.

    fic1, fic2 -- the two sequence files passed as -asequence / -bsequence
    outfile    -- result file; when left empty it defaults to
                  ``<name1>-<name2>.water`` (lower-cased names) in the
                  directory of *fic1*
    gop, gep   -- values passed to -gapopen and -gapextend
    """
    if outfile == "":
        # default: the output file goes into the directory of file 1
        dossier = files.get_filepath(fic1)
        nomAli = "%s-%s.water" % (files.get_name(fic1).lower(), files.get_name(fic2).lower())
        outfile = "%s/%s" % (dossier, nomAli)
    arguments = (System.WATER, fic1, fic2, gop, gep, outfile)
    os.system("%s -asequence %s -bsequence %s -gapopen %s -gapextend %s -outfile %s" % arguments)
def lanceAllSoapCoverageSNP():
    """Print one soap.coverage command line per single-end result file.

    For each ``resuL1234SE*`` file of the hard-coded result directory, the
    matching paired-end file, reference scaffold and ``.cov`` output path are
    derived from its name.  The commands are only printed, not executed.
    """
    for fileSE in glob.glob("/test/friedric/Pifa/Assemblage/SoapSplite/ResuParScaff/resuL1234SE*"):
        # the scaffold name is what follows the first 12 characters of the file name
        scaff = files.get_name(fileSE)[12:]
        filePE = fileSE.replace('resuL1234SE', 'resuL1234PE')
        ref = "/test/friedric/Pifa/Assemblage/SoapSplite/ScaffRef/%s.tfa" % scaff
        nomResu = files.get_name(fileSE.replace('resuL1234SE', 'resuL1234'))
        resu = "/test/friedric/Pifa/Assemblage/SoapSplite/SoapCoverage/%s.cov" % nomResu
        print("/test/friedric/Pifa/SOAPcoverage/2.7.7/soap.coverage -phy -refsingle %s -il_single %s -il_soap %s -o %s" % (ref, fileSE, filePE, resu))
def ali_stretcherFasta(fic1, fic2, outfile=""):
    """Run stretcher on two sequence files, requesting FASTA-formatted output.

    outfile -- result file; when left empty it defaults to
               ``<name1>-<name2>.stretcher`` (lower-cased names) in the
               directory of *fic1*

    Nothing is run when the output file is already present; a message is
    printed instead.
    """
    if outfile == "":
        # default: the output file goes into the directory of file 1
        dossier = files.get_filepath(fic1)
        nomAli = "%s-%s.stretcher" % (files.get_name(fic1).lower(), files.get_name(fic2).lower())
        outfile = "%s/%s" % (dossier, nomAli)
    if os.path.isfile(outfile):
        print("%s already exists" % outfile)
    else:
        os.system("%s -asequence %s -bsequence %s -outfile %s -aformat3 fasta" % (System.STRETCHER, fic1, fic2, outfile))
def extraitPEUnmappedReads(ficSamX, reads1, reads2, strain):
    """Extract the read pairs named in *ficSamX* from two paired read files.

    ficSamX -- tab-separated file whose first column is a read name; per its
               original comment it lists pairs where at least one mate is
               unmapped.  Consecutive duplicate names are collapsed.
    reads1, reads2 -- the two mate files, read in lockstep line by line
    strain  -- sub-directory name used in the hard-coded output location

    The selected records are written to
    ``.../UnmappedReads/<strain>/<name>.fq`` where ``<name>`` is the input
    file name with "Cleandata" replaced by "unmapped".  The names in
    *ficSamX* are matched in order against the headers of *reads1*.
    """
    readsUnmapped1 = "/Volumes/BioSan/Users/friedrich/Gonorrhoeae/BWA/UnmappedReads/%s/%s.fq" % (
        strain, files.get_name(reads1).replace("Cleandata", "unmapped"))
    readsUnmapped2 = "/Volumes/BioSan/Users/friedrich/Gonorrhoeae/BWA/UnmappedReads/%s/%s.fq" % (
        strain, files.get_name(reads2).replace("Cleandata", "unmapped"))

    # build the list of pairs where at least one mate is not mapped
    lUnmapped = []
    with open(ficSamX, "r") as fM:
        for line in fM:
            nom = line.split("\t")[0]
            # An empty file now yields an empty list; the original seeded the
            # list with a bogus "" entry in that case.
            if not lUnmapped or nom != lUnmapped[-1]:
                lUnmapped.append(nom)
    nbUnmapped = len(lUnmapped)
    print(nbUnmapped)

    # ``with`` also closes the two input files, which the original left open.
    with open(reads1, "r") as fr1, open(reads2, "r") as fr2, \
            open(readsUnmapped1, "w") as of1, open(readsUnmapped2, "w") as of2:
        lineR1 = fr1.readline()
        lineR2 = fr2.readline()
        i = 0
        toW = 0
        while lineR1:
            # caution: condition to adapt to the format of the reads
            if lineR1[0:5] == "@FCC1":
                el = lineR1.split("\t")
                # caution: slice to adapt to the format of the reads
                if i < nbUnmapped and el[0][1:-3] == lUnmapped[i]:
                    i += 1
                    toW = 1
                else:
                    toW = 0
            if toW == 1:
                of1.write(lineR1)
                of2.write(lineR2)
            lineR1 = fr1.readline()
            lineR2 = fr2.readline()
def defSimilarite(fic):
    """Print gene/protein similarity figures for the locus pairs of *fic*.

    Columns 0 and 2 of each non-empty tab-separated line are the two locus
    names.  For each pair, existing needle results under ``FastaGene`` and
    ``FastaProt`` are read to print: gene similarity, protein similarity,
    the two loci, then the number of similar positions expressed as a
    percentage of the shorter of the two sequences (gene, then protein).
    A "-" stands for a missing alignment; pairs with an empty locus only get
    their names printed.
    """
    def _mesure(ficAli, seq1, seq2):
        # (similarity, similar positions in % of the shorter sequence),
        # or ("-", "-") when the alignment file is absent
        if not os.path.isfile(ficAli):
            return "-", "-"
        plusCourte = min(len(fasta.seqEnVar(seq1)), len(fasta.seqEnVar(seq2)))
        sim = float(alignement.extrait_sim_needle(ficAli))
        nbSim = float(alignement.extrait_nbsim_needle(ficAli)) / plusCourte * 100
        return sim, nbSim

    for line in open(fic, "r").read().split("\n"):
        if line == "":
            continue
        lis = line.split("\t")
        loc1 = lis[0]
        loc2 = lis[2]
        if loc1 == "" or loc2 == "":
            print("\t\t%s\t%s" % (loc1, loc2))
            continue
        ficg1 = "FastaGene/%s.tfa" % loc1
        ficg2 = "FastaGene/%s.tfa" % loc2
        ficp1 = "FastaProt/%s.tfa" % loc1
        ficp2 = "FastaProt/%s.tfa" % loc2
        # the same alignment file name is looked up in both directories
        outf = "%s-%s.needle" % (files.get_name(ficg1).lower(), files.get_name(ficg2).lower())
        simg, nsimg = _mesure("FastaGene/%s" % outf, ficg1, ficg2)
        simp, nsimp = _mesure("FastaProt/%s" % outf, ficp1, ficp2)
        if simp != "-" and simg != "-":
            print("%.1f\t%.1f\t%s\t%s\t%.1f\t%.1f\t" % (simg, simp, loc1, loc2, nsimg, nsimp))
        else:
            print("%s\t%s\t%s\t%s\t%s\t%s" % (simg, simp, loc1, loc2, nsimg, nsimp))
def defNouvelIdent(fic):
    """Print gene/protein identity figures for the locus pairs of *fic*.

    Columns 0 and 2 of each non-empty tab-separated line are the two locus
    names.  For each pair, existing needle results under ``FastaGene`` and
    ``FastaProt`` are read to print: gene identity, protein identity, the
    two loci, then the number of identical positions expressed as a
    percentage of the shorter of the two sequences (gene, then protein).
    A "-" stands for a missing alignment; pairs with an empty locus only get
    their names printed.
    """
    def _mesure(ficAli, seq1, seq2):
        # (identity, identical positions in % of the shorter sequence),
        # or ("-", "-") when the alignment file is absent
        if not os.path.isfile(ficAli):
            return "-", "-"
        plusCourte = min(len(fasta.seqEnVar(seq1)), len(fasta.seqEnVar(seq2)))
        ident = float(alignement.extrait_id_needle(ficAli))
        nbId = float(alignement.extrait_nbid_needle(ficAli)) / plusCourte * 100
        return ident, nbId

    for line in open(fic, "r").read().split("\n"):
        if line == "":
            continue
        lis = line.split("\t")
        loc1 = lis[0]
        loc2 = lis[2]
        if loc1 == "" or loc2 == "":
            print("\t\t%s\t%s" % (loc1, loc2))
            continue
        ficg1 = "FastaGene/%s.tfa" % loc1
        ficg2 = "FastaGene/%s.tfa" % loc2
        ficp1 = "FastaProt/%s.tfa" % loc1
        ficp2 = "FastaProt/%s.tfa" % loc2
        # the same alignment file name is looked up in both directories
        outf = "%s-%s.needle" % (files.get_name(ficg1).lower(), files.get_name(ficg2).lower())
        idg, nidg = _mesure("FastaGene/%s" % outf, ficg1, ficg2)
        idp, nidp = _mesure("FastaProt/%s" % outf, ficp1, ficp2)
        if idp != "-" and idg != "-":
            print("%.1f\t%.1f\t%s\t%s\t%.1f\t%.1f\t" % (idg, idp, loc1, loc2, nidg, nidp))
        else:
            print("%s\t%s\t%s\t%s\t%s\t%s" % (idg, idp, loc1, loc2, nidg, nidp))
def reverseComplement(fastq, outfile=""):
    """Run fastx_reverse_complement on *fastq*.

    outfile -- result file; when left empty it defaults to
               ``<name>_RC.fq`` where ``<name>`` is ``files.get_name(fastq)``
    """
    if outfile == "":
        outfile = "%s_RC.fq" % files.get_name(fastq)
    os.system("/Volumes/BioSan/opt/fastx_toolkit/fastx_reverse_complement -i %s -o %s" % (fastq, outfile))
def lancetBlastnGeneIntron():
    """Run tabular tblastn for each protein file of the ProtIntron directory.

    Each ``*.tfa`` file under ``<repGenome>/ProtIntron`` is searched against
    the ``pifaScaffFmt6`` database through ``alignement.run_tblastnFmt``;
    results go to ``<repGenome>/tBlastnFmt6/<lower-cased name>.tblastn``.
    Queries whose result file is already present are skipped.
    """
    repGenome = "/Users/afutil/Documents/Genolevures/PiFa/Annotation/PourPascal/tBlastNPisoIntron"
    repSortie = "%s/tBlastnFmt6" % repGenome
    banque = "%s/DBblast/pifaScaffFmt6" % repGenome
    for requete in glob.glob("%s/ProtIntron/*.tfa" % repGenome):
        resultat = "%s/%s.tblastn" % (repSortie, files.get_name(requete).lower())
        if os.path.isfile(resultat):
            print("outfile2 exists")
        else:
            print("++%s++" % resultat)
            alignement.run_tblastnFmt(requete, resultat, banque)
def renommeFic(dir, suffix):
    """Copy every ``*.tfa`` file of *dir* to ``<name>-<suffix>.tfa`` in *dir*.

    dir    -- directory scanned for ``*.tfa`` files (with or without a
              trailing separator)
    suffix -- text appended to each file name, after a dash

    The original files are left in place (this is a copy, not a move).
    """
    for fic in glob.glob("%s/*.tfa" % dir):
        newName = "%s-%s.tfa" % (files.get_name(fic), suffix)
        # Fix: the destination used to be "%s%s" % (dir, newName); without a
        # trailing separator on *dir* the copy landed next to the directory
        # ("<dir><name>...") instead of inside it.  os.path.join handles both
        # forms of *dir*.
        shutil.copy(fic, os.path.join(dir, newName))
def creeFic6Strains():
    """Write, per Sakl0? directory, a .sites file restricted to six strains.

    For every directory matching the hard-coded ``Sakl0?`` pattern, the file
    ``<dir>/<name>.sites`` is read and a reduced copy is written to
    ``<repO>/<name>/<name>.sites``: the first line becomes "6" followed by
    columns 1 and 2 of the original first line, then only the ">" records
    whose name is one of the six selected strains are kept.
    """
    liStrains = ["CBS10367", "CBS5828", "CBS3082a", "NRBC10572", "68917-2", "CBS6546"]
    repO = "/Users/anfutil/Documents/Projets/GB-3G/LDHat/6souches"
    for rep in glob.glob("/Users/anfutil/Documents/Projets/GB-3G/LDHat/Sakl0?"):
        nomRep = files.get_name(rep)
        repOut = "%s/%s" % (repO, nomRep)
        if not os.path.isdir(repOut):
            os.mkdir(repOut)
        ficSites = "%s/%s.sites" % (rep, nomRep)
        ficOut = "%s/%s.sites" % (repOut, nomRep)
        sortie = open(ficOut, "w")
        contenu = open(ficSites, "r").read().split("\n")
        entete = contenu[0].split("\t")
        sortie.write("6\t%s\t%s\n" % (entete[1], entete[2]))
        garde = False
        for ligne in contenu[1:]:
            if ligne == "":
                continue
            if ligne[0] == ">":
                # a new record starts: keep it only for a selected strain
                garde = ligne[1:] in liStrains
            if garde:
                sortie.write("%s\n" % ligne)
        sortie.close()
def ajoutIncrementalPrefHeader(infile, prefix, outfile=""):
    """Copy a FASTA file, prepending ``<prefix><counter>`` to every header.

    infile  -- input file; lines starting with ">" are headers
    prefix  -- text placed right after the ">" of each header
    outfile -- result file; when left empty it defaults to
               ``<name>-incrPrefix.tfa``

    The counter starts at 1, is zero-padded to at least four digits, and is
    followed by a space and the original header text.  Empty lines are
    dropped; all other lines are copied unchanged.
    """
    if outfile == "":
        outfile = "%s-incrPrefix.tfa" % (files.get_name(infile))
    print("outfile est %s" % outfile)
    sortie = open(outfile, "w")
    compteur = 1
    for ligne in open(infile, "r").read().split("\n"):
        if ligne == "":
            continue
        if ligne[0] != ">":
            sortie.write("%s\n" % ligne)
            continue
        numero = str(compteur).zfill(4)
        sortie.write(">%s%s %s\n" % (prefix, numero, ligne[1:]))
        compteur += 1
    sortie.close()
def lanceRechercheSimInc(file, repOut, db):
    """Run tblastn twice on *file* (standard and tabular) unless already done.

    file   -- query file
    repOut -- directory receiving ``<name>-4strains.tblastn`` and
              ``<name>-4strains-Fmt6.tblastn``
    db     -- database handed to both ``alignement`` runners

    Each search is skipped, with a message, when its result file is already
    present.  Returns the path of the tabular (Fmt6) result.
    """
    nom = files.get_name(file)
    outfile = "%s/%s-4strains.tblastn" % (repOut, nom)
    outfileB = "%s/%s-4strains-Fmt6.tblastn" % (repOut, nom)

    if os.path.isfile(outfile):
        print("%s exists" % outfile)
    else:
        alignement.run_tblastn(file, outfile, db)

    if os.path.isfile(outfileB):
        print("%s exists" % outfileB)
    else:
        alignement.run_tblastnFmt(file, outfileB, db)

    return outfileB
def statsALarracheALaChaine(rep):
    """Call ``statsALarrache`` for every ``*.tfa`` file of *rep*.

    A tab-separated column header is printed first; each file's base name is
    then passed to ``statsALarrache`` together with the output file name
    ``stat.txt``.
    """
    print("GeneRef\tlongRefP\tnbSeqPil01\tlongIL01P\tnbDiffP\tnbDiffPIsolees\tlongRefG\tnbSeqGil01\tlongIL01G\tnbDiffG\tnbDiffGIsolees")
    rapport = "stat.txt"
    for chemin in glob.glob("%s/*.tfa" % rep):
        statsALarrache(files.get_name(chemin), rapport)
def lancementPipeAnalyseComplete(rep):
    """Run the whole similarity / gene / protein / alignment chain on *rep*.

    Working sub-directories (``tblastn``, ``SeqGene``, ``SeqProt``,
    ``AliGene``, ``AliProt``) are created under *rep* when missing.  Then,
    for each ``*.tfa`` file of *rep*:

    1. similarity search (``lanceRechercheSimInc``);
    2. gene creation from the hits (``creeGeneSelonSim``);
    3. protein creation from the genes (``creeSeqProtSelonSeqGene``);
    4. the query protein and the matching FY gene are copied next to the
       created sequences and each set is concatenated
       (``concatSeqAAligner``);
    5. the proteins are aligned with mafft unless the alignment is already
       there, and the gene alignment is derived with tranalign.
    """
    repSim = "%s/tblastn" % rep
    repGene = "%s/SeqGene" % rep
    repProt = "%s/SeqProt" % rep
    repAliG = "%s/AliGene" % rep
    repAliP = "%s/AliProt" % rep
    repGeneFY = "/Users/afutil/Documents/DataJoseph/SaceStrains/FY/Gene"
    db = ssys.DB4STRAINS
    repSeq = ssys.SQ4STRAINS

    for dossier in (repSim, repGene, repProt, repAliG, repAliP):
        if not os.path.isdir(dossier):
            os.mkdir(dossier)

    for file in glob.glob("%s/*.tfa" % rep):
        suff = files.get_name(file)

        print("sim...")
        ficSim = lanceRechercheSimInc(file, repSim, db)
        print("creation gene...")
        creeGeneSelonSim(ficSim, repGene, repSeq)
        print("creation prot...")
        creeSeqProtSelonSeqGene(suff, repGene, repProt)

        # copy the protein sequence into the right directory
        shutil.copyfile(file, "%s/%s.tfa" % (repProt, suff))
        seqToAlign = "%s-4strains.fasta" % suff
        concatSeqAAligner(suff, repProt, seqToAlign)

        # copy the nucleic sequence into the right directory
        shutil.copyfile("%s/%s.tfa" % (repGeneFY, suff), "%s/%s.tfa" % (repGene, suff))
        seqGToAlign = "%s-G-4strains.fasta" % suff
        concatSeqAAligner(suff, repGene, seqGToAlign)

        # build the protein alignment
        print("seqtoalign est: %s" % seqToAlign)
        print("alignement...")
        ficAliP = "%s/%s" % (repAliP, seqToAlign)
        if not os.path.isfile(ficAliP):
            alignement.run_mafft("%s/%s" % (repProt, seqToAlign), ficAliP)

        # what remains is the tranalign into a gene alignment
        if os.path.isfile(ficAliP):
            ficAliG = "%s/%s" % (repAliG, seqGToAlign)
            alignement.run_tranalign("%s/%s" % (repGene, seqGToAlign), ficAliP, ficAliG)
def stats4StrainsALarracheALaChaine(rep):
    """Call ``stats4StrainsALarrache`` for every ``*.tfa`` file of *rep*.

    A tab-separated column header is first appended to ``stat.txt``; each
    file's base name is then passed to ``stats4StrainsALarrache`` together
    with that output file name.
    """
    rapport = "stat.txt"
    entete = open(rapport, 'a')
    entete.write("gene\tposition\tresIL01\tresFY\tresWE372\tresNC02\tresYJM981\tIL=WE?=NC?\tYJM=FY\tProfilOK\n")
    entete.close()
    for chemin in glob.glob("%s/*.tfa" % rep):
        stats4StrainsALarrache(files.get_name(chemin), rapport)
def lanceAlignementsWater(repWork):
    """Align each ``*.tfa`` file of *repWork* with its FY counterpart (water).

    The counterpart is ``<ProtFY/RegionChr8>/<prefix>.tfa`` where ``<prefix>``
    is the part of the file name before the first underscore.  Results go to
    ``<repWork>/Alignements/<name>.water``.
    """
    repSortie = "%s/Alignements" % repWork
    for chemin in glob.glob("%s/*.tfa" % repWork):
        nom = files.get_name(chemin)
        resultat = "%s/%s.water" % (repSortie, nom)
        prefixe = nom.split("_")[0]
        ficFY = "/Users/afutil/Documents/DataJoseph/Incompatibilites/FY4-IL01/MappedRegions/ProtFY/RegionChr8/%s.tfa" % prefixe
        alignement.ali_water(chemin, ficFY, resultat)
def lanceDelly(aln, repOut, ref):
    """Run DELLY on *aln* and return the path of its output file.

    aln    -- alignment file given as the positional argument
    repOut -- directory receiving ``<name>.delly``
    ref    -- reference genome (the one used for the mapping), passed to -g

    Options used: -p includes breakpoint detection; -q 1 sets the minimum
    paired-end mapping quality (1 calls uniquely mapped reads).
    """
    resultat = "%s/%s.delly" % (repOut, files.get_name(aln))
    commande = "{0:s} -p -g {1:s} -q 1 -o {2:s} {3:s}".format(System.DELLY, ref, resultat, aln)
    os.system(commande)
    return resultat
def lanceJumpy(aln, repOut, ref):
    """Run JUMPY on *aln* and return the path of its output file.

    aln    -- alignment file given as the positional argument
    repOut -- directory receiving ``<name>.jumpy``
    ref    -- reference genome (the one used for the mapping), passed to -g

    Options used: -p includes breakpoint detection; -q 1 sets the minimum
    paired-end mapping quality (1 calls uniquely mapped reads).
    """
    resultat = "%s/%s.jumpy" % (repOut, files.get_name(aln))
    arguments = (System.JUMPY, ref, resultat, aln)
    os.system("%s -p -g %s -q 1 -o %s %s" % arguments)
    return resultat
def lanceReverseComplementALaChaine():
    """Reverse-complement every ``*.fq`` file of the hard-coded CBS4104 directory.

    The output directory is the input one with "CleanData/" replaced by
    "CleanData/ReverseComplement/"; it is created (and opened to everybody
    with chmod 777) when missing.  Each result keeps the input file name.
    """
    for fqFile in glob.glob("/Volumes/BioSan/Users/friedrich/Reads/BGI/20130726/CleanData/CBS4104/*.fq"):
        repSortie = files.get_filepath(fqFile).replace("CleanData/", "CleanData/ReverseComplement/")
        if not os.path.isdir(repSortie):
            os.mkdir(repSortie)
            os.system("chmod 777 %s" % repSortie)
        reverseComplement(fqFile, "%s/%s.fq" % (repSortie, files.get_name(fqFile)))
def creeSeqProtSelonSeqGene(suff, repIn, repOut):
    """Translate every ``<suff>*.tfa`` file of *repIn* into *repOut*.

    Each input path is printed, then handed to ``sequences.translateSeq``
    (third argument 1) to produce ``<repOut>/<name>.tfa``.  Files whose
    output is already present are skipped with a message.
    """
    for chemin in glob.glob("%s/%s*.tfa" % (repIn, suff)):
        print(chemin)
        cible = "%s/%s.tfa" % (repOut, files.get_name(chemin))
        if os.path.isfile(cible):
            print("ouFile : %s already exists" % cible)
        else:
            sequences.translateSeq(chemin, cible, 1)
def verifLongEtExtrGene():
    """List the short genes that start with ATG and end with a stop codon.

    Every ``*.tfa`` file of the hard-coded directory is loaded; a
    ``name<TAB>length`` row is printed for sequences that begin with "ATG",
    end with "TAA", "TGA" or "TAG", and are shorter than 900 residues.
    """
    allfile = glob.glob("/Users/anfutil/Documents/Genolevures/Pifa/Annotation/PropositionGenesSelontBlastN/Combinaison/*.tfa")
    print("GeneName\tSeqLen")
    codonsStop = ["TAA", "TGA", "TAG"]
    for chemin in allfile:
        seq = fasta.seqEnVar(chemin)
        if seq[0:3] == "ATG" and seq[-3:] in codonsStop and len(seq) < 900:
            print("%s\t%s" % (files.get_name(chemin), len(seq)))
def copieFicInteret(file):
    """Copy into ``Subset/`` the blastp results of the entries listed in *file*.

    The first tab-separated column of each non-empty line gives a name; the
    file ``<lower-cased name>.blastp`` is copied from the current directory
    into ``Subset/`` when it exists.
    """
    for ligne in open(file, "r").read().split("\n"):
        if ligne == "":
            continue
        premier = ligne.split("\t")[0]
        resultat = "%s.blastp" % files.get_name(premier).lower()
        if os.path.isfile(resultat):
            shutil.copy(resultat, "Subset/%s" % resultat)
def lanceAllSoapSNP():
    """Run soapsnp on every ``*_trie`` file of the hard-coded result directory.

    The scaffold name is the file name without its first 12 and last 5
    characters; it selects the reference under ``ScaffRef``.  The consensus
    is written to ``SoapSNP/<name>_consensus``.
    """
    for trie in glob.glob("/Users/anfutil/Documents/Genolevures/Pifa/AssemblageGenome/TestSoapAligner/SoapSplite/ResuParScaff/*_trie"):
        scaff = files.get_name(trie)[12:-5]
        ref = "/Users/anfutil/Documents/Genolevures/Pifa/AssemblageGenome/TestSoapAligner/SoapSplite/ScaffRef/%s.tfa" % scaff
        resu = "/Users/anfutil/Documents/Genolevures/Pifa/AssemblageGenome/TestSoapAligner/SoapSplite/SoapSNP/%s_consensus" % files.get_name(trie)
        os.system("soapsnp -i %s -d %s -o %s -r 0.0001 -t -u -m" % (trie, ref, resu))
def analyseBlastPrelim():
    """Summarise each tabular blastp result as one printed row.

    For every ``*-Fmt6.blastp`` file of the hard-coded directory, the query
    sequence is loaded (length, number of 'X') and the result lines are read
    until the first empty line.  Only the lines for the first subject seen
    are kept; their alignment lengths (column 3) and identity percentages
    (column 2) are combined into a length-weighted identity.  The row shows
    the query, its chromosome and gene number (slices of its name), its
    length and X count, the subject, the subject length, the summed
    alignment length, the weighted identity and an e-value (column 10).
    Files with no result line get a "No hits found" row.
    """
    repGenome = "/Users/afutil/Documents/Genolevures/Pifa/Annotation/ModelGenes/ProtJigsaw/BlastP/ContrePiso/Eq1n"
    allfile = glob.glob("%s/*-Fmt6.blastp" % repGenome)
    repPiso = "/Users/afutil/Documents/Genolevures/Piso/SeqFinales/FastaProt"
    repPab = "/Users/afutil/Documents/Genolevures/Pifa/Annotation/ModelGenes/ProtJigsaw"
    print("SeqPifa\tChromoPifa\tnumGene\tlgSeqPifa\tnbXSeqPifa\tSeqPist\tlgSeqPist\tlgAli\t%ageId\tBestEvalue")
    for rapport in allfile:
        pab = files.get_name(rapport).replace("-Fmt6", "")
        chromo = pab[5:10]
        numGen = pab[12:]
        seqPab = fasta.seqEnVar("%s/PIFA.%s.tfa" % (repPab, pab[5:]))
        lgPab = len(seqPab)
        nbXPab = seqPab.count('X')
        piso = ""
        longueurs = []
        pourcentages = []
        lgSeqPiso = 0
        for resu in open(rapport, "r").read().split("\n"):
            if resu != "":
                elem = resu.split("\t")
                cible = elem[1][8:]
                # keep only the lines of the first subject encountered
                if piso == "" or piso == cible:
                    piso = cible
                    if lgSeqPiso == 0:
                        lgSeqPiso = len(fasta.seqEnVar("%s/%s.tfa" % (repPiso, piso)))
                        # NOTE(review): the source had lost its indentation;
                        # the e-value is assumed to be taken once, from the
                        # first kept line (column is labelled BestEvalue) --
                        # confirm against the original layout.
                        evalue = elem[10]
                    longueurs.append(elem[3])
                    pourcentages.append(elem[2])
                continue
            # first empty line: emit the summary row and stop reading
            if piso == "":
                print("%s\t%s\t%s\t%s\t%s\tNo hits found" % (pab, chromo, numGen, lgPab, nbXPab))
            else:
                idT = 0
                lgAliT = 0
                for pct, lg in zip(pourcentages, longueurs):
                    idT += float(pct) * float(lg)
                    lgAliT += int(lg)
                ident = idT / lgAliT
                print("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%.2f\t%s" % (pab, chromo, numGen, lgPab, nbXPab, piso, lgSeqPiso, lgAliT, ident, evalue))
            break
def creeFastaSeqPartielle(infile, deb, fin):
    """Create a FASTA file holding part of the sequence found in *infile*.

    The sub-sequence from position *deb* to position *fin* is obtained
    through ``extraitSeqPartielle`` and written, under the header
    ``<name>_<deb>-<fin>``, to ``<header>.fasta``.  Nothing is written when
    that file is already present.
    """
    morceau = extraitSeqPartielle(seqEnVar(infile), deb, fin)
    entete = "%s_%s-%s" % (files.get_name(infile), deb, fin)
    cible = "%s.fasta" % entete
    if os.path.isfile(cible):
        return
    fromSeqToFasta(morceau, entete, cible)