def analyseBlastPrelim():
    """Print one summary line per tabular (Fmt6) blastp result file.

    For every ``*-Fmt6.blastp`` file of the hard-coded result directory, the
    rows that belong to the first subject listed are collected (reading stops
    at the first empty line) and a tab-separated line is printed with: query
    name, ``pab[5:10]`` (column ``ChromoPifa``), ``pab[12:]`` (column
    ``numGene``), query length, number of ``X`` in the query, subject name,
    subject length, summed alignment length, identity averaged over the rows
    weighted by alignment length, and field 11 of the first row.  A file
    without any row is reported as ``No hits found``.

    Takes no argument and returns nothing.

    NOTE(review): the original indentation was lost; the e-value is assumed
    to be taken from the first row only (column is named ``BestEvalue``).
    """
    repGenome = "/Users/afutil/Documents/Genolevures/Pifa/Annotation/ModelGenes/ProtJigsaw/BlastP/ContrePiso/Eq1n"
    allfile = glob.glob("%s/*-Fmt6.blastp" % repGenome)
    repPiso = "/Users/afutil/Documents/Genolevures/Piso/SeqFinales/FastaProt"
    # Alternative subject directory used previously:
    # repPiso = "/Users/afutil/Documents/Genolevures/Pist/Fasta/CGOB"
    repPab = "/Users/afutil/Documents/Genolevures/Pifa/Annotation/ModelGenes/ProtJigsaw"
    print("SeqPifa\tChromoPifa\tnumGene\tlgSeqPifa\tnbXSeqPifa\tSeqPist\tlgSeqPist\tlgAli\t%ageId\tBestEvalue")
    for ficBlast in allfile:
        pab = files.get_name(ficBlast).replace("-Fmt6", "")
        chromo = pab[5:10]
        numGen = pab[12:]
        ficPab = "%s/PIFA.%s.tfa" % (repPab, pab[5:])
        seqPab = fasta.seqEnVar(ficPab)
        lgPab = len(seqPab)
        nbXPab = seqPab.count('X')
        # Close the result file as soon as it has been read.
        with open(ficBlast, "r") as handle:
            lgResu = handle.read().split("\n")
        piso = ""
        lgAli = []
        idents = []
        lgSeqPiso = 0
        evalue = ""
        for resu in lgResu:
            if resu == "":
                # First empty line marks the end of the rows.
                break
            elem = resu.split("\t")
            # Only rows of the first subject encountered are kept.
            if piso == "" or piso == elem[1][8:]:
                piso = elem[1][8:]
                if lgSeqPiso == 0:
                    ficPiso = "%s/%s.tfa" % (repPiso, piso)
                    lgSeqPiso = len(fasta.seqEnVar(ficPiso))
                    evalue = elem[10]
                lgAli.append(elem[3])
                idents.append(elem[2])
        # The summary is emitted after the loop so that a file lacking a
        # trailing newline is reported too.
        if piso == "":
            print("%s\t%s\t%s\t%s\t%s\tNo hits found" % (pab, chromo, numGen, lgPab, nbXPab))
            continue
        idT = 0
        lgAliT = 0
        for pcId, lg in zip(idents, lgAli):
            idT += float(pcId) * float(lg)
            lgAliT += int(lg)
        ident = idT / lgAliT
        print("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%.2f\t%s" % (pab, chromo, numGen, lgPab, nbXPab, piso, lgSeqPiso, lgAliT, ident, evalue))
def _identiteNeedle(rep, loc1, loc2, outf):
    """Return the two identity figures read from ``rep/outf``.

    The first value is ``alignement.extrait_id_needle`` converted to float;
    the second is ``alignement.extrait_nbid_needle`` divided by the length of
    the shorter of ``rep/loc1.tfa`` and ``rep/loc2.tfa``, times 100.
    ``("-", "-")`` is returned when the alignment file does not exist.
    """
    ficAli = "%s/%s" % (rep, outf)
    if not os.path.isfile(ficAli):
        return "-", "-"
    taille = min(len(fasta.seqEnVar("%s/%s.tfa" % (rep, loc1))),
                 len(fasta.seqEnVar("%s/%s.tfa" % (rep, loc2))))
    pcId = float(alignement.extrait_id_needle(ficAli))
    nbId = float(alignement.extrait_nbid_needle(ficAli)) / taille * 100
    return pcId, nbId


def defNouvelIdent(fic):
    """Print gene and protein identity figures for each pair listed in ``fic``.

    ``fic`` is a tab-separated text file; columns 1 and 3 of every non-empty
    line hold the two locus names.  For each pair the needle alignments
    ``FastaGene/<loc1>-<loc2>.needle`` and ``FastaProt/<loc1>-<loc2>.needle``
    (names lower-cased) are looked up and one tab-separated line is printed:
    gene identity, protein identity, the two names, then both identity counts
    as a percentage of the shorter sequence.  A missing alignment is shown as
    ``-``; a line with an empty locus name only echoes the names.

    Raises IndexError on a non-empty line that has fewer than three columns.
    """
    # Read everything up front and release the handle immediately.
    with open(fic, "r") as handle:
        lines = handle.read().split("\n")
    for line in lines:
        if line == "":
            continue
        lis = line.split("\t")
        loc1 = lis[0]
        loc2 = lis[2]
        if loc1 == "" or loc2 == "":
            print("\t\t%s\t%s" % (loc1, loc2))
            continue
        outf = "%s-%s.needle" % (files.get_name("FastaGene/%s.tfa" % loc1).lower(),
                                 files.get_name("FastaGene/%s.tfa" % loc2).lower())
        idg, nidg = _identiteNeedle("FastaGene", loc1, loc2, outf)
        idp, nidp = _identiteNeedle("FastaProt", loc1, loc2, outf)
        if idp != "-" and idg != "-":
            print("%.1f\t%.1f\t%s\t%s\t%.1f\t%.1f\t" % (idg, idp, loc1, loc2, nidg, nidp))
        else:
            # At least one alignment is missing: fall back to plain formatting.
            print("%s\t%s\t%s\t%s\t%s\t%s" % (idg, idp, loc1, loc2, nidg, nidp))
def _similariteNeedle(rep, loc1, loc2, outf):
    """Return the two similarity figures read from ``rep/outf``.

    The first value is ``alignement.extrait_sim_needle`` converted to float;
    the second is ``alignement.extrait_nbsim_needle`` divided by the length
    of the shorter of ``rep/loc1.tfa`` and ``rep/loc2.tfa``, times 100.
    ``("-", "-")`` is returned when the alignment file does not exist.
    """
    ficAli = "%s/%s" % (rep, outf)
    if not os.path.isfile(ficAli):
        return "-", "-"
    taille = min(len(fasta.seqEnVar("%s/%s.tfa" % (rep, loc1))),
                 len(fasta.seqEnVar("%s/%s.tfa" % (rep, loc2))))
    pcSim = float(alignement.extrait_sim_needle(ficAli))
    nbSim = float(alignement.extrait_nbsim_needle(ficAli)) / taille * 100
    return pcSim, nbSim


def defSimilarite(fic):
    """Print gene and protein similarity figures for each pair listed in ``fic``.

    ``fic`` is a tab-separated text file; columns 1 and 3 of every non-empty
    line hold the two locus names.  For each pair the needle alignments
    ``FastaGene/<loc1>-<loc2>.needle`` and ``FastaProt/<loc1>-<loc2>.needle``
    (names lower-cased) are looked up and one tab-separated line is printed:
    gene similarity, protein similarity, the two names, then both similarity
    counts as a percentage of the shorter sequence.  A missing alignment is
    shown as ``-``; a line with an empty locus name only echoes the names.

    Raises IndexError on a non-empty line that has fewer than three columns.
    """
    # Read everything up front and release the handle immediately.
    with open(fic, "r") as handle:
        lines = handle.read().split("\n")
    for line in lines:
        if line == "":
            continue
        lis = line.split("\t")
        loc1 = lis[0]
        loc2 = lis[2]
        if loc1 == "" or loc2 == "":
            print("\t\t%s\t%s" % (loc1, loc2))
            continue
        outf = "%s-%s.needle" % (files.get_name("FastaGene/%s.tfa" % loc1).lower(),
                                 files.get_name("FastaGene/%s.tfa" % loc2).lower())
        simg, nsimg = _similariteNeedle("FastaGene", loc1, loc2, outf)
        simp, nsimp = _similariteNeedle("FastaProt", loc1, loc2, outf)
        if simp != "-" and simg != "-":
            print("%.1f\t%.1f\t%s\t%s\t%.1f\t%.1f\t" % (simg, simp, loc1, loc2, nsimg, nsimp))
        else:
            # At least one alignment is missing: fall back to plain formatting.
            print("%s\t%s\t%s\t%s\t%s\t%s" % (simg, simp, loc1, loc2, nsimg, nsimp))
def lanceBlastxFromScaff():
    """Run blastx on every reference scaffold, or on its two ends when long.

    Each ``*.tfa`` file of the hard-coded assembly directory is read; its
    name and sequence length are printed.  A sequence shorter than 2000 is
    searched whole.  Otherwise its first and last 1000 positions are written
    to ``<name>-deb.tfa`` / ``<name>-fin.tfa`` in the output directory and
    each of those two files is searched.

    Takes no argument and returns nothing.
    """
    # Debug trace present in the original; kept so the output is unchanged.
    print("toto")
    allfile = glob.glob("/Users/afutil/Documents/Genolevures/PiFa/AssemblageGenome/AssemblagesFinaux/assemblageRef1.0/*.tfa")
    database = "/Users/afutil/Documents/Genolevures/PiFa/AssemblageGenome/AssemblagesFinaux/SynteniePistPiso/DBblast/pistPisoProtFmt6"
    repout = "/Users/afutil/Documents/Genolevures/PiFa/AssemblageGenome/AssemblagesFinaux/SynteniePistPiso/BlastxFmt6"
    for ficScaff in allfile:
        seq = fasta.seqEnVar(ficScaff)
        fname = files.get_name(ficScaff)
        print("%s\t%s" % (fname, len(seq)))
        if len(seq) < 2000:
            outfile = "%s/%s.blastx" % (repout, fname)
            alignement.run_blastxFmt(ficScaff, outfile, database)
            continue
        fic1 = "%s/%s-deb.tfa" % (repout, fname)
        fic2 = "%s/%s-fin.tfa" % (repout, fname)
        with open(fic1, "w") as of1:
            of1.write(">%s\n%s\n" % (files.get_name(fic1), seq[0:1000]))
        with open(fic2, "w") as of2:
            of2.write(">%s\n%s\n" % (files.get_name(fic2), seq[-1000:]))
        for fic in (fic1, fic2):
            outfile = "%s/%s.blastx" % (repout, files.get_name(fic))
            # Search the extremity file itself; the original passed the whole
            # scaffold here, so the -deb/-fin sequences were never used.
            alignement.run_blastxFmt(fic, outfile, database)
def verifLongEtExtrGene():
    """List the short gene proposals that start with ATG and end with a stop codon.

    Prints a header, then the name and length of every ``*.tfa`` sequence of
    the hard-coded directory whose first three letters are ``ATG``, whose last
    three letters are ``TAA``, ``TGA`` or ``TAG`` and whose length is below 900.
    """
    chemins = glob.glob("/Users/anfutil/Documents/Genolevures/Pifa/Annotation/PropositionGenesSelontBlastN/Combinaison/*.tfa")
    codonsStop = ["TAA", "TGA", "TAG"]
    print("GeneName\tSeqLen")
    for chemin in chemins:
        seq = fasta.seqEnVar(chemin)
        # Guard clauses: skip anything that is not a short, complete ORF.
        if seq[0:3] != "ATG":
            continue
        if seq[-3:] not in codonsStop:
            continue
        if len(seq) >= 900:
            continue
        print("%s\t%s" % (files.get_name(chemin), len(seq)))
def statsAce(inrep):
    """Print name, length and number of N for every file found in ``inrep``.

    For each scaffold / contig file the name (with ``_L13`` removed), the
    sequence length and the case-insensitive count of ``n`` are printed as a
    tab-separated line ending with a tab.
    """
    for chemin in glob.glob("%s/*" % inrep):
        nom = files.get_name(chemin).replace("_L13", "")
        seq = fasta.seqEnVar(chemin)
        print("%s\t%s\t%s\t" % (nom, len(seq), seq.lower().count("n")))
def creeSequenceDeGene(scaff,deb,fin):
    """Write the region ``deb``..``fin`` of scaffold ``scaff`` to a FASTA file.

    The scaffold is read from ``ScaffTfa/<scaff>.tfa``; the region is obtained
    with ``extraitSeqGene`` after converting ``deb`` and ``fin`` to integers.
    The output is ``<scaff>-<deb>-<fin>.tfa`` in the current directory, or
    that name followed by ``-2`` when the file already exists.

    NOTE(review): this name is defined again further down with more
    parameters; inside one module the last definition is the one in effect.
    """
    coords = (scaff, deb, fin)
    seq = fasta.seqEnVar("ScaffTfa/%s.tfa" % scaff)
    seqGen = extraitSeqGene(seq, int(deb), int(fin))
    filename = "%s-%s-%s.tfa" % coords
    if os.path.isfile(filename):
        # Never overwrite an existing extraction.
        filename = "%s-2" % filename
    fasta.fromSeqToFasta(seqGen, "%s %s %s" % coords, filename)
def creeSequenceDeGene(scaff,deb,fin,geneN,repGene,repSeq):
    """Write the region ``deb``..``fin`` of ``scaff`` to ``repGene`` unless it exists.

    The scaffold is read from ``<repSeq>/<scaff>.tfa``; the region is obtained
    with ``extraitSeqGene`` after converting ``deb`` and ``fin`` to integers.
    The target is ``<repGene>/<geneN>_<scaff>-<deb>-<fin>.tfa``; when it is
    already present a message is printed and nothing is written.

    NOTE(review): this name is defined again further down with one more
    parameter; inside one module the last definition is the one in effect.
    """
    seq = fasta.seqEnVar("%s/%s.tfa" % (repSeq, scaff))
    seqGen = extraitSeqGene(seq, int(deb), int(fin))
    filename = "%s/%s_%s-%s-%s.tfa" % (repGene, geneN, scaff, deb, fin)
    if os.path.isfile(filename):
        print("il existe deja")
    else:
        fasta.fromSeqToFasta(seqGen, "%s-%s-%s" % (scaff, deb, fin), filename)
def lanceMicroPipe():
    """Run the search / extraction / alignment pipeline on every ``*fasta`` file.

    Creates the six working directories if needed, then for each query file
    of the current directory: runs two BLAST commands (default and tabular
    output), hands the tabular result to ``lanceRecupSeq``, concatenates and
    aligns the retrieved gene sequences with mafft (all together and
    pairwise), derives the protein sequences with ``creeSeqProtSelonSeqGene``
    and aligns them the same way.  Concatenated inputs are deleted after
    alignment, and a pairwise alignment that came out empty is deleted too.

    Takes no argument and returns nothing.

    NOTE(review): the original indentation was lost.  The pairwise input
    file is assumed to be removed unconditionally (as the ``_all.fasta``
    inputs are), and the first ``concatSeqAAligner`` call is assumed to be
    live code because mafft needs the file it produces.
    """
    for rep in ("SeqGene", "SeqProt", "AliGene", "AliProt", "PairwiseGene", "PairwiseProt"):
        if not os.path.isdir(rep):
            os.mkdir(rep)
    blastDB = "/BlastDB/Nucleic/Sace/1002Genomes.nt"
    for gene in glob.glob("*fasta"):
        # Three times the query length, passed on to lanceRecupSeq.
        longueur = len(fasta.seqEnVar("%s" % gene)) * 3
        geneN = files.get_name(gene)
        ficOut = "%s.blastn" % (geneN)
        # NOTE(review): "blast" is not a BLAST+ program name and "-seg" is not
        # a blastn option (blastn filters with -dust); the length * 3 above
        # hints at a protein query, i.e. tblastn.  Confirm the intended
        # program before relying on these two commands.
        cmdBl1 = "blast -query %s -db %s -out %s -seg no -soft_masking false" % (gene, blastDB, ficOut)
        # Fixed typo: the option was spelled "m-soft_masking".
        cmdBl2 = "blastn -query %s -db %s -out %s-6 -outfmt 6 -seg no -soft_masking false" % (gene, blastDB, ficOut)
        os.system(cmdBl1)
        os.system(cmdBl2)
        lanceRecupSeq("%s-6" % ficOut, longueur, geneN)

        # Multiple alignment of all retrieved gene sequences.
        concatSeqAAligner(geneN, "SeqGene", "AliGene/%s_all.fasta" % geneN)
        alignement.run_mafft("AliGene/%s_all.fasta" % geneN, "AliGene/%s_all-ali.tfa" % geneN)
        os.system("rm AliGene/%s_all.fasta" % geneN)

        # Pairwise gene alignment; an empty result file is discarded.
        concatPaireSeqAAligner(geneN, "SeqGene", "PairwiseGene/%s.fasta" % geneN)
        alignement.run_mafft("PairwiseGene/%s.fasta" % geneN, "PairwiseGene/%s-ali.fasta" % geneN)
        with open("PairwiseGene/%s-ali.fasta" % geneN, "r") as handle:
            lignes = handle.read().split("\n")
        if len(lignes) == 1:
            os.system("rm PairwiseGene/%s-ali.fasta" % geneN)
        os.system("rm PairwiseGene/%s.fasta" % geneN)

        # Same two alignments on the derived protein sequences.
        creeSeqProtSelonSeqGene("%s*" % (geneN), "SeqGene", "SeqProt")
        concatPaireSeqAAligner(geneN, "SeqProt", "PairwiseProt/%s.fasta" % geneN)
        alignement.run_mafft("PairwiseProt/%s.fasta" % geneN, "PairwiseProt/%s-ali.fasta" % geneN)
        with open("PairwiseProt/%s-ali.fasta" % geneN, "r") as handle:
            lignes = handle.read().split("\n")
        if len(lignes) == 1:
            os.system("rm PairwiseProt/%s-ali.fasta" % geneN)
        os.system("rm PairwiseProt/%s.fasta" % geneN)

        concatSeqAAligner(geneN, "SeqProt", "AliProt/%s_all.fasta" % geneN)
        alignement.run_mafft("AliProt/%s_all.fasta" % geneN, "AliProt/%s_all-ali.tfa" % geneN)
        os.system("rm AliProt/%s_all.fasta" % geneN)
def defSimilaritePseudo(fic):
    """Print similarity figures for each gene / pseudogene pair listed in ``fic``.

    ``fic`` is a tab-separated text file; columns 1 and 3 of every non-empty
    line hold the two locus names.  The needle alignment
    ``Genes+Pseudos/<loc1>-<loc2>.needle`` (names lower-cased) is produced
    with ``alignement.ali_needle`` when it does not exist yet, then one
    tab-separated line is printed: similarity, the two names, and the
    similarity count as a percentage of the shorter sequence.  A line with an
    empty locus name only echoes the names.

    Raises IndexError on a non-empty line that has fewer than three columns.

    NOTE(review): the original indentation was lost; the figures are assumed
    to be computed whether or not the alignment had to be created.  Under that
    reading the original ``simg == "-"`` fallback could never run and has
    been dropped.
    """
    # Read everything up front and release the handle immediately.
    with open(fic, "r") as handle:
        lines = handle.read().split("\n")
    for line in lines:
        if line == "":
            continue
        lis = line.split("\t")
        loc1 = lis[0]
        loc2 = lis[2]
        if loc1 == "" or loc2 == "":
            print("\t\t%s\t%s" % (loc1, loc2))
            continue
        ficg1 = "Genes+Pseudos/%s.tfa" % loc1
        ficg2 = "Genes+Pseudos/%s.tfa" % loc2
        outf = "%s-%s.needle" % (files.get_name(ficg1).lower(), files.get_name(ficg2).lower())
        ficAli = "Genes+Pseudos/%s" % outf
        if not os.path.isfile(ficAli):
            alignement.ali_needle(ficg1, ficg2)
        sizeg = min(len(fasta.seqEnVar(ficg1)), len(fasta.seqEnVar(ficg2)))
        simg = float(alignement.extrait_sim_needle(ficAli))
        nsimg = float(alignement.extrait_nbsim_needle(ficAli)) / sizeg * 100
        print("%.1f\t%s\t%s\t%.1f\t" % (simg, loc1, loc2, nsimg))
def extraitInfoDispersionSevScaffDansBlast():
    """Scan tabular tblastn results and echo the hits that land on a new subject.

    Prints a header line, then for every ``*.tblastn`` file of the hard-coded
    annotation directory walks its rows and prints the raw row each time a
    retained hit is on a different subject than the previous retained hit and
    is not contained in the coordinate window accumulated so far.

    Takes no argument and returns nothing.

    NOTE(review): the original indentation of this function was lost; the
    nesting below is a reconstruction - confirm against the author's copy.
    NOTE(review): ``nbHit`` is never incremented, so the summary print at the
    end of the file loop cannot run as written; it also reads ``mmSc``, which
    is not assigned anywhere in this function.
    NOTE(review): ``minimum`` / ``maximum`` are not defined here - presumably
    helpers defined elsewhere in the module.
    """
    print "scaff\tseq length\tnb hits\tdiffScaff"
    rep = "/Users/anfutil/Documents/Genolevures/Pifa/Annotation"
    #lignes = open(fic,"r").read().split("\n")
    allfile = glob.glob("%s/tBlastNFmt6-Pist/*.tblastn" % rep)
    for file in allfile:
        nbHit = 0
        fname = files.get_name(file)
        # Length of the query sequence matching this result file.
        sqLen = len(fasta.seqEnVar("%s/Proteome-Pist/%s.tfa" % (rep,fname)))
        lines = open(file,"r").read().split("\n")
        expect = 0
        scaff = ""
        difSc = 0
        # Coordinate window covered so far (fields 7 and 8 of the rows).
        min = 1000000000
        max = 0
        for line in lines:
            if line != "":
                lis = line.split("\t")
                # Field 11; presumably the e-value of a standard outfmt 6 row - TODO confirm.
                expect = lis[10]
                # Only rows whose field 11 is in exponent notation or exactly "0.0" are examined.
                if expect.find("e") != -1 or expect =="0.0":
                    # If the hit spans more than 90% of the submitted sequence,
                    # the ORF is considered complete: stop reading this file.
                    if (string.atof(lis[7])-string.atoi(lis[6]))/sqLen>0.9:
                        break
                    # A hit with more than 30% identity over at least 10% of the
                    # submitted sequence is considered potentially interesting.
                    if string.atof(lis[2]) > 30 and (string.atof(lis[7])-string.atoi(lis[6]))/sqLen>0.1:
                        if lis[1] == scaff:
                            # Same subject as the previous retained hit: widen the window.
                            # Both branches below perform the same update.
                            if string.atof(lis[7]) > string.atoi(lis[6]):
                                min = minimum(min,string.atoi(lis[6]))
                                max = maximum(max,string.atoi(lis[7]))
                            else:
                                min = minimum(min,string.atoi(lis[6]))
                                max = maximum(max,string.atoi(lis[7]))
                            continue
                        else :
                            maxN = maximum(string.atoi(lis[6]),string.atoi(lis[7]))
                            minN = minimum(string.atoi(lis[6]),string.atoi(lis[7]))
                            # A hit strictly inside the current window ends the scan of this file.
                            if maxN < max and minN > min:
                                break
                            else:
                                difSc += 1
                                print line
                                max = maximum(max,maxN)
                                min = minimum(min,minN)
                                scaff = lis[1]
        if nbHit > 1:
            print "%s\t%s\t%s hits\t%s\t%s" % (fname,sqLen,nbHit,mmSc,difSc)
def identifieRecouvrement100():
    """Print the tblastn results whose first hit spans the whole query.

    For each ``*.tblastn`` file of the hard-coded directory the length of the
    matching ``Proteome-Pist/<name>.tfa`` sequence is compared with
    ``field8 - field7 + 1`` of the first result row.  When they are equal a
    line ``<name>  <field2>-<field9>-<field10>.tfa  <field3>`` (tab-separated)
    is printed.

    Takes no argument and returns nothing.
    """
    # Parse every blast result.
    allfile = glob.glob("/Users/anfutil/Documents/Genolevures/Pifa/Annotation/tBlastNFmt6-Pist/*.tblastn")
    for ficBlast in allfile:
        # Length of the sequence that was searched.
        name = files.get_name(ficBlast)
        ficSeq = "/Users/anfutil/Documents/Genolevures/Pifa/Annotation/Proteome-Pist/%s.tfa" % name
        # Alternative sequence directory used previously:
        # ficSeq = "/Users/anfutil/Documents/Genolevures/Piso/SeqFinales/FastaProt/%s.tfa" % name
        sqLen = len(fasta.seqEnVar(ficSeq))
        # Only the first row is needed, so read just that line and close the file.
        with open(ficBlast, "r") as handle:
            premiere = handle.readline().rstrip("\n")
        if premiere == "":
            continue
        el = premiere.split("\t")
        # Compare with the length of the aligned region.
        lgAli = int(el[7]) - int(el[6]) + 1
        if sqLen == lgAli:
            print("%s\t%s-%s-%s.tfa\t%s" % (name, el[1], el[8], el[9], el[2]))
def recapGeneProt():
    """Print per-file and overall length / ``N`` statistics for ``./FastaProt``.

    For every ``*.tfa`` file a tab-separated line (name, length, number of
    ``N``, trailing tab) is printed, followed by a final summary giving the
    number of files, how many contain at least one ``N``, the cumulated
    length and the cumulated number of ``N``.
    """
    chemins = glob.glob("./FastaProt/*.tfa")
    nbAvecN = 0
    totalN = 0
    totalLg = 0
    for chemin in chemins:
        nom = files.get_name(chemin)
        seq = fasta.seqEnVar(chemin)
        nbN = seq.count('N')
        if nbN > 0:
            nbAvecN += 1
        totalN += nbN
        totalLg += len(seq)
        print("%s\t%s\t%s\t" % (nom, len(seq), nbN))
    print("Au final\n%s prot dont %s contiennent au moins 1 \"N\", taille cumulee : %s pb dont %s N" % (len(chemins), nbAvecN, totalLg, totalN))
def creeSequenceDeGene(scaff, deb, fin, geneN, repGene, repSeq, long):
    """Write the upper-cased region ``deb``..``fin`` of ``scaff`` unless it exists.

    The scaffold is read from ``<repSeq>/<scaff>.tfa``; the region is obtained
    with ``extraitSeqGene`` after converting ``deb`` and ``fin`` to integers.
    The target is ``<repGene>/<geneN>_<scaff>-<deb>-<fin>.tfa`` with ``scaff``
    as header.  When the target already exists a message is printed and
    nothing is written.  Before writing, the letters X, S, W, R, Y, K and M
    are replaced by ``N``.

    ``long`` is accepted but not used: the size filter that relied on it
    (minimum fraction of the reference length, length multiple of 3) is
    disabled in the original code.
    """
    seq = fasta.seqEnVar("%s/%s.tfa" % (repSeq, scaff))
    seqGen = extraitSeqGene(seq, int(deb), int(fin)).upper()
    filename = "%s/%s_%s-%s-%s.tfa" % (repGene, geneN, scaff, deb, fin)
    if os.path.isfile(filename):
        print("il existe deja")
        return
    # Same replacements, in the same order, as the original chain of calls.
    for code in "XSWRYKM":
        seqGen = seqGen.replace(code, "N")
    fasta.fromSeqToFasta(seqGen, "%s" % scaff, filename)
def _ligneRecapWater(eltType, geneN, refLen, fichiersAli):
    """Build the summary line for one element from its water alignment files.

    The line holds the element type, name and reference length, then the
    totals (differences, aligned length, length-weighted mean identity,
    coverage of ``refLen`` in percent) and finally one
    ``differences, length`` pair per alignment file.
    """
    # "toto" is the placeholder the totals are substituted into at the end.
    ligne = "%s\t%s\t%stoto" % (eltType, geneN, refLen)
    tId = 0
    tSnp = 0
    tLen = 0
    for ali in fichiersAli:
        lgAli = alignement.extrait_lg_water(ali)
        pcId = alignement.extrait_id_water(ali)
        nbId = alignement.extrait_nbid_water(ali)
        snp = int(lgAli) - int(nbId)
        tId += float(pcId) * float(lgAli)
        tLen += float(lgAli)
        tSnp += snp
        ligne += "\t%s\t%s" % (snp, lgAli)
    meanId = tId / tLen
    meanCov = tLen / refLen * 100
    return ligne.replace("toto", "\t%s\t%.0f\t%.2f\t%.0f" % (tSnp, tLen, meanId, meanCov))


def recapIdentiteFYIL():
    """Print identity / coverage summaries of the chr8 region alignments.

    After a header line, each ``*.tfa`` gene of ``GeneFY/RegionChr8`` gets up
    to two lines: a ``Gene`` line built from the files matching its name in
    the gene alignment directory, and a ``Prot`` line built from those in the
    protein alignment directory, the latter using a third of the gene length
    (integer division) as reference.  No line is printed when no alignment
    file matches.
    """
    workdir = "/Users/afutil/Documents/DataJoseph/Incompatibilites/FY4-IL01/MappedRegions/"
    allfile = glob.glob("%sGeneFY/RegionChr8/*.tfa" % workdir)
    repAliG = "%sGeneIL01/RegionChr8/Alignements/" % workdir
    repAliP = "%sProtIL01/RegionChr8/Alignements/" % workdir
    print("eltType\tgeneN\tgeneLength\tnbSnp\tAliLen\t%ageIdMoyen\tcouverture\tdetail/ali")
    for chemin in allfile:
        seqLen = len(fasta.seqEnVar(chemin))
        geneN = files.get_name(chemin)
        aliGene = glob.glob("%s%s*" % (repAliG, geneN))
        if aliGene:
            print(_ligneRecapWater("Gene", geneN, seqLen, aliGene))
        aliProt = glob.glob("%s%s*" % (repAliP, geneN))
        if aliProt:
            print(_ligneRecapWater("Prot", geneN, seqLen // 3, aliProt))
def trouveTelom(fic):
    """Print the sequence that ``fasta.seqEnVar`` returns for ``fic``."""
    print(fasta.seqEnVar(fic))
def extraitInfoBlastBIS(fic):
    """
    Extract id + percent identity + alignment length + lengths of seq1 and
    seq2 from blast files produced with the -outfmt 6 option.

    The original note says the input is the directory holding all the files
    to process.  NOTE(review): the code actually opens ``fic`` as one text
    file and uses the second tab-separated column of each line, lower-cased,
    as the name of a ``.blastp`` result in the hard-coded ``rep`` directory.

    For each name, per-species best-hit descriptions, 0/1 flags and counters
    are written to ``outputAnaBlast`` and ``outTest.tab`` in the current
    directory, with debug traces on standard output.

    NOTE(review): the original indentation of this function was lost.  The
    nesting below is a reconstruction; where the threshold test and the
    ``*2`` counters sit relative to ``if <flag> == 0`` could not be
    recovered.  The flags and descriptions do not depend on that choice, the
    ``*2`` counters do - confirm against the author's copy.
    NOTE(review): a line of ``fic`` without a tab (for instance the empty
    string that follows a trailing newline) makes ``el[1]`` raise IndexError
    before the two output files are closed.
    """
    rep = "/Users/afutil/Documents/Genolevures/PiSo/AnalyseGenome/CladeCTG/RechercheSimFmt6"
    lignes = open(fic,"r").read().split("\n")
    #allfile = glob.glob("%s/*.blastp" % inrep)
    outfile = "outputAnaBlast"
    outfile2 = "outTest.tab"
    f = open(outfile,"w")
    f2 = open(outfile2,"w")
    for ligne in lignes:
        el = ligne.split("\t")
        fname = el[1].lower()
        #categorie = el[1]
        # Names that are empty or end with "r" only produce placeholder output.
        if fname == "" or fname[-1].lower() == "r":
            print ""
            f.write("\n\n")
            f2.write("%s\n" % fname)
            continue
        file = "%s/%s.blastp" % (rep,fname.lower())
        print fname
        # Both the protein sequence and its blast result must exist.
        if os.path.isfile("/Users/afutil/Documents/Genolevures/PiSo/SeqFinales/FastaProt/%s.tfa" % fname.upper()) and os.path.isfile(file):
            sqLen = len(fasta.seqEnVar("/Users/afutil/Documents/Genolevures/PiSo/SeqFinales/FastaProt/%s.tfa" % fname.upper()))
        else:
            f.write("\n\n")
            f2.write("%s\n" % fname)
            continue
        print sqLen
        # 0/1 flags: a hit above the thresholds was seen for the species.
        debha = 0; canal = 0; cantr = 0; picgu = 0; picst = 0; canlu = 0; lodel = 0; canpa = 0; candu = 0
        # Per-species counters.
        debha2 = 0; canal2 = 0; cantr2 = 0; picgu2 = 0; picst2 = 0; canlu2 = 0; lodel2 = 0; canpa2 = 0; candu2 = 0
        # Per-species "accession,query length,field 3,field 4" descriptions.
        deb = ""; caa = ""; cat = ""; pig = ""; pis = ""; cal = ""; lod = ""; cap = ""; cad = ""
        lines = open(file,"r").read().split("\n")
        expect = 0
        for line in lines:
            if line != "":
                lis = line.split("\t")
                # Field 11; presumably the e-value of a standard outfmt 6 row - TODO confirm.
                expect = lis[10]
                print "expect est %s" % expect
                # Rows are processed while field 11 is in exponent notation or
                # exactly "0.0"; the first other row ends the scan of the file.
                if expect.find("e") != -1 or expect =="0.0":
                    print "a suis rentre"
                    acc = lis[1]
                    # Subject names carrying "CPAG" at positions 9..12 are handled
                    # apart; all others are dispatched on the suffix from position 17.
                    if acc[9:13] == "CPAG":
                        if canpa == 0:
                            cap = "%s,%s,%s,%s" % (lis[1][3:13],sqLen,lis[2],lis[3])
                        # Thresholds: field 3 above 30 and (field 8 - field 7) above 30% of the query length.
                        if string.atof(lis[2]) > 30 and (string.atof(lis[7])-string.atoi(lis[6]))/sqLen>0.3:
                            canpa = 1
                            canpa2 += 1
                    else:
                        suff = acc[17:]
                        if suff == "DEBHA":
                            if debha == 0:
                                deb = "%s,%s,%s,%s" % (lis[1][10:],sqLen,lis[2],lis[3])
                            print "%s - %s " % (string.atof(lis[2]),(string.atof(lis[7])-string.atoi(lis[6]))/sqLen)
                            if string.atof(lis[2]) > 30 and (string.atof(lis[7])-string.atoi(lis[6]))/sqLen>0.3:
                                print "+"
                                debha = 1
                                debha2 += 1
                        elif suff == "CANAL":
                            if canal == 0:
                                caa = "%s,%s,%s,%s" % (lis[1][10:],sqLen,lis[2],lis[3])
                            if string.atof(lis[2]) > 30 and (string.atof(lis[7])-string.atoi(lis[6]))/sqLen>0.3:
                                canal = 1
                                canal2 += 1
                        elif suff == "CANTT" or suff == "CANTR":
                            if cantr == 0:
                                cat = "%s,%s,%s,%s" % (lis[1][10:],sqLen,lis[2],lis[3])
                            if string.atof(lis[2]) > 30 and (string.atof(lis[7])-string.atoi(lis[6]))/sqLen>0.3:
                                cantr = 1
                                cantr2 += 1
                        elif suff == "PICGU":
                            if picgu == 0:
                                pig = "%s,%s,%s,%s" % (lis[1][10:],sqLen,lis[2],lis[3])
                            if string.atof(lis[2]) > 30 and (string.atof(lis[7])-string.atoi(lis[6]))/sqLen>0.3:
                                picgu = 1
                                picgu2 +=1
                        elif suff == "PICST":
                            if picst == 0:
                                pis = "%s,%s,%s,%s" % (lis[1][10:],sqLen,lis[2],lis[3])
                            if string.atof(lis[2]) > 30 and (string.atof(lis[7])-string.atoi(lis[6]))/sqLen>0.3:
                                picst = 1
                                picst2 += 1
                        elif suff == "CLALS" or suff == "CLAL4":
                            if canlu == 0:
                                cal = "%s,%s,%s,%s" % (lis[1][10:],sqLen,lis[2],lis[3])
                            if string.atof(lis[2]) > 30 and (string.atof(lis[7])-string.atoi(lis[6]))/sqLen>0.3:
                                canlu = 1
                                canlu2 += 1
                        elif suff == "LODEL":
                            if lodel == 0:
                                lod = "%s,%s,%s,%s" % (lis[1][10:],sqLen,lis[2],lis[3])
                            if string.atof(lis[2]) > 30 and (string.atof(lis[7])-string.atoi(lis[6]))/sqLen>0.3:
                                lodel = 1
                                lodel2 += 1
                        elif suff == "CANDU" or suff == "CANDC":
                            if candu == 0:
                                cad = "%s,%s,%s,%s" % (lis[1][10:],sqLen,lis[2],lis[3])
                            if string.atof(lis[2]) > 30 and (string.atof(lis[7])-string.atoi(lis[6]))/sqLen>0.3:
                                candu = 1
                                candu2 += 1
                else:
                    print "a suis pas rentre"
                    break
        # First output file: one line of descriptions, then one line of flags.
        f.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (fname,deb,pis,pig,cal,caa,cad,cat,cap,lod))
        #print "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (fname,debha,canal,cantr,picgu,picst,canlu,lodel,canpa)
        f.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (fname,debha,picst,picgu,canlu,canal,candu,cantr,canpa,lodel))
        # Second output file and stdout: flags concatenated, then counters joined with ";".
        f2.write ("%s\t'%s%s%s%s%s%s%s%s%s'\t'%s;%s;%s;%s;%s;%s;%s;%s;%s'\n" % (fname,debha,picst,picgu,canlu,canal,candu,cantr,canpa,lodel,debha2,picst2,picgu2,canlu2,canal2,candu2,cantr2,canpa2,lodel2))
        print "%s\t'%s%s%s%s%s%s%s%s%s'\t'%s;%s;%s;%s;%s;%s;%s;%s;%s'" % (fname,debha,picst,picgu,canlu,canal,candu,cantr,canpa,lodel,debha2,picst2,picgu2,canlu2,canal2,candu2,cantr2,canpa2,lodel2)
    f.close()
    f2.close()
def tailleGenesPiso():
    """Print the name and sequence length of every PiSo gene FASTA file.

    One tab-separated line is printed per file matching ``*tfa`` in the
    hard-coded ``FastaGene`` directory.
    """
    motif = "/Users/afutil/Documents/Genolevures/PiSo/SeqFinales/FastaGene/*tfa"
    for chemin in glob.glob(motif):
        longueur = len(fasta.seqEnVar(chemin))
        print("%s\t%s" % (files.get_name(chemin), longueur))