def verifIdent(fic):
    """Write the gene and protein identity of each locus pair listed in *fic*.

    *fic* is read as tab-separated text; the first two columns of every
    non-empty line are two locus names.  For a pair with both names present,
    ``FastaGene/<locus>.tfa`` and then ``FastaProt/<locus>.tfa`` are aligned
    with ``alignement.ali_needle`` (only when both FASTA files exist) and the
    identity is read back with ``alignement.extrait_id_needle`` from
    ``<folder>/<name1>-<name2>.needle``.

    Each pair produces one ``loc1<TAB>loc2<TAB>idg<TAB>idp`` row in
    ``outest.txt`` in the current directory; ``-`` marks an identity that was
    not computed because a FASTA file is missing.

    Args:
        fic: path of the tab-separated pair list.

    Raises:
        IndexError: a non-empty line has fewer than two columns.
    """
    ficOut = "outest.txt"

    # The input is read before the report is opened, so a missing input no
    # longer leaves behind a truncated report.
    with open(fic, "r") as src:
        lines = src.read().split("\n")

    # ``with`` guarantees the report is flushed and closed; the previous
    # version ended with ``f.close`` (attribute access, no call), which never
    # closed the file.
    with open(ficOut, "w") as out:
        for line in lines:
            if line == "":
                continue
            cols = line.split("\t")
            loc1 = cols[0]
            loc2 = cols[1]
            # NOTE(review): the original indentation was lost; rows are assumed
            # to be written only for lines naming both loci - confirm.
            if loc1 == "" or loc2 == "":
                continue
            idg = _verifIdent_pair_identity("FastaGene", loc1, loc2)
            idp = _verifIdent_pair_identity("FastaProt", loc1, loc2)
            out.write("%s\t%s\t%s\t%s\n" % (loc1, loc2, idg, idp))


def _verifIdent_pair_identity(folder, loc1, loc2):
    """Align two loci of *folder*; return their identity, or "-" if a FASTA file is missing."""
    fic1 = "%s/%s.tfa" % (folder, loc1)
    fic2 = "%s/%s.tfa" % (folder, loc2)
    if not (os.path.isfile(fic1) and os.path.isfile(fic2)):
        return "-"
    alignement.ali_needle(fic1, fic2)
    needle = "%s/%s-%s.needle" % (
        folder,
        files.get_name(fic1).lower(),
        files.get_name(fic2).lower(),
    )
    return alignement.extrait_id_needle(needle)
def defNouvelIdent(fic):
    """Print needle identities recomputed relative to the shorter sequence.

    *fic* is read as tab-separated text; columns 0 and 2 of every non-empty
    line are two locus names.  For a pair with both names present, the
    existing ``FastaGene/<name1>-<name2>.needle`` and
    ``FastaProt/<name1>-<name2>.needle`` files (if any) are read: the
    identity comes from ``alignement.extrait_id_needle`` and the number of
    identical positions from ``alignement.extrait_nbid_needle``, the latter
    being expressed as a percentage of the shorter of the two sequences.
    No alignment is launched here; a missing needle file leaves "-".

    Printed columns: ``idg, idp, loc1, loc2, nidg, nidp``.  Lines lacking a
    locus are echoed as two empty columns followed by both names.

    Args:
        fic: path of the tab-separated pair list.

    Raises:
        IndexError: a non-empty line has fewer than three columns.
    """
    with open(fic, "r") as src:
        lines = src.read().split("\n")

    for line in lines:
        if line == "":
            continue
        cols = line.split("\t")
        loc1 = cols[0]
        loc2 = cols[2]
        if loc1 == "" or loc2 == "":
            print("\t\t%s\t%s" % (loc1, loc2))
            continue

        ficg1 = "FastaGene/%s.tfa" % loc1
        ficg2 = "FastaGene/%s.tfa" % loc2
        ficp1 = "FastaProt/%s.tfa" % loc1
        ficp2 = "FastaProt/%s.tfa" % loc2
        # The same needle file name (built from the gene FASTA names) is
        # looked up in both folders.
        outf = "%s-%s.needle" % (
            files.get_name(ficg1).lower(),
            files.get_name(ficg2).lower(),
        )

        idg = nidg = "-"
        idp = nidp = "-"
        if os.path.isfile("FastaGene/%s" % outf):
            idg, nidg = _defNouvelIdent_scores("FastaGene/%s" % outf, ficg1, ficg2)
        if os.path.isfile("FastaProt/%s" % outf):
            idp, nidp = _defNouvelIdent_scores("FastaProt/%s" % outf, ficp1, ficp2)

        if idp != "-" and idg != "-":
            # Both alignments available: every score is a float.
            print("%.1f\t%.1f\t%s\t%s\t%.1f\t%.1f\t" % (idg, idp, loc1, loc2, nidg, nidp))
        else:
            print("%s\t%s\t%s\t%s\t%s\t%s" % (idg, idp, loc1, loc2, nidg, nidp))


def _defNouvelIdent_scores(needle, fasta1, fasta2):
    """Return (identity, identical positions as % of the shorter sequence) for one needle file."""
    shortest = min(len(fasta.seqEnVar(fasta1)), len(fasta.seqEnVar(fasta2)))
    # float() replaces the deprecated string.atof(); both parse the same text.
    identity = float(alignement.extrait_id_needle(needle))
    identical = float(alignement.extrait_nbid_needle(needle))
    return identity, identical / shortest * 100
def calcIdent(fic):
    """Align every pair of ``*.tfa`` files in directory *fic* and tabulate the scores.

    Nothing happens when *fic* is not a directory.  Otherwise each unordered
    pair of FASTA files is aligned with ``alignement.ali_needle`` into
    ``<fic>/<name1>-<name2>.needle`` unless that file already exists, and the
    identity and similarity read back with ``alignement.extrait_id_needle``
    and ``alignement.extrait_sim_needle`` are written to the file ``ficOut``
    in the current directory, under the header ``el1 el2 id sim``
    (tab-separated).  A pair whose needle file is still missing after the
    alignement call is skipped.

    Args:
        fic: directory holding the ``.tfa`` files.
    """
    if not os.path.isdir(fic):
        return

    # glob returns files in arbitrary order; sorting keeps the pair order,
    # and therefore the cached "<a>-<b>.needle" names, stable between runs.
    fasta_files = sorted(glob.glob("%s/*.tfa" % fic))

    # ``with`` closes the report even if an alignment raises.
    with open("ficOut", "w") as out:
        out.write("el1\tel2\tid\tsim\n")
        for pos, f1 in enumerate(fasta_files):
            # Pair each file only with the ones after it: every unordered
            # pair is visited exactly once.
            for f2 in fasta_files[pos + 1:]:
                name = "%s-%s.needle" % (
                    files.get_name(f1).lower(),
                    files.get_name(f2).lower(),
                )
                outfile = "%s/%s" % (fic, name)
                if not os.path.isfile(outfile):
                    alignement.ali_needle(f1, f2, outfile)
                if not os.path.isfile(outfile):
                    continue
                identity = alignement.extrait_id_needle(outfile)
                similarity = alignement.extrait_sim_needle(outfile)
                out.write("%s\t%s\t%s\t%s\n" % (
                    files.get_name(f1), files.get_name(f2), identity, similarity))
def defNouvelIdentncRNA(fic):
    """Print gene-level needle identities for the locus pairs listed in *fic*.

    *fic* is read as tab-separated text; columns 0 and 2 of every non-empty
    line are two locus names.  For a pair with both names present, the
    sequences ``Genes+Pseudos/<locus>.tfa`` are aligned with
    ``alignement.ali_needle`` unless
    ``Genes+Pseudos/<name1>-<name2>.needle`` already exists.  The identity
    (``alignement.extrait_id_needle``) and the number of identical positions
    (``alignement.extrait_nbid_needle``) as a percentage of the shorter
    sequence are then printed as ``idg, loc1, loc2, nidg``.

    When the needle file is still absent after the alignment attempt, "-" is
    printed for both scores.  Lines lacking a locus are echoed as two empty
    columns followed by both names.

    Args:
        fic: path of the tab-separated pair list.

    Raises:
        IndexError: a non-empty line has fewer than three columns.
    """
    with open(fic, "r") as src:
        lines = src.read().split("\n")

    for line in lines:
        if line == "":
            continue
        cols = line.split("\t")
        loc1 = cols[0]
        loc2 = cols[2]
        if loc1 == "" or loc2 == "":
            print("\t\t%s\t%s" % (loc1, loc2))
            continue

        ficg1 = "Genes+Pseudos/%s.tfa" % loc1
        ficg2 = "Genes+Pseudos/%s.tfa" % loc2
        needle = "Genes+Pseudos/%s-%s.needle" % (
            files.get_name(ficg1).lower(),
            files.get_name(ficg2).lower(),
        )
        if not os.path.isfile(needle):
            alignement.ali_needle(ficg1, ficg2)

        # Only read results that actually exist.  Previously the scores were
        # extracted unconditionally, which made the "-" fallback below
        # unreachable.
        # NOTE(review): assumes ali_needle writes its output at `needle`,
        # as the other functions of this file do - confirm.
        if os.path.isfile(needle):
            shortest = min(len(fasta.seqEnVar(ficg1)), len(fasta.seqEnVar(ficg2)))
            # float() replaces the deprecated string.atof().
            idg = float(alignement.extrait_id_needle(needle))
            nidg = float(alignement.extrait_nbid_needle(needle)) / shortest * 100
            print("%.1f\t%s\t%s\t%.1f\t" % (idg, loc1, loc2, nidg))
        else:
            print("%s\t%s\t%s\t%s" % ("-", loc1, loc2, "-"))
def definiAllele(fic, database):
    """Print gene/protein identities of candidate allele pairs and their means.

    *fic* is read as tab-separated text; every non-empty line provides
    ``def1, loc1, def2, loc2`` in columns 0-3.  Per line:

    * both loci present: they are aligned against each other;
    * one locus present and its ``def`` column equal to ``noSim``: the gene
      is blasted against *database* and ``locus<TAB>info`` is printed, where
      info is ``noSim``, ``soSignSim`` or ``<hit>-<identity>``;
    * one locus present otherwise: the partner locus is the text before the
      first "-" of its ``def`` column;
    * no locus at all: the line is skipped.

    For aligned pairs, ``alignement.ali_needle`` is run for genes and
    proteins unless the corresponding needle file exists, and
    ``loc1, loc2, idg, idp`` is printed.  Pairs whose gene identity is the
    string "100" are left out of the means printed at the end; "-" is printed
    instead of a mean when no pair contributed.

    Args:
        fic: path of the tab-separated input.
        database: blast database handed to ``alignement.run_blastn``.

    Raises:
        IndexError: a non-empty line has fewer than four columns.
    """
    totg = 0
    totp = 0
    g = 0

    with open(fic, "r") as src:
        lines = src.read().split("\n")

    for line in lines:
        if line == "":
            continue
        cols = line.split("\t")
        def1 = cols[0]
        loc1 = cols[1]
        def2 = cols[2]
        loc2 = cols[3]

        if loc1 != "" and loc2 != "":
            first, second = loc1, loc2
        elif loc1 != "" and def1 == "noSim":
            print("%s\t%s" % (loc1, _definiAllele_blast_info(loc1, database, True)))
            continue
        elif loc2 != "" and def2 == "noSim":
            # NOTE(review): unlike the loc1 case, an existing blast output is
            # not reused here (blast is always re-run) - kept as found, but
            # the asymmetry looks accidental.
            print("%s\t%s" % (loc2, _definiAllele_blast_info(loc2, database, False)))
            continue
        elif loc1 != "":
            loc2 = def1.split("-")[0]
            first, second = loc1, loc2
        elif loc2 != "":
            loc1 = def2.split("-")[0]
            first, second = loc2, loc1
        else:
            # Neither locus given: previously this fell through and reused
            # the previous line's files (or raised NameError on the first
            # line).
            continue

        ficg1 = "FastaGene/%s.tfa" % first
        ficg2 = "FastaGene/%s.tfa" % second
        ficp1 = "FastaProt/%s.tfa" % first
        ficp2 = "FastaProt/%s.tfa" % second
        outf = "%s-%s.needle" % (
            files.get_name(ficg1).lower(),
            files.get_name(ficg2).lower(),
        )
        gene_needle = "FastaGene/%s" % outf
        prot_needle = "FastaProt/%s" % outf
        if not os.path.isfile(gene_needle):
            alignement.ali_needle(ficg1, ficg2)
        if not os.path.isfile(prot_needle):
            alignement.ali_needle(ficp1, ficp2)
        idg = alignement.extrait_id_needle(gene_needle)
        idp = alignement.extrait_id_needle(prot_needle)

        # Perfect matches are excluded from the means.  The former ``suf``
        # flag (set through a str-vs-int comparison, ``idg > 70``) was never
        # read and has been dropped.
        if idg != "100":
            totg = totg + float(idg)
            totp = totp + float(idp)
            g = g + 1
        print("%s\t%s\t%s\t%s" % (loc1, loc2, idg, idp))

    # Guard against ZeroDivisionError when no pair was averaged.
    mean_g = totg / g if g else "-"
    mean_p = totp / g if g else "-"
    print("\nMoyenne des pourcentages d identite (sans tenir compte des 100%)")
    print("au niveau des genes : %s" % mean_g)
    print("au niveau des proteines: %s" % mean_p)


def _definiAllele_blast_info(locus, database, reuse_existing):
    """Blast one gene against *database* and summarise its first reported hit.

    Returns "" when no blast output file is available (the caller previously
    hit an unbound or stale variable in that case).
    """
    ficin = "FastaGene/%s.tfa" % locus
    ficout = "FastaGene/%s.blastn" % files.get_name(ficin).lower()
    if not (reuse_existing and os.path.isfile(ficout)):
        alignement.run_blastn(ficin, ficout, database)
    if not os.path.isfile(ficout):
        return ""
    with open(ficout, "r") as handle:
        rows = handle.read().split("\n")
    # The hit is read from the second line; an output with no second line
    # means no hit (``<=`` also covers an empty file, which used to raise
    # IndexError).
    if len(rows) <= 2:
        return "noSim"
    det = rows[1].split("\t")
    if float(det[2]) > 70:
        return "%s-%s" % (det[1].split("|")[-1], det[2])
    return "soSignSim"
def compAnnot(fic, database):
    """Compare two annotations line by line and print identities or blast hits.

    *fic* is read as tab-separated text.  Columns 0-7 describe the first
    element (type, annotation, locus, orf, start, end, length, strand) and
    columns 9-16 the second one in the same order; column 8 is not used.

    * Both loci present: genes and proteins are aligned with
      ``alignement.ali_needle`` when the needle file is missing and both
      FASTA files exist; the identities ("-" when no needle file is
      available) are printed between the two descriptions, followed by a
      flag that is 1 when the gene identity is the string "100" and 2
      otherwise.
    * Otherwise each present locus whose gene FASTA exists is blasted
      against *database* and the first hit (or ``noSim``) is printed.

    Means of the identities, excluding gene identities of "100", are printed
    at the end; "-" replaces a mean when nothing contributed to it.

    Args:
        fic: path of the tab-separated comparison table.
        database: blast database handed to ``alignement.run_blastn``.

    Raises:
        IndexError: a non-empty line has fewer than 17 columns.
    """
    totg = 0
    totp = 0
    g = 0
    p = 0

    with open(fic, "r") as src:
        lines = src.read().split("\n")

    for line in lines:
        if line == "":
            continue
        lis = line.split("\t")
        # (type, annot, loc, orf, deb, fin, long, sens) for each side;
        # explicit indexing keeps the IndexError on short lines.
        left = tuple(lis[i] for i in range(0, 8))
        right = tuple(lis[i] for i in range(9, 17))
        loc1 = left[2]
        loc2 = right[2]

        if loc1 != "" and loc2 != "":
            ficg1 = "FastaGene/%s.tfa" % loc1
            ficg2 = "FastaGene/%s.tfa" % loc2
            ficp1 = "FastaProt/%s.tfa" % loc1
            ficp2 = "FastaProt/%s.tfa" % loc2
            outf = "%s-%s.needle" % (
                files.get_name(ficg1).lower(),
                files.get_name(ficg2).lower(),
            )
            gene_needle = "FastaGene/%s" % outf
            prot_needle = "FastaProt/%s" % outf

            if not os.path.isfile(gene_needle):
                if os.path.isfile(ficg1) and os.path.isfile(ficg2):
                    alignement.ali_needle(ficg1, ficg2)
            if not os.path.isfile(prot_needle):
                if os.path.isfile(ficp1) and os.path.isfile(ficp2):
                    alignement.ali_needle(ficp1, ficp2)

            idg = "-"
            idp = "-"
            if os.path.isfile(gene_needle):
                idg = alignement.extrait_id_needle(gene_needle)
            if os.path.isfile(prot_needle):
                idp = alignement.extrait_id_needle(prot_needle)

            if idg == "100":
                suf = 1
            else:
                suf = 2
                if idg != "-":
                    totg = totg + float(idg)
                    g = g + 1
                    # A pair may have a gene alignment but no protein one;
                    # converting "-" used to raise ValueError here.  Proteins
                    # get their own counter so their mean stays correct.
                    if idp != "-":
                        totp = totp + float(idp)
                        p = p + 1
            print("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t\t%s\t%s\t\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s"
                  % (left + (idg, idp) + right + (suf,)))
            continue

        # Not an obvious allele: blast each gene against the complete
        # database to see whether there is a significant similarity.
        inf1 = ""
        inf2 = ""
        if loc1 != "":
            inf1 = _compAnnot_blast_info(loc1, database)
        if loc2 != "":
            inf2 = _compAnnot_blast_info(loc2, database)
        print("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t\t\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s"
              % (left + (inf1, inf2) + right))

    # Guard against ZeroDivisionError when nothing was averaged.
    mean_g = totg / g if g else "-"
    mean_p = totp / p if p else "-"
    print("\nMoyenne des pourcentages d identite (sans tenir compte des 100%)")
    print("au niveau des genes : %s" % mean_g)
    print("au niveau des proteines: %s" % mean_p)


def _compAnnot_blast_info(locus, database):
    """Blast one gene against *database*; return "<hit>-<identity>", "noSim", or "" if no file."""
    ficin = "FastaGene/%s.tfa" % locus
    if not os.path.isfile(ficin):
        return ""
    ficout = "FastaGene/%s.blastn" % files.get_name(ficin).lower()
    alignement.run_blastn(ficin, ficout, database)
    if not os.path.isfile(ficout):
        return ""
    with open(ficout, "r") as handle:
        rows = handle.read().split("\n")
    # The hit is read from the second line; ``<=`` also covers an empty
    # output file, which used to raise IndexError.
    if len(rows) <= 2:
        return "noSim"
    det = rows[1].split("\t")
    return "%s-%s" % (det[1].split("|")[-1], det[2])