Python extrait_id_needleの例、alignement.extrait_id_needle Pythonの例

コード例 #1

0

ファイルを表示

ファイル: pourFabien.py プロジェクト: bioinfocoderz/Scripts

def verifIdent(fic):
    """Write gene and protein needle identities for the locus pairs of `fic`.

    Each non-empty line of `fic` is split on tabs and its first two fields
    are taken as locus names.  When both ``FastaGene/<locus>.tfa`` files
    exist, ``alignement.ali_needle`` is run on them and the identity is read
    back with ``alignement.extrait_id_needle``; the same is done with the
    ``FastaProt`` files.  One row ``loc1, loc2, gene identity, protein
    identity`` is written to ``outest.txt`` (in the current directory) for
    every non-empty input line; ``-`` stands for an identity that was not
    computed.

    :param fic: path of the tab-separated input file.
    :raises IndexError: if a non-empty line has fewer than two fields.
    """
    ficOut = "outest.txt"

    # Read the input first so that a missing input file does not leave a
    # truncated output file behind.
    with open(fic, "r") as src:
        lines = src.read().split("\n")

    # The original ended with a bare `f.close` (attribute access, no call),
    # so the output was never closed explicitly; `with` guarantees it.
    with open(ficOut, "w") as f:
        for line in lines:
            if line == "":
                continue
            idg = "-"
            idp = "-"
            lis = line.split("\t")
            loc1 = lis[0]
            loc2 = lis[1]
            if loc1 != "" and loc2 != "":
                ficg1 = "FastaGene/%s.tfa" % loc1
                ficg2 = "FastaGene/%s.tfa" % loc2
                ficp1 = "FastaProt/%s.tfa" % loc1
                ficp2 = "FastaProt/%s.tfa" % loc2
                if os.path.isfile(ficg1) and os.path.isfile(ficg2):
                    alignement.ali_needle(ficg1, ficg2)
                    # presumably ali_needle writes <name1>-<name2>.needle next
                    # to its inputs -- confirm against alignement.ali_needle
                    idg = alignement.extrait_id_needle(
                        "FastaGene/%s-%s.needle"
                        % (files.get_name(ficg1).lower(),
                           files.get_name(ficg2).lower()))
                if os.path.isfile(ficp1) and os.path.isfile(ficp2):
                    alignement.ali_needle(ficp1, ficp2)
                    idp = alignement.extrait_id_needle(
                        "FastaProt/%s-%s.needle"
                        % (files.get_name(ficp1).lower(),
                           files.get_name(ficp2).lower()))
            f.write("%s\t%s\t%s\t%s\n" % (loc1, loc2, idg, idp))

コード例 #2

0

ファイルを表示

ファイル: allele2.py プロジェクト: bioinfocoderz/Scripts

def _needle_identities(rep, outf, fic1, fic2):
    """Read the identity figures of the needle result ``rep/outf``.

    Returns ``("-", "-")`` when the result file does not exist.  Otherwise
    returns ``(identity, normalised)`` as floats, where ``identity`` is the
    value given by ``alignement.extrait_id_needle`` and ``normalised`` is the
    value given by ``alignement.extrait_nbid_needle`` divided by the length
    of the shorter of the two sequences, times 100.
    """
    chemin = "%s/%s" % (rep, outf)
    if not os.path.isfile(chemin):
        return "-", "-"
    # Normalise against the shorter sequence of the pair.
    taille = min(len(fasta.seqEnVar(fic1)), len(fasta.seqEnVar(fic2)))
    ident = float(alignement.extrait_id_needle(chemin))
    # presumably extrait_nbid_needle yields the count of identical
    # positions -- confirm against the alignement module
    nbid = float(alignement.extrait_nbid_needle(chemin))
    return ident, nbid / taille * 100


def defNouvelIdent(fic):
    """Print identity figures for the locus pairs listed in `fic`.

    Each non-empty line of `fic` is split on tabs; field 0 and field 2 are
    the two locus names.  For a complete pair, the already existing needle
    results ``FastaGene/<a>-<b>.needle`` and ``FastaProt/<a>-<b>.needle`` are
    read (no alignment is launched here) and one row is printed:
    gene identity, protein identity, loc1, loc2, normalised gene identity,
    normalised protein identity.  Values that could not be read are printed
    as ``-``.  When one locus is empty only the two locus names are printed,
    preceded by two empty columns.

    :param fic: path of the tab-separated input file.
    :raises IndexError: if a non-empty line has fewer than three fields.
    """
    # `with` closes the handle the original left to the garbage collector.
    with open(fic, "r") as src:
        lines = src.read().split("\n")

    for line in lines:
        if line == "":
            continue
        lis = line.split("\t")
        loc1 = lis[0]
        loc2 = lis[2]

        if loc1 == "" or loc2 == "":
            print("\t\t%s\t%s" % (loc1, loc2))
            continue

        ficg1 = "FastaGene/%s.tfa" % loc1
        ficg2 = "FastaGene/%s.tfa" % loc2
        ficp1 = "FastaProt/%s.tfa" % loc1
        ficp2 = "FastaProt/%s.tfa" % loc2
        # The same result file name is used in both directories.
        outf = "%s-%s.needle" % (files.get_name(ficg1).lower(),
                                 files.get_name(ficg2).lower())

        idg, nidg = _needle_identities("FastaGene", outf, ficg1, ficg2)
        idp, nidp = _needle_identities("FastaProt", outf, ficp1, ficp2)

        # Parenthesised single-argument print behaves the same under
        # Python 2 and Python 3.
        if idp != "-" and idg != "-":
            print("%.1f\t%.1f\t%s\t%s\t%.1f\t%.1f\t"
                  % (idg, idp, loc1, loc2, nidg, nidp))
        else:
            print("%s\t%s\t%s\t%s\t%s\t%s"
                  % (idg, idp, loc1, loc2, nidg, nidp))

コード例 #3

0

ファイルを表示

ファイル: pourFabien.py プロジェクト: bioinfocoderz/Scripts

def calcIdent(fic):
    """Write needle identity and similarity for every pair of .tfa files.

    All ``*.tfa`` files directly inside directory `fic` are compared pairwise
    (each unordered pair once, in the order returned by ``glob``).  For a
    pair, the result file ``<fic>/<name1>-<name2>.needle`` is produced with
    ``alignement.ali_needle`` unless it already exists; when the result file
    is present, a row ``name1, name2, identity, similarity`` is appended to
    the output.  The output file is ``ficOut`` in the current directory and
    always starts with the header ``el1 el2 id sim``.

    :param fic: directory holding the FASTA files.  If it is not a
        directory, only the header line is written (the original raised
        UnboundLocalError because the file list was never assigned).
    """
    if os.path.isdir(fic):
        allfile = glob.glob("%s/*.tfa" % fic)
    else:
        allfile = []

    fout = "ficOut"
    # `with` closes the output even if an alignment step raises.
    with open(fout, "w") as f:
        f.write("el1\tel2\tid\tsim\n")
        for rang, f1 in enumerate(allfile):
            # Pair each file only with the ones that follow it.
            for f2 in allfile[rang + 1:]:
                name = "%s-%s.needle" % (files.get_name(f1).lower(),
                                         files.get_name(f2).lower())
                outfile = "%s/%s" % (fic, name)
                if not os.path.isfile(outfile):
                    alignement.ali_needle(f1, f2, outfile)
                # The alignment may not have produced a file; skip the pair
                # in that case.
                if os.path.isfile(outfile):
                    ident = alignement.extrait_id_needle(outfile)
                    sim = alignement.extrait_sim_needle(outfile)
                    f.write("%s\t%s\t%s\t%s\n"
                            % (files.get_name(f1), files.get_name(f2),
                               ident, sim))

コード例 #4

0

ファイルを表示

ファイル: allele2.py プロジェクト: bioinfocoderz/Scripts

def defNouvelIdentncRNA(fic):
    """Print gene-level identity figures for the locus pairs listed in `fic`.

    Each non-empty line of `fic` is split on tabs; field 0 and field 2 are
    the two locus names.  For a complete pair the needle result
    ``Genes+Pseudos/<a>-<b>.needle`` is produced with
    ``alignement.ali_needle`` when it does not exist yet, then one row is
    printed: identity, loc1, loc2, normalised identity (value of
    ``alignement.extrait_nbid_needle`` divided by the length of the shorter
    sequence, times 100).  If the result file is still missing after the
    alignment step, both figures are printed as ``-``.  When one locus is
    empty only the two locus names are printed, preceded by two empty
    columns.

    :param fic: path of the tab-separated input file.
    :raises IndexError: if a non-empty line has fewer than three fields.
    """
    # `with` closes the handle the original left to the garbage collector.
    with open(fic, "r") as src:
        lines = src.read().split("\n")

    for line in lines:
        if line == "":
            continue
        lis = line.split("\t")
        loc1 = lis[0]
        loc2 = lis[2]

        if loc1 == "" or loc2 == "":
            print("\t\t%s\t%s" % (loc1, loc2))
            continue

        ficg1 = "Genes+Pseudos/%s.tfa" % loc1
        ficg2 = "Genes+Pseudos/%s.tfa" % loc2
        outf = "%s-%s.needle" % (files.get_name(ficg1).lower(),
                                 files.get_name(ficg2).lower())
        needle = "Genes+Pseudos/%s" % outf

        if not os.path.isfile(needle):
            # presumably writes `needle` -- confirm against ali_needle
            alignement.ali_needle(ficg1, ficg2)

        # The original extracted the figures unconditionally, which made its
        # "-" fallback unreachable; guard on the result file like
        # defNouvelIdent does so the fallback is actually used.
        if os.path.isfile(needle):
            sizeg = min(len(fasta.seqEnVar(ficg1)),
                        len(fasta.seqEnVar(ficg2)))
            idg = float(alignement.extrait_id_needle(needle))
            nidg = float(alignement.extrait_nbid_needle(needle)) / sizeg * 100
            print("%.1f\t%s\t%s\t%.1f\t" % (idg, loc1, loc2, nidg))
        else:
            print("%s\t%s\t%s\t%s" % ("-", loc1, loc2, "-"))

コード例 #5

0

ファイルを表示

ファイル: pourFabien.py プロジェクト: bioinfocoderz/Scripts

def _blastn_summary_with_threshold(ficout, seuil):
    """Summarise the second line of the tabular blastn output `ficout`.

    Returns ``"noSim"`` when the file has no second line, ``"<id>-<pct>"``
    (last ``|``-separated part of column 1, then column 2) when column 2 is
    above `seuil`, and ``"soSignSim"`` otherwise.
    """
    with open(ficout, "r") as src:
        lis = src.read().split("\n")
    # Fewer than three pieces means there is no usable second line; the
    # original only tested for exactly two and crashed on an empty file.
    if len(lis) < 3:
        return "noSim"
    det = lis[1].split("\t")
    if float(det[2]) > seuil:
        return "%s-%s" % (det[1].split("|")[-1], det[2])
    return "soSignSim"


def definiAllele(fic, database):
    """Print needle identities (or a blastn summary) per line of `fic`.

    Each non-empty line is split on tabs into ``def1, loc1, def2, loc2``:

    * both loci present: the pair is aligned;
    * one locus present and its ``def`` field equal to ``noSim``: the locus
      is searched against `database` with ``alignement.run_blastn`` and
      ``locus, summary`` is printed;
    * one locus present otherwise: the partner locus is taken from the part
      of its ``def`` field before the first ``-`` and the pair is aligned;
    * no locus at all: the line is skipped.

    For an aligned pair, missing ``FastaGene``/``FastaProt`` needle results
    are produced with ``alignement.ali_needle`` and the row
    ``loc1, loc2, gene identity, protein identity`` is printed.  Pairs whose
    gene identity is not ``"100"`` and is above 70 feed the means printed at
    the end; ``-`` is printed for a mean when no pair qualified.

    :param fic: path of the tab-separated input file.
    :param database: blast database handed to ``alignement.run_blastn``.
    :raises IndexError: if a non-empty line has fewer than four fields.
    """
    totg = 0
    g = 0
    totp = 0

    with open(fic, "r") as src:
        lines = src.read().split("\n")

    for line in lines:
        if line == "":
            continue
        lis = line.split("\t")
        def1 = lis[0]
        loc1 = lis[1]
        def2 = lis[2]
        loc2 = lis[3]

        if loc1 != "" and loc2 != "":
            premier, second = loc1, loc2
        elif loc1 != "" and def1 == "noSim":
            ficin = "FastaGene/%s.tfa" % loc1
            ficout = "FastaGene/%s.blastn" % files.get_name(ficin).lower()
            # This branch reuses an existing blastn result.
            if not os.path.isfile(ficout):
                alignement.run_blastn(ficin, ficout, database)
            # Empty summary when blastn produced nothing; the original left
            # the variable unbound (or stale from an earlier line) here.
            inf1 = ""
            if os.path.isfile(ficout):
                inf1 = _blastn_summary_with_threshold(ficout, 70)
            print("%s\t%s" % (loc1, inf1))
            continue
        elif loc2 != "" and def2 == "noSim":
            ficin = "FastaGene/%s.tfa" % loc2
            ficout = "FastaGene/%s.blastn" % files.get_name(ficin).lower()
            # NOTE(review): unlike the loc1 branch, blastn is always re-run
            # here; kept as in the original.
            alignement.run_blastn(ficin, ficout, database)
            inf1 = ""
            if os.path.isfile(ficout):
                inf1 = _blastn_summary_with_threshold(ficout, 70)
            print("%s\t%s" % (loc2, inf1))
            continue
        elif loc1 != "":
            loc2 = def1.split("-")[0]
            premier, second = loc1, loc2
        elif loc2 != "":
            # The known locus comes first in the alignment, but the printed
            # row keeps the loc1/loc2 column order.
            loc1 = def2.split("-")[0]
            premier, second = loc2, loc1
        else:
            # No locus on this line: nothing to align.  The original fell
            # through and reused the files of the previous line.
            continue

        ficg1 = "FastaGene/%s.tfa" % premier
        ficg2 = "FastaGene/%s.tfa" % second
        ficp1 = "FastaProt/%s.tfa" % premier
        ficp2 = "FastaProt/%s.tfa" % second

        outf = "%s-%s.needle" % (files.get_name(ficg1).lower(),
                                 files.get_name(ficg2).lower())
        if not os.path.isfile("FastaGene/%s" % outf):
            alignement.ali_needle(ficg1, ficg2)
        if not os.path.isfile("FastaProt/%s" % outf):
            alignement.ali_needle(ficp1, ficp2)

        idg = alignement.extrait_id_needle("FastaGene/%s" % outf)
        idp = alignement.extrait_id_needle("FastaProt/%s" % outf)
        # idg is compared to the string "100" above all else, so it is text;
        # convert before the numeric test (the original compared the string
        # itself to 70, which is always true under Python 2).
        if idg != "100" and float(idg) > 70:
            totg = totg + float(idg)
            g = g + 1
            totp = totp + float(idp)
        print("%s\t%s\t%s\t%s" % (loc1, loc2, idg, idp))

    print("\nMoyenne des pourcentages d identite (sans tenir compte des 100%)")
    # Avoid a ZeroDivisionError when no pair contributed to the means.
    print("au niveau des genes : %s" % (totg / g if g else "-"))
    print("au niveau des proteines: %s" % (totp / g if g else "-"))

コード例 #6

0

ファイルを表示

ファイル: allele2.py プロジェクト: bioinfocoderz/Scripts

def _blastn_best_hit(loc, database):
    """Run blastn for locus `loc` and summarise the second output line.

    Returns ``""`` when `loc` is empty, when ``FastaGene/<loc>.tfa`` does not
    exist or when blastn left no output file; ``"noSim"`` when the output has
    no second line; otherwise ``"<id>-<pct>"`` built from the last
    ``|``-separated part of column 1 and from column 2 of that line.
    """
    if loc == "":
        return ""
    ficin = "FastaGene/%s.tfa" % loc
    if not os.path.isfile(ficin):
        return ""
    ficout = "FastaGene/%s.blastn" % files.get_name(ficin).lower()
    alignement.run_blastn(ficin, ficout, database)
    if not os.path.isfile(ficout):
        return ""
    with open(ficout, "r") as src:
        lis = src.read().split("\n")
    # Fewer than three pieces means there is no usable second line; the
    # original only tested for exactly two and crashed on an empty file.
    if len(lis) < 3:
        return "noSim"
    det = lis[1].split("\t")
    return "%s-%s" % (det[1].split("|")[-1], det[2])


def compAnnot(fic, database):
    """Print two annotation records side by side with their identities.

    Each non-empty line of `fic` is split on tabs.  Fields 0-7 describe the
    first record and fields 9-16 the second one (type, annotation, locus,
    orf, start, end, length, strand); field 8 is not used.

    * Both loci present: missing needle results in ``FastaGene`` and
      ``FastaProt`` are produced with ``alignement.ali_needle`` when the
      FASTA inputs exist, the identities are read when the result files
      exist (``-`` otherwise) and printed between the two records, followed
      by a suffix (1 when the gene identity is ``"100"``, else 2).
    * Otherwise: each present locus is searched against `database` with
      blastn and the best-hit summaries are printed between the records.

    The means of the gene and protein identities of the non-identical pairs
    are printed at the end; ``-`` is printed for a mean with no data.

    :param fic: path of the tab-separated input file.
    :param database: blast database handed to ``alignement.run_blastn``.
    :raises IndexError: if a non-empty line has fewer than 17 fields.
    """
    totg = 0
    g = 0
    totp = 0
    p = 0  # protein identities actually summed (may be fewer than g)

    with open(fic, "r") as src:
        lines = src.read().split("\n")

    for line in lines:
        if line == "":
            continue
        lis = line.split("\t")
        (type1, annot1, loc1, orf1,
         deb1, fin1, long1, sens1) = lis[0:8]
        type2 = lis[9]
        annot2 = lis[10]
        loc2 = lis[11]
        orf2 = lis[12]
        deb2 = lis[13]
        fin2 = lis[14]
        long2 = lis[15]
        sens2 = lis[16]

        if loc1 != "" and loc2 != "":
            idg = "-"
            idp = "-"
            ficg1 = "FastaGene/%s.tfa" % loc1
            ficg2 = "FastaGene/%s.tfa" % loc2
            ficp1 = "FastaProt/%s.tfa" % loc1
            ficp2 = "FastaProt/%s.tfa" % loc2
            outf = "%s-%s.needle" % (files.get_name(ficg1).lower(),
                                     files.get_name(ficg2).lower())
            needleg = "FastaGene/%s" % outf
            needlep = "FastaProt/%s" % outf

            if not os.path.isfile(needleg):
                if os.path.isfile(ficg1) and os.path.isfile(ficg2):
                    alignement.ali_needle(ficg1, ficg2)
            if not os.path.isfile(needlep):
                if os.path.isfile(ficp1) and os.path.isfile(ficp2):
                    alignement.ali_needle(ficp1, ficp2)
            if os.path.isfile(needleg):
                idg = alignement.extrait_id_needle(needleg)
            if os.path.isfile(needlep):
                idp = alignement.extrait_id_needle(needlep)

            if idg == "100":
                suf = 1
            else:
                suf = 2
                if idg != "-":
                    totg = totg + float(idg)
                    g = g + 1
                    # The protein alignment can be missing while the gene
                    # one exists; the original converted "-" to a float
                    # here and raised ValueError.
                    if idp != "-":
                        totp = totp + float(idp)
                        p = p + 1
            print("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t\t%s\t%s\t\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (type1,annot1,loc1,orf1,deb1,fin1,long1,sens1,idg,idp,type2,annot2,loc2,orf2,deb2,fin2,long2,sens2,suf))
            continue

        # Not an obvious allele pair: search each available gene against the
        # whole database to look for a significant similarity.
        inf1 = _blastn_best_hit(loc1, database)
        inf2 = _blastn_best_hit(loc2, database)
        print("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t\t\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (type1,annot1,loc1,orf1,deb1,fin1,long1,sens1,inf1,inf2,type2,annot2,loc2,orf2,deb2,fin2,long2,sens2))

    print("\nMoyenne des pourcentages d identite (sans tenir compte des 100%)")
    # Avoid a ZeroDivisionError when no pair contributed to a mean.
    print("au niveau des genes : %s" % (totg / g if g else "-"))
    print("au niveau des proteines: %s" % (totp / p if p else "-"))