Python BiologicalSequence.to_fasta 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: skbio.sequence

클래스/타입: BiologicalSequence

메소드/함수: to_fasta

hotexamples.com에서의 예제들: 3

Python BiologicalSequence.to_fasta - 3개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 skbio.sequence.BiologicalSequence.to_fasta에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

BiologicalSequence(14)

to_fasta(2)

distance(1)

is_gap(1)

upper(1)

예제 #1

파일 보기

def MakeGeneraFastas(fin_taxonomy,fin_repset):
    """Write one FASTA file per genus from a representative-sequence set.

    The representative sequences are read with ``parse_fasta`` and the
    taxonomy file is read line by line.  Each taxonomy line is split on
    a tab: the first field is the accession ID, the second is the
    ``;``-separated taxonomy string.  The genus is the second-to-last
    ``;`` field, with a leading ``g__`` stripped when present.

    For every genus that has at least one representative sequence a
    file named ``g__<genus>_seqs.fasta`` is written to the current
    working directory.

    Parameters
    ----------
    fin_taxonomy : str
        Path to the tab-separated taxonomy file.
    fin_repset : str
        Path to the representative-sequence FASTA file.

    Returns
    -------
    tuple
        ``(repsetdic, taxdic, taxgendic, repsetIDlist, repgenlist,
        generaSeqIDdic)``:

        * ``repsetdic`` -- sequence label -> sequence
        * ``taxdic`` -- accession ID -> full taxonomy field
        * ``taxgendic`` -- accession ID -> genus
        * ``repsetIDlist`` -- the keys of ``repsetdic``
        * ``repgenlist`` -- unique genera, in order of first appearance
        * ``generaSeqIDdic`` -- genus -> list of sequence IDs

        The same six objects are also published as module globals.

    Raises
    ------
    KeyError
        If a representative sequence ID is missing from the taxonomy.
    IndexError
        If a taxonomy line has no tab or fewer than two ``;`` fields.

    Notes
    -----
    After writing, every zero-byte regular file in the current working
    directory is deleted, not only files created by this function.
    """
    global repsetdic,taxdic,taxgendic,repsetIDlist,repgenlist,generaSeqIDdic
    repsetdic = {}
    taxdic = {}
    taxgendic = {}
    # NOTE(review): the "U" flag is deprecated on Python 3 and rejected
    # by Python 3.11+; it is kept so universal-newline handling on
    # Python 2 is unchanged.  Switch to "r" once Python 2 is dropped.
    with open(fin_repset, "U") as repset_file, \
            open(fin_taxonomy, "U") as taxonomy_file:
        for label, seq in parse_fasta(repset_file, ignore_comment=True):
            repsetdic[label] = seq
        for line in taxonomy_file:
            fields = line.split("\t")
            accessionID = fields[0]
            taxonomyline = fields[1]
            # Genus is the second-to-last rank in the taxonomy string.
            genus = taxonomyline.split(";")[-2]
            if genus.startswith("g__"):
                genus = genus[3:]
            taxgendic[accessionID] = genus
            taxdic[accessionID] = taxonomyline

    repsetIDlist = repsetdic.keys()
    repgenlist = []
    generaSeqIDdic = {}
    # Register each genus once, in order of first appearance; the dict
    # doubles as the "seen" set so the check is O(1) instead of a list
    # scan.
    for seq_id in repsetIDlist:
        genus = taxgendic[seq_id]
        if genus not in generaSeqIDdic:
            generaSeqIDdic[genus] = []
            repgenlist.append(genus)
    # Every ID present in both inputs had its genus registered above,
    # so the lookup cannot fail and needs no exception guard.
    for accessionID in taxgendic:
        if accessionID in repsetdic:
            generaSeqIDdic[taxgendic[accessionID]].append(accessionID)

    from skbio.sequence import BiologicalSequence
    for genus in generaSeqIDdic:
        with open("g__"+genus+"_seqs.fasta","w") as fout:
            for seq_id in generaSeqIDdic[genus]:
                record = BiologicalSequence(repsetdic[seq_id], id=seq_id)
                # The terminal character is suppressed and the newline
                # is written explicitly after each record.
                fout.write(record.to_fasta(terminal_character=""))
                fout.write("\n")

    cwd = os.getcwd()
    for name in os.listdir(cwd):
        path = os.path.join(cwd, name)
        # Only regular files are removed: os.remove() raises on a
        # directory, and some platforms report size 0 for directories.
        if os.path.isfile(path) and os.path.getsize(path) < 1:
            os.remove(path)
    return repsetdic,taxdic,taxgendic,repsetIDlist,repgenlist,generaSeqIDdic

예제 #2

파일 보기

파일: skbio_hybrid_tree_revision.py 프로젝트: jairideout/ghost-tree

def make_genera_fastas(fin_taxonomy, fin_repset):
    """Sort ITS representative sequences into one FASTA file per genus.

    Each representative sequence is looked up by its accession ID in
    the taxonomy file to find its genus, and is then written to
    ``g__<genus>_seqs.fasta`` in the current working directory.  This
    allows OTUs to be compared to other OTUs from the same genus.

    Parameters
    ----------
    fin_taxonomy : str
        Path to the taxonomy file.  Each line is split on a tab into an
        accession ID and a ``;``-separated taxonomy string; the genus
        is the second-to-last ``;`` field, with a leading ``g__``
        stripped when present.
    fin_repset : str
        Path to the representative-sequence FASTA file, read with
        ``parse_fasta(..., ignore_comment=True)``.

    Returns
    -------
    list
        The unique genera found among the representative sequences, in
        order of first appearance.  The same list is also published as
        the module global ``repgenlist``.

    Raises
    ------
    KeyError
        If a representative sequence ID is missing from the taxonomy.
    IndexError
        If a taxonomy line has no tab or fewer than two ``;`` fields.

    Notes
    -----
    After writing, every zero-byte regular file in the current working
    directory is deleted, not only files created by this function.

    Examples
    --------
    Example of one representative sequence from the input FASTA file::

        >AB015922 Some_comment_ie_sample_location
        CAGAGCCAAGAGATCCGTTGTTGAAAGTTTTTTCAATTCAAGAATAAAACTTAGACTGCAAAG
        ACAACATGAGTTTGGTTTGGGTCTTTGGCGGACACGCTCCAGCCGAAGCCGGTGGGCGGCCGA
        CGCCAGTCCTCACGAACAGCGCCGACGTAGCCCGGCCCGCCAAAGCAACAAGATATAAATCGA
        CACGGGTGGGAGGGTCGACCCAGCACGC

    Example of a taxonomy line (ID and taxonomy separated by a tab,
    shown wrapped here)::

        AY880934<TAB>k__Fungi;p__Basidiomycota;c__Agaricomycetes;
        o__Thelephorales;f__Thelephoraceae;g__Thelephora;
        s__Thelephora_terrestris
    """
    global repgenlist
    repsetdic = {}
    taxgendic = {}
    # NOTE(review): the "U" flag is deprecated on Python 3 and rejected
    # by Python 3.11+; it is kept so universal-newline handling on
    # Python 2 is unchanged.  Switch to "r" once Python 2 is dropped.
    with open(fin_repset, "U") as repset_file, \
            open(fin_taxonomy, "U") as taxonomy_file:
        for label, seq in parse_fasta(repset_file, ignore_comment=True):
            repsetdic[label] = seq
        for line in taxonomy_file:
            fields = line.split("\t")
            accessionID = fields[0]
            # Genus is the second-to-last rank in the taxonomy string.
            genus = fields[1].split(";")[-2]
            if genus.startswith("g__"):
                genus = genus[3:]
            taxgendic[accessionID] = genus

    repgenlist = []
    generaSeqIDdic = {}
    # Register each genus once, in order of first appearance; the dict
    # doubles as the "seen" set so the check is O(1) instead of a list
    # scan.
    for seq_id in repsetdic:
        genus = taxgendic[seq_id]
        if genus not in generaSeqIDdic:
            generaSeqIDdic[genus] = []
            repgenlist.append(genus)
    # Every ID present in both inputs had its genus registered above,
    # so the lookup cannot fail and needs no exception guard.
    for accessionID in taxgendic:
        if accessionID in repsetdic:
            generaSeqIDdic[taxgendic[accessionID]].append(accessionID)

    from skbio.sequence import BiologicalSequence
    for genus in generaSeqIDdic:
        with open("g__" + genus + "_seqs.fasta", "w") as fout:
            for seq_id in generaSeqIDdic[genus]:
                record = BiologicalSequence(repsetdic[seq_id], id=seq_id)
                # The terminal character is suppressed and the newline
                # is written explicitly after each record.
                fout.write(record.to_fasta(terminal_character=""))
                fout.write("\n")

    # Resolve the directory locally rather than relying on a ``cwd``
    # name that this function never defines.
    cwd = os.getcwd()
    for name in os.listdir(cwd):
        path = os.path.join(cwd, name)
        # Only regular files are removed: os.remove() raises on a
        # directory, and some platforms report size 0 for directories.
        if os.path.isfile(path) and os.path.getsize(path) < 1:
            os.remove(path)
    return repgenlist

예제 #3

파일 보기

파일: skbio_hybrid_tree_revision.py 프로젝트: gregcaporaso/ghost-tree

def make_genera_fastas(fin_taxonomy,fin_repset):
    """Sort ITS representative sequences into one FASTA file per genus.

    The genus of every representative sequence is found by looking up
    its accession ID in the taxonomy file.  Sequences are then grouped
    by genus and each group is written to ``g__<genus>_seqs.fasta`` in
    the current working directory, so OTUs can be compared with other
    OTUs of the same genus.

    Parameters
    ----------
    fin_taxonomy : str
        Path to the taxonomy file.  Each line is split on a tab into an
        accession ID and a ``;``-separated taxonomy string; the genus
        is the second-to-last ``;`` field, with a leading ``g__``
        stripped when present.
    fin_repset : str
        Path to the representative-sequence FASTA file, read with
        ``parse_fasta(..., ignore_comment=True)``.

    Returns
    -------
    list
        The unique genera found among the representative sequences, in
        order of first appearance.  The same list is also published as
        the module global ``repgenlist``.

    Raises
    ------
    KeyError
        If a representative sequence ID is missing from the taxonomy.
    IndexError
        If a taxonomy line has no tab or fewer than two ``;`` fields.

    Notes
    -----
    After writing, every zero-byte regular file in the current working
    directory is deleted, not only files created by this function.

    Examples
    --------
    Example of one representative sequence from the input FASTA file::

        >AB015922 Some_comment_ie_sample_location
        CAGAGCCAAGAGATCCGTTGTTGAAAGTTTTTTCAATTCAAGAATAAAACTTAGACTGCAAAG
        ACAACATGAGTTTGGTTTGGGTCTTTGGCGGACACGCTCCAGCCGAAGCCGGTGGGCGGCCGA
        CGCCAGTCCTCACGAACAGCGCCGACGTAGCCCGGCCCGCCAAAGCAACAAGATATAAATCGA
        CACGGGTGGGAGGGTCGACCCAGCACGC

    Example of a taxonomy line (ID and taxonomy separated by a tab,
    shown wrapped here)::

        AY880934<TAB>k__Fungi;p__Basidiomycota;c__Agaricomycetes;
        o__Thelephorales;f__Thelephoraceae;g__Thelephora;
        s__Thelephora_terrestris
    """
    global repgenlist
    label_to_seq = {}
    id_to_genus = {}
    # NOTE(review): the "U" flag is deprecated on Python 3 and rejected
    # by Python 3.11+; it is kept so universal-newline handling on
    # Python 2 is unchanged.  Switch to "r" once Python 2 is dropped.
    with open(fin_repset,"U") as repset_file, \
            open(fin_taxonomy,"U") as taxonomy_file:
        for label, seq in parse_fasta(repset_file,ignore_comment=True):
            label_to_seq[label] = seq
        for line in taxonomy_file:
            fields = line.split("\t")
            accession_id = fields[0]
            # Genus is the second-to-last rank in the taxonomy string.
            genus = fields[1].split(";")[-2]
            if genus.startswith("g__"):
                genus = genus[3:]
            id_to_genus[accession_id] = genus

    repgenlist = []
    genus_to_ids = {}
    # Register each genus once, in order of first appearance; the dict
    # doubles as the "seen" set so the check is O(1) instead of a list
    # scan.
    for seq_id in label_to_seq:
        genus = id_to_genus[seq_id]
        if genus not in genus_to_ids:
            genus_to_ids[genus] = []
            repgenlist.append(genus)
    # Every ID present in both inputs had its genus registered above,
    # so the lookup cannot fail and needs no exception guard.
    for accession_id in id_to_genus:
        if accession_id in label_to_seq:
            genus_to_ids[id_to_genus[accession_id]].append(accession_id)

    from skbio.sequence import BiologicalSequence
    for genus in genus_to_ids:
        with open("g__"+genus+"_seqs.fasta","w") as fout:
            for seq_id in genus_to_ids[genus]:
                record = BiologicalSequence(label_to_seq[seq_id],id=seq_id)
                # The terminal character is suppressed and the newline
                # is written explicitly after each record.
                fout.write(record.to_fasta(terminal_character=""))
                fout.write("\n")

    # Resolve the directory locally rather than relying on a ``cwd``
    # name that this function never defines.
    cwd = os.getcwd()
    for name in os.listdir(cwd):
        path = os.path.join(cwd, name)
        # Only regular files are removed: os.remove() raises on a
        # directory, and some platforms report size 0 for directories.
        if os.path.isfile(path) and os.path.getsize(path) < 1:
            os.remove(path)
    return repgenlist