Python MultipleSeqAlignment.append 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: Bio.AlignIO

클래스/타입: MultipleSeqAlignment

메소드/함수: append

hotexamples.com에서의 예제들: 3

Python MultipleSeqAlignment.append - 3개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Bio.AlignIO.MultipleSeqAlignment.append의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 각 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

MultipleSeqAlignment(26)

append(2)

format(1)

get_alignment_length(1)

sort(1)

예제 #1

파일 보기

def retrieve_alignment(tre, alnpath, taxonset=range(0, 101), delimiter='_'):
    """
    Build the MSA that corresponds to the input tree.

    Parameters
    ----------------
    tre : single-copy treeswift tree generated from James's code;
        ``tre.labels(True, False)`` supplies the row labels to keep
    alnpath : path to the phylip formatted alignment of the genes. The row
        labels should be a superset of the leafset of 'tre'
    taxonset : iterable with the taxon ids of the entire dataset. Ids are
        compared by their string form, so ints and strings are both accepted
    delimiter : separator in the row labels; the part of a label before its
        first occurrence is used as the taxon id

    Returns
    ----------------
    A MultipleSeqAlignment holding one row per kept alignment row (relabelled
    with its taxon id) plus one all-gap row for every taxon of `taxonset`
    that was not seen, ordered by ``MultipleSeqAlignment.sort()``.

    Raises
    ----------------
    KeyError : if a kept row maps to a taxon id that is not in `taxonset`,
        or that was already consumed by an earlier row
    """
    # Close the handle deterministically instead of leaking it.
    with open(alnpath) as handle:
        aln = AlignIO.read(handle, "phylip")
    seqlen = len(aln[0].seq)
    blank = "-" * seqlen
    whitelist = set(tre.labels(True, False))
    # Row ids are strings, so track the outstanding taxa as strings too;
    # with the integer default, remove() used to raise KeyError on every hit.
    rest = {str(taxon) for taxon in taxonset}
    res = MultipleSeqAlignment([])
    for r in aln:
        if r.id in whitelist:
            rid = r.id.split(delimiter)[0]
            res.append(SeqRecord(r.seq, id=rid))
            rest.remove(rid)
    # Pad every taxon that has no row in this gene with gaps.
    for rst in rest:
        res.append(SeqRecord(Seq(blank), id=rst))
    res.sort()
    return res

예제 #2

파일 보기

파일: test_pandascharm.py 프로젝트: jmenglund/pandas-charm

 def dict_to_bioalignment(d, alphabet='generic_alphabet', sorted=True):
     """
     Build a BioPython MultipleSeqAlignment from a dict that maps
     record ids to sequence strings.

     `alphabet` is the name of an attribute of Bio.Alphabet; the object it
     names is attached to every sequence. When `sorted` is true the
     alignment's sort() method is called before it is returned.
     """
     result = MultipleSeqAlignment([])
     alphabet_obj = getattr(Bio.Alphabet, alphabet)
     for record_id, sequence in d.items():
         bio_seq = Seq(sequence, alphabet=alphabet_obj)
         result.append(SeqRecord(bio_seq, id=record_id))
     if not sorted:
         return result
     result.sort()
     return result

예제 #3

파일 보기

파일: read_data.py 프로젝트: dunleavy005/samm

def impute_ancestors_dnapars(seqs,
                             gl_seq,
                             scratch_dir,
                             gl_name='germline',
                             verbose=True):
    """
    Compute ancestral states via maximum parsimony

    Writes a phylip alignment and a dnapars option file into scratch_dir,
    runs the external `dnapars` program there, and reads the first tree it
    reports.

    @param seqs: list of sequences
    @param gl_seq: germline sequence
    @param scratch_dir: where to write intermediate dnapars files
    @param gl_name: name of germline (must be less than 10 characters long)
    @param verbose: if true, print the shell command equivalent to the
        dnapars invocation before running it

    @return genes_line: list of [name, sequence] pairs, one per distinct
        parent node, in the order the parents are first encountered
    @return seq_line: list of [parent name, node name, sequence] triples,
        one per non-root node of the first parsimony tree

    @raise ValueError: if the germline row cannot be found in the phylip file
    @raise RuntimeError: if dnapars exits with a non-zero status
    @raise OSError: if the dnapars executable cannot be started
    """
    from gctree.bin.phylip_parse import parse_outfile

    assert (len(gl_name) < 10)

    infile, config, outfile = [
        os.path.join(scratch_dir, fname) for fname in [
            'infile',
            'dnapars.cfg',
            'outfile',
        ]
    ]

    aln = MultipleSeqAlignment([SeqRecord(Seq(gl_seq), id=gl_name)])

    # sequence ID must be less than ten characters, but also dnapars sets internal node
    # names to 1, 2, 3, ..., so name them numbers descending from 100 million, hoping
    # we won't ever have a clone that big...
    for idx, seq in enumerate(seqs):
        aln.append(SeqRecord(Seq(seq), id=str(99999999 - idx)))

    # dnapars uses the name "infile" as default input phylip file
    with open(infile, 'w') as phylip_file:
        phylip_file.write(aln.format('phylip'))

    # and we need to tell it the line where the root sequence occurs
    naive_idx = None
    with open(infile, 'r') as phylip_file:
        for lineno, line in enumerate(phylip_file):
            if line.startswith(gl_name):
                naive_idx = str(lineno)
                # the germline is the first record; stop at its row so a
                # later line sharing the prefix cannot override it
                break
    if naive_idx is None:
        raise ValueError(
            'germline %r not found in %s' % (gl_name, infile))

    # arcane user options for dnapars
    # 'O', naive_idx: the location of the outgroup root
    # 'S', 'Y': less thorough search; runs much faster but output is less exhaustive
    # 'J', 13, 10: randomize input ("jumble") using seed 13 and jumbling 10 times
    # 4: print out steps in each site (to get all nucleotide info)
    # 5: print sequences in at all nodes (to get ancestors)
    # '.': use dot-differencing for display
    # 'Y': accept these options
    with open(config, 'w') as cfg_file:
        cfg_file.write('\n'.join(
            ['O', naive_idx, 'S', 'Y', 'J', '13', '10', '4', '5', '.', 'Y']))

    if verbose:
        # shell equivalent of the steps below, kept for log continuity
        print("Calling: " + " ".join([
            'cd', scratch_dir, '&& rm -f outfile outtree && dnapars <',
            os.path.basename(config), '> dnapars.log'
        ]))

    # dnapars has weird behavior if outfile and outtree already exist o_O
    for stale_name in ('outfile', 'outtree'):
        stale_path = os.path.join(scratch_dir, stale_name)
        if os.path.lexists(stale_path):
            os.remove(stale_path)

    # Run dnapars directly (no shell) so a scratch_dir containing spaces or
    # shell metacharacters cannot break or alter the command.
    log_path = os.path.join(scratch_dir, 'dnapars.log')
    with open(config, 'r') as cfg_file, open(log_path, 'w') as log_file:
        returncode = subprocess.call(
            ['dnapars'],
            stdin=cfg_file,
            stdout=log_file,
            cwd=scratch_dir or None)
    if returncode != 0:
        raise RuntimeError(
            'dnapars exited with status %d; see %s' % (returncode, log_path))

    # phew, finally got some trees
    trees = parse_outfile(outfile, countfile=None, naive=gl_name)

    # take first parsimony tree
    genes_line = []
    seq_line = []
    for descendant in trees[0].traverse('preorder'):
        if descendant.is_root():
            descendant.name = gl_name
            continue
        parent = descendant.up
        # use dummy name for internal node sequences
        descendant.name = '-'.join([parent.name, descendant.name])
        parent_entry = [parent.name, parent.sequence.lower()]
        if parent_entry not in genes_line:
            genes_line.append(parent_entry)
        seq_line.append([
            parent.name, descendant.name,
            descendant.sequence.lower()
        ])

    return genes_line, seq_line