コード例 #1
0
ファイル: mafft.py プロジェクト: ElDeveloper/brokit
def align_two_alignments(aln1, aln2, moltype, params=None):
    """Returns an Alignment object from two existing Alignments.

    aln1, aln2: cogent.core.alignment.Alignment objects, or data that can be
    used to build them.
        - Mafft profile alignment only works with aligned sequences. Alignment
        object used to handle unaligned sequences.

    params: dict of parameters to pass in to the Mafft app controller.
    """
    #create SequenceCollection object from seqs
    aln1 = Alignment(aln1,MolType=moltype)
    #Create mapping between abbreviated IDs and full IDs
    aln1_int_map, aln1_int_keys = aln1.getIntMap()
    #Create SequenceCollection from int_map.
    aln1_int_map = Alignment(aln1_int_map,MolType=moltype)
    
    #create Alignment object from aln
    aln2 = Alignment(aln2,MolType=moltype)
    #Create mapping between abbreviated IDs and full IDs
    aln2_int_map, aln2_int_keys = aln2.getIntMap(prefix='seqn_')
    #Create SequenceCollection from int_map.
    aln2_int_map = Alignment(aln2_int_map,MolType=moltype)
    
    #Update aln1_int_keys with aln2_int_keys
    aln1_int_keys.update(aln2_int_keys)
    
    #Create Mafft app.
    app = Mafft(InputHandler='_input_as_paths',\
        params=params,
        SuppressStderr=False)
    app._command = 'mafft-profile'
    
    aln1_path = app._tempfile_as_multiline_string(aln1_int_map.toFasta())
    aln2_path = app._tempfile_as_multiline_string(aln2_int_map.toFasta())
    filepaths = [aln1_path,aln2_path]
    
    #Get results using int_map as input to app
    res = app(filepaths)

    #Get alignment as dict out of results
    alignment = dict(parse_fasta(res['StdOut']))
    
    #Make new dict mapping original IDs
    new_alignment = {}
    for k,v in alignment.items():
        key = k.replace('_seed_','')
        new_alignment[aln1_int_keys[key]]=v
    #Create an Alignment object from alignment dict
    new_alignment = Alignment(new_alignment,MolType=moltype)
    #Clean up
    res.cleanUp()
    remove(aln1_path)
    remove(aln2_path)
    remove('pre')
    remove('trace')
    del(aln1,aln1_int_map,aln1_int_keys,\
        aln2,aln2_int_map,aln2_int_keys,app,res,alignment)

    return new_alignment
コード例 #2
0
ファイル: clustalw.py プロジェクト: jairideout/brokit
def align_two_alignments(aln1, aln2, moltype, params=None):
    """Returns an Alignment object from two existing Alignments.

    aln1, aln2: cogent.core.alignment.Alignment objects, or data that can be
    used to build them.

    params: dict of parameters to pass in to the Clustal app controller.
    """
    #create SequenceCollection object from seqs
    aln1 = Alignment(aln1,MolType=moltype)
    #Create mapping between abbreviated IDs and full IDs
    aln1_int_map, aln1_int_keys = aln1.getIntMap()
    #Create SequenceCollection from int_map.
    aln1_int_map = Alignment(aln1_int_map,MolType=moltype)

    #create Alignment object from aln
    aln2 = Alignment(aln2,MolType=moltype)
    #Create mapping between abbreviated IDs and full IDs
    aln2_int_map, aln2_int_keys = aln2.getIntMap(prefix='seqn_')
    #Create SequenceCollection from int_map.
    aln2_int_map = Alignment(aln2_int_map,MolType=moltype)

    #Update aln1_int_keys with aln2_int_keys
    aln1_int_keys.update(aln2_int_keys)

    #Create Mafft app.
    app = Clustalw(InputHandler='_input_as_multiline_string',\
        params=params,
        SuppressStderr=True)
    app.Parameters['-align'].off()
    app.Parameters['-infile'].off()
    app.Parameters['-profile'].on()

    #Add aln_int_map as profile1
    app.Parameters['-profile1'].on(\
        app._tempfile_as_multiline_string(aln1_int_map.toFasta()))

    #Add seq_int_map as profile2
    app.Parameters['-profile2'].on(\
        app._tempfile_as_multiline_string(aln2_int_map.toFasta()))
    #Get results using int_map as input to app
    res = app()

    #Get alignment as dict out of results
    alignment = dict(ClustalParser(res['Align'].readlines()))

    #Make new dict mapping original IDs
    new_alignment = {}
    for k,v in alignment.items():
        new_alignment[aln1_int_keys[k]]=v
    #Create an Alignment object from alignment dict
    new_alignment = Alignment(new_alignment,MolType=moltype)
    #Clean up
    res.cleanUp()
    remove(app.Parameters['-profile1'].Value)
    remove(app.Parameters['-profile2'].Value)
    del(aln1,aln1_int_map,aln1_int_keys,\
        aln2,aln2_int_map,aln2_int_keys,app,res,alignment)

    return new_alignment
コード例 #3
0
ファイル: mafft.py プロジェクト: mikerobeson/pycogent
def align_two_alignments(aln1, aln2, moltype, params=None):
    """Returns an Alignment object from two existing Alignments.

    aln1, aln2: cogent.core.alignment.Alignment objects, or data that can be
    used to build them.
        - Mafft profile alignment only works with aligned sequences. Alignment
        object used to handle unaligned sequences.

    params: dict of parameters to pass in to the Mafft app controller.
    """
    #create SequenceCollection object from seqs
    aln1 = Alignment(aln1,MolType=moltype)
    #Create mapping between abbreviated IDs and full IDs
    aln1_int_map, aln1_int_keys = aln1.getIntMap()
    #Create SequenceCollection from int_map.
    aln1_int_map = Alignment(aln1_int_map,MolType=moltype)
    
    #create Alignment object from aln
    aln2 = Alignment(aln2,MolType=moltype)
    #Create mapping between abbreviated IDs and full IDs
    aln2_int_map, aln2_int_keys = aln2.getIntMap(prefix='seqn_')
    #Create SequenceCollection from int_map.
    aln2_int_map = Alignment(aln2_int_map,MolType=moltype)
    
    #Update aln1_int_keys with aln2_int_keys
    aln1_int_keys.update(aln2_int_keys)
    
    #Create Mafft app.
    app = Mafft(InputHandler='_input_as_paths',\
        params=params,
        SuppressStderr=False)
    app._command = 'mafft-profile'
    
    aln1_path = app._tempfile_as_multiline_string(aln1_int_map.toFasta())
    aln2_path = app._tempfile_as_multiline_string(aln2_int_map.toFasta())
    filepaths = [aln1_path,aln2_path]
    
    #Get results using int_map as input to app
    res = app(filepaths)

    #Get alignment as dict out of results
    alignment = dict(MinimalFastaParser(res['StdOut'].readlines()))
    
    #Make new dict mapping original IDs
    new_alignment = {}
    for k,v in alignment.items():
        key = k.replace('_seed_','')
        new_alignment[aln1_int_keys[key]]=v
    #Create an Alignment object from alignment dict
    new_alignment = Alignment(new_alignment,MolType=moltype)
    #Clean up
    res.cleanUp()
    remove(aln1_path)
    remove(aln2_path)
    remove('pre')
    remove('trace')
    del(aln1,aln1_int_map,aln1_int_keys,\
        aln2,aln2_int_map,aln2_int_keys,app,res,alignment)

    return new_alignment
コード例 #4
0
ファイル: clustalw.py プロジェクト: pombredanne/pycogent-1
def add_seqs_to_alignment(seqs, aln, moltype, params=None):
    """Returns an Alignment object from seqs and existing Alignment.

    seqs: a cogent.core.alignment.SequenceCollection object, or data that can
    be used to build one.

    aln: a cogent.core.alignment.Alignment object, or data that can be used to
    build one

    params: dict of parameters to pass in to the Clustal app controller.
    """
    # create SequenceCollection object from seqs
    seq_collection = SequenceCollection(seqs, MolType=moltype)
    # Create mapping between abbreviated IDs and full IDs
    seq_int_map, seq_int_keys = seq_collection.getIntMap()
    # Create SequenceCollection from int_map.
    seq_int_map = SequenceCollection(seq_int_map, MolType=moltype)

    # create Alignment object from aln
    aln = Alignment(aln, MolType=moltype)
    # Create mapping between abbreviated IDs and full IDs
    aln_int_map, aln_int_keys = aln.getIntMap(prefix="seqn_")
    # Create SequenceCollection from int_map.
    aln_int_map = Alignment(aln_int_map, MolType=moltype)

    # Update seq_int_keys with aln_int_keys
    seq_int_keys.update(aln_int_keys)

    # Create Mafft app.
    app = Clustalw(InputHandler="_input_as_multiline_string", params=params, SuppressStderr=True)
    app.Parameters["-align"].off()
    app.Parameters["-infile"].off()
    app.Parameters["-sequences"].on()

    # Add aln_int_map as profile1
    app.Parameters["-profile1"].on(app._tempfile_as_multiline_string(aln_int_map.toFasta()))

    # Add seq_int_map as profile2
    app.Parameters["-profile2"].on(app._tempfile_as_multiline_string(seq_int_map.toFasta()))
    # Get results using int_map as input to app
    res = app()

    # Get alignment as dict out of results
    alignment = dict(ClustalParser(res["Align"].readlines()))

    # Make new dict mapping original IDs
    new_alignment = {}
    for k, v in alignment.items():
        new_alignment[seq_int_keys[k]] = v
    # Create an Alignment object from alignment dict
    new_alignment = Alignment(new_alignment, MolType=moltype)
    # Clean up
    res.cleanUp()
    remove(app.Parameters["-profile1"].Value)
    remove(app.Parameters["-profile2"].Value)
    del (seq_collection, seq_int_map, seq_int_keys, aln, aln_int_map, aln_int_keys, app, res, alignment)

    return new_alignment
コード例 #5
0
ファイル: pplacer.py プロジェクト: biocore/burrito-fillings
def insert_sequences_into_tree(aln, moltype, params={},
                                           write_log=True):
    """Returns a tree from Alignment object aln.

    aln: an xxx.Alignment object, or data that can be used to build one.

    moltype: cogent.core.moltype.MolType object

    params: dict of parameters to pass in to the RAxML app controller.

    The result will be an xxx.Alignment object, or None if tree fails.
    """

    # convert aln to phy since seq_names need fixed to run through pplacer

    new_aln=get_align_for_phylip(StringIO(aln))

    # convert aln to fasta in case it is not already a fasta file
    aln2 = Alignment(new_aln)
    seqs = aln2.toFasta()

    ih = '_input_as_multiline_string'

    pplacer_app = Pplacer(params=params,
                      InputHandler=ih,
                      WorkingDir=None,
                      SuppressStderr=False,
                      SuppressStdout=False)

    pplacer_result = pplacer_app(seqs)

    # write a log file
    if write_log:
        log_fp = join(params["--out-dir"],'log_pplacer_' + \
                      split(get_tmp_filename())[-1])
        log_file=open(log_fp,'w')
        log_file.write(pplacer_result['StdOut'].read())
        log_file.close()

    # use guppy to convert json file into a placement tree
    guppy_params={'tog':None}

    new_tree=build_tree_from_json_using_params(pplacer_result['json'].name, \
                                               output_dir=params['--out-dir'], \
                                               params=guppy_params)

    pplacer_result.cleanUp()

    return new_tree
コード例 #6
0
ファイル: pplacer.py プロジェクト: cxhernandez/pycogent
def insert_sequences_into_tree(aln, moltype, params={},
                                           write_log=True):
    """Returns a tree from Alignment object aln.

    aln: an xxx.Alignment object, or data that can be used to build one.

    moltype: cogent.core.moltype.MolType object

    params: dict of parameters to pass in to the RAxML app controller.

    The result will be an xxx.Alignment object, or None if tree fails.
    """

    # convert aln to phy since seq_names need fixed to run through pplacer
    
    new_aln=get_align_for_phylip(StringIO(aln))

    # convert aln to fasta in case it is not already a fasta file
    aln2 = Alignment(new_aln)
    seqs = aln2.toFasta()

    ih = '_input_as_multiline_string'    

    pplacer_app = Pplacer(params=params,
                      InputHandler=ih,
                      WorkingDir=None,
                      SuppressStderr=False,
                      SuppressStdout=False)
    
    pplacer_result = pplacer_app(seqs)

    # write a log file
    if write_log:
        log_fp = join(params["--out-dir"],'log_pplacer_' + \
                      split(get_tmp_filename())[-1])
        log_file=open(log_fp,'w')
        log_file.write(pplacer_result['StdOut'].read())
        log_file.close()
        
    # use guppy to convert json file into a placement tree
    guppy_params={'tog':None}
    
    new_tree=build_tree_from_json_using_params(pplacer_result['json'].name, \
                                               output_dir=params['--out-dir'], \
                                               params=guppy_params)

    pplacer_result.cleanUp()
    
    return new_tree
コード例 #7
0
ファイル: parsinsert.py プロジェクト: chungtseng/pycogent
def insert_sequences_into_tree(aln, moltype, params={}):
    """Returns a tree from placement of sequences
    """
    # convert aln to phy since seq_names need fixed to run through parsinsert
    new_aln=get_align_for_phylip(StringIO(aln))

    # convert aln to fasta in case it is not already a fasta file
    aln2 = Alignment(new_aln)
    seqs = aln2.toFasta()

    parsinsert_app = ParsInsert(params=params)
    result = parsinsert_app(seqs)
    
    # parse tree
    tree = DndParser(result['Tree'].read(), constructor=PhyloNode)
    
    # cleanup files
    result.cleanUp()
    
    return tree
コード例 #8
0
def insert_sequences_into_tree(aln, moltype, params={}):
    """Returns a tree from placement of sequences
    """
    # convert aln to phy since seq_names need fixed to run through parsinsert
    new_aln = get_align_for_phylip(StringIO(aln))

    # convert aln to fasta in case it is not already a fasta file
    aln2 = Alignment(new_aln)
    seqs = aln2.toFasta()

    parsinsert_app = ParsInsert(params=params)
    result = parsinsert_app(seqs)

    # parse tree
    tree = DndParser(result['Tree'].read(), constructor=PhyloNode)

    # cleanup files
    result.cleanUp()

    return tree
コード例 #9
0
ファイル: align_seqs.py プロジェクト: Sandy4321/FizzyQIIME
 def __call__(self, seq_path, result_path=None, log_path=None, \
     failure_path=None, cmbuild_params=None, cmalign_params=None):
     
     log_params = []
     # load candidate sequences
     candidate_sequences = dict(MinimalFastaParser(open(seq_path,'U')))
     
     # load template sequences
     try:
         info, template_alignment, struct = list(MinimalRfamParser(open(\
             self.Params['template_filepath'],'U'),\
             seq_constructor=ChangedSequence))[0]
     except RecordError:
         raise ValueError, "Template alignment must be in Stockholm format with corresponding secondary structure annotation when using InfernalAligner."
     
     moltype = self.Params['moltype']
     
     #Need to make separate mapping for unaligned sequences
     unaligned = SequenceCollection(candidate_sequences,MolType=moltype)
     int_map, int_keys = unaligned.getIntMap(prefix='unaligned_')
     int_map = SequenceCollection(int_map,MolType=moltype)
     
     #Turn on --gapthresh option in cmbuild to force alignment to full model
     if cmbuild_params is None:
         cmbuild_params = {}
     cmbuild_params.update({'--gapthresh':1.0})
     
     #record cmbuild parameters
     log_params.append('cmbuild parameters:')
     log_params.append(str(cmbuild_params))
     
     #Turn on --sub option in Infernal, since we know the unaligned sequences
     # are fragments.
     #Also turn on --gapthresh to use same gapthresh as was used to build
     # model
     
     if cmalign_params is None:
         cmalign_params = {}
     cmalign_params.update({'--sub':True,'--gapthresh':1.0})
     
     #record cmalign parameters
     log_params.append('cmalign parameters:')
     log_params.append(str(cmalign_params))
     
     #Align sequences to alignment including alignment gaps.
     aligned, struct_string = cmalign_from_alignment(aln=template_alignment,\
         structure_string=struct,\
         seqs=int_map,\
         moltype=moltype,\
         include_aln=True,\
         params=cmalign_params,\
         cmbuild_params=cmbuild_params)
     
     #Pull out original sequences from full alignment.
     infernal_aligned={}
     aligned_dict = aligned.NamedSeqs
     for key in int_map.Names:
         infernal_aligned[int_keys.get(key,key)]=aligned_dict[key]
     
     #Create an Alignment object from alignment dict
     infernal_aligned = Alignment(infernal_aligned,MolType=moltype)
     
     if log_path is not None:
         log_file = open(log_path,'w')
         log_file.write('\n'.join(log_params))
         log_file.close()
     
     if result_path is not None:
         result_file = open(result_path,'w')
         result_file.write(infernal_aligned.toFasta())
         result_file.close()
         return None
     else:
         try:
             return infernal_aligned
         except ValueError:
             return {}
コード例 #10
0
def add_seqs_to_alignment(seqs, aln, moltype, params=None):
    """Returns an Alignment object from seqs and existing Alignment.

    seqs: a cogent.core.alignment.SequenceCollection object, or data that can
    be used to build one.

    aln: a cogent.core.alignment.Alignment object, or data that can be used to
    build one

    params: dict of parameters to pass in to the Clustal app controller.
    """
    #create SequenceCollection object from seqs
    seq_collection = SequenceCollection(seqs, MolType=moltype)
    #Create mapping between abbreviated IDs and full IDs
    seq_int_map, seq_int_keys = seq_collection.getIntMap()
    #Create SequenceCollection from int_map.
    seq_int_map = SequenceCollection(seq_int_map, MolType=moltype)

    #create Alignment object from aln
    aln = Alignment(aln, MolType=moltype)
    #Create mapping between abbreviated IDs and full IDs
    aln_int_map, aln_int_keys = aln.getIntMap(prefix='seqn_')
    #Create SequenceCollection from int_map.
    aln_int_map = Alignment(aln_int_map, MolType=moltype)

    #Update seq_int_keys with aln_int_keys
    seq_int_keys.update(aln_int_keys)

    #Create Mafft app.
    app = Clustalw(InputHandler='_input_as_multiline_string',\
        params=params,
        SuppressStderr=True)
    app.Parameters['-align'].off()
    app.Parameters['-infile'].off()
    app.Parameters['-sequences'].on()

    #Add aln_int_map as profile1
    app.Parameters['-profile1'].on(\
        app._tempfile_as_multiline_string(aln_int_map.toFasta()))

    #Add seq_int_map as profile2
    app.Parameters['-profile2'].on(\
        app._tempfile_as_multiline_string(seq_int_map.toFasta()))
    #Get results using int_map as input to app
    res = app()

    #Get alignment as dict out of results
    alignment = dict(ClustalParser(res['Align'].readlines()))

    #Make new dict mapping original IDs
    new_alignment = {}
    for k, v in alignment.items():
        new_alignment[seq_int_keys[k]] = v
    #Create an Alignment object from alignment dict
    new_alignment = Alignment(new_alignment, MolType=moltype)
    #Clean up
    res.cleanUp()
    remove(app.Parameters['-profile1'].Value)
    remove(app.Parameters['-profile2'].Value)
    del(seq_collection,seq_int_map,seq_int_keys,\
        aln,aln_int_map,aln_int_keys,app,res,alignment)

    return new_alignment
コード例 #11
0
ファイル: clearcut.py プロジェクト: cxhernandez/pycogent
def build_tree_from_alignment(aln, moltype, best_tree=False, params={},\
    working_dir='/tmp'):
    """Returns a tree from Alignment object aln.

    aln: an cogent.core.alignment.Alignment object, or data that can be used
    to build one.
        -  Clearcut only accepts aligned sequences.  Alignment object used to
        handle unaligned sequences.
    
    moltype: a cogent.core.moltype object.
        - NOTE: If moltype = RNA, we must convert to DNA since Clearcut v1.0.8
        gives incorrect results if RNA is passed in.  'U' is treated as an 
        incorrect character and is excluded from distance calculations.

    best_tree: if True (default:False), uses a slower but more accurate
    algorithm to build the tree.

    params: dict of parameters to pass in to the Clearcut app controller.

    The result will be an cogent.core.tree.PhyloNode object, or None if tree
    fails.
    """
    params['--out'] = get_tmp_filename(working_dir)
    
    # Create instance of app controller, enable tree, disable alignment
    app = Clearcut(InputHandler='_input_as_multiline_string', params=params, \
                   WorkingDir=working_dir, SuppressStdout=True,\
                   SuppressStderr=True)
    #Input is an alignment
    app.Parameters['-a'].on()
    #Turn off input as distance matrix
    app.Parameters['-d'].off()
    
    #If moltype = RNA, we must convert to DNA.
    if moltype == RNA:
        moltype = DNA
    
    if best_tree:
        app.Parameters['-N'].on()
    
    #Turn on correct moltype
    moltype_string = moltype.label.upper()
    app.Parameters[MOLTYPE_MAP[moltype_string]].on()    

    # Setup mapping. Clearcut clips identifiers. We will need to remap them.
    # Clearcut only accepts aligned sequences.  Let Alignment object handle
    # unaligned sequences.
    seq_aln = Alignment(aln,MolType=moltype)
    #get int mapping
    int_map, int_keys = seq_aln.getIntMap()
    #create new Alignment object with int_map
    int_map = Alignment(int_map)

    # Collect result
    result = app(int_map.toFasta())
    
    # Build tree
    tree = DndParser(result['Tree'].read(), constructor=PhyloNode)
    for node in tree.tips():
        node.Name = int_keys[node.Name]

    # Clean up
    result.cleanUp()
    del(seq_aln, app, result, int_map, int_keys, params)

    return tree
コード例 #12
0
def add_seqs_to_alignment(seqs, aln, moltype, params=None, accurate=False):
    """Returns an Alignment object from seqs and existing Alignment.

    seqs: a cogent.core.sequence.Sequence object, or data that can be used
    to build one.

    aln: an cogent.core.alignment.Alignment object, or data that can be used
    to build one

    params: dict of parameters to pass in to the Mafft app controller.
    """
    #create SequenceCollection object from seqs
    seq_collection = SequenceCollection(seqs,MolType=moltype)
    #Create mapping between abbreviated IDs and full IDs
    seq_int_map, seq_int_keys = seq_collection.getIntMap()
    #Create SequenceCollection from int_map.
    seq_int_map = SequenceCollection(seq_int_map,MolType=moltype)
    
    #create Alignment object from aln
    aln = Alignment(aln,MolType=moltype)
    #Create mapping between abbreviated IDs and full IDs
    aln_int_map, aln_int_keys = aln.getIntMap(prefix='seqn_')
    #Create SequenceCollection from int_map.
    aln_int_map = Alignment(aln_int_map,MolType=moltype)
    
    #Update seq_int_keys with aln_int_keys
    seq_int_keys.update(aln_int_keys)
    
    #Create Mafft app.
    app = Mafft(InputHandler='_input_as_multiline_string',\
        params=params,
        SuppressStderr=True)
    
    #Turn on correct moltype
    moltype_string = moltype.label.upper()
    app.Parameters[MOLTYPE_MAP[moltype_string]].on()
    
    #Do not report progress
    app.Parameters['--quiet'].on()
    
    #Add aln_int_map as seed alignment
    app.Parameters['--seed'].on(\
        app._tempfile_as_multiline_string(aln_int_map.toFasta()))
        
    #More accurate alignment, sacrificing performance.
    if accurate:
        app.Parameters['--globalpair'].on()
        app.Parameters['--maxiterate'].Value=1000
    
    #Get results using int_map as input to app
    res = app(seq_int_map.toFasta())
    #Get alignment as dict out of results
    alignment = dict(MinimalFastaParser(res['StdOut'].readlines()))
    
    #Make new dict mapping original IDs
    new_alignment = {}
    for k,v in list(alignment.items()):
        key = k.replace('_seed_','')
        new_alignment[seq_int_keys[key]]=v
    #Create an Alignment object from alignment dict
    new_alignment = Alignment(new_alignment,MolType=moltype)
    #Clean up
    res.cleanUp()
    remove(app.Parameters['--seed'].Value)
    del(seq_collection,seq_int_map,seq_int_keys,\
        aln,aln_int_map,aln_int_keys,app,res,alignment)

    return new_alignment
コード例 #13
0
ファイル: mafft.py プロジェクト: ElDeveloper/brokit
def add_seqs_to_alignment(seqs, aln, moltype, params=None, accurate=False):
    """Returns an Alignment object from seqs and existing Alignment.

    seqs: a cogent.core.sequence.Sequence object, or data that can be used
    to build one.

    aln: an cogent.core.alignment.Alignment object, or data that can be used
    to build one

    params: dict of parameters to pass in to the Mafft app controller.
    """
    #create SequenceCollection object from seqs
    seq_collection = SequenceCollection(seqs,MolType=moltype)
    #Create mapping between abbreviated IDs and full IDs
    seq_int_map, seq_int_keys = seq_collection.getIntMap()
    #Create SequenceCollection from int_map.
    seq_int_map = SequenceCollection(seq_int_map,MolType=moltype)
    
    #create Alignment object from aln
    aln = Alignment(aln,MolType=moltype)
    #Create mapping between abbreviated IDs and full IDs
    aln_int_map, aln_int_keys = aln.getIntMap(prefix='seqn_')
    #Create SequenceCollection from int_map.
    aln_int_map = Alignment(aln_int_map,MolType=moltype)
    
    #Update seq_int_keys with aln_int_keys
    seq_int_keys.update(aln_int_keys)
    
    #Create Mafft app.
    app = Mafft(InputHandler='_input_as_multiline_string',\
        params=params,
        SuppressStderr=True)
    
    #Turn on correct moltype
    moltype_string = moltype.label.upper()
    app.Parameters[MOLTYPE_MAP[moltype_string]].on()
    
    #Do not report progress
    app.Parameters['--quiet'].on()
    
    #Add aln_int_map as seed alignment
    app.Parameters['--seed'].on(\
        app._tempfile_as_multiline_string(aln_int_map.toFasta()))
        
    #More accurate alignment, sacrificing performance.
    if accurate:
        app.Parameters['--globalpair'].on()
        app.Parameters['--maxiterate'].Value=1000
    
    #Get results using int_map as input to app
    res = app(seq_int_map.toFasta())
    #Get alignment as dict out of results
    alignment = dict(parse_fasta(res['StdOut']))
    
    #Make new dict mapping original IDs
    new_alignment = {}
    for k,v in alignment.items():
        key = k.replace('_seed_','')
        new_alignment[seq_int_keys[key]]=v
    #Create an Alignment object from alignment dict
    new_alignment = Alignment(new_alignment,MolType=moltype)
    #Clean up
    res.cleanUp()
    remove(app.Parameters['--seed'].Value)
    del(seq_collection,seq_int_map,seq_int_keys,\
        aln,aln_int_map,aln_int_keys,app,res,alignment)

    return new_alignment
#pycogent  module
from cogent.core.alignment import Alignment

fasta_file = open('H**o.fasta')
ali = Alignment(fasta_file.read())
print(ali.toFasta())

for column in ali.iterPositions():
    gap_fraction = float(column.count('-')) / len(column)
    print('%4.2f' % gap_fraction, )
print()
コード例 #15
0
ファイル: clearcut.py プロジェクト: cxhernandez/pycogent
def build_tree_from_alignment(aln, moltype, best_tree=False, params={},\
    working_dir='/tmp'):
    """Returns a tree from Alignment object aln.

    aln: an cogent.core.alignment.Alignment object, or data that can be used
    to build one.
        -  Clearcut only accepts aligned sequences.  Alignment object used to
        handle unaligned sequences.
    
    moltype: a cogent.core.moltype object.
        - NOTE: If moltype = RNA, we must convert to DNA since Clearcut v1.0.8
        gives incorrect results if RNA is passed in.  'U' is treated as an 
        incorrect character and is excluded from distance calculations.

    best_tree: if True (default:False), uses a slower but more accurate
    algorithm to build the tree.

    params: dict of parameters to pass in to the Clearcut app controller.

    The result will be an cogent.core.tree.PhyloNode object, or None if tree
    fails.
    """
    params['--out'] = get_tmp_filename(working_dir)

    # Create instance of app controller, enable tree, disable alignment
    app = Clearcut(InputHandler='_input_as_multiline_string', params=params, \
                   WorkingDir=working_dir, SuppressStdout=True,\
                   SuppressStderr=True)
    #Input is an alignment
    app.Parameters['-a'].on()
    #Turn off input as distance matrix
    app.Parameters['-d'].off()

    #If moltype = RNA, we must convert to DNA.
    if moltype == RNA:
        moltype = DNA

    if best_tree:
        app.Parameters['-N'].on()

    #Turn on correct moltype
    moltype_string = moltype.label.upper()
    app.Parameters[MOLTYPE_MAP[moltype_string]].on()

    # Setup mapping. Clearcut clips identifiers. We will need to remap them.
    # Clearcut only accepts aligned sequences.  Let Alignment object handle
    # unaligned sequences.
    seq_aln = Alignment(aln, MolType=moltype)
    #get int mapping
    int_map, int_keys = seq_aln.getIntMap()
    #create new Alignment object with int_map
    int_map = Alignment(int_map)

    # Collect result
    result = app(int_map.toFasta())

    # Build tree
    tree = DndParser(result['Tree'].read(), constructor=PhyloNode)
    for node in tree.tips():
        node.Name = int_keys[node.Name]

    # Clean up
    result.cleanUp()
    del (seq_aln, app, result, int_map, int_keys, params)

    return tree