Python SequenceCollection.toFasta 예제들, cogent.core.alignment.SequenceCollection.toFasta Python 예제들

예제 #1

0

파일 보기

파일: clustalw.py 프로젝트: pombredanne/pycogent-1

def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Returns a tree from Alignment object aln.

    aln: an cogent.core.alignment.Alignment object, or data that can be used
    to build one.

    moltype: cogent.core.moltype.MolType object

    best_tree: if True (default:False), uses a slower but more accurate
    algorithm to build the tree.

    params: dict of parameters to pass in to the Clustal app controller.

    The result will be an cogent.core.tree.PhyloNode object, or None if tree
    fails.
    """
    # Create instance of app controller, enable tree, disable alignment
    app = Clustalw(InputHandler="_input_as_multiline_string", params=params, WorkingDir="/tmp")
    app.Parameters["-align"].off()

    # Set params to empty dict if None.
    if params is None:
        params = {}

    if moltype == DNA or moltype == RNA:
        params["-type"] = "d"
    elif moltype == PROTEIN:
        params["-type"] = "p"
    else:
        raise ValueError, "moltype must be DNA, RNA, or PROTEIN"

    # best_tree -> bootstrap
    if best_tree:
        if "-bootstrap" not in params:
            app.Parameters["-bootstrap"].on(1000)
        if "-seed" not in params:
            app.Parameters["-seed"].on(randint(0, 1000))
        if "-bootlabels" not in params:
            app.Parameters["-bootlabels"].on("nodes")
    else:
        app.Parameters["-tree"].on()

    # Setup mapping. Clustalw clips identifiers. We will need to remap them.
    seq_collection = SequenceCollection(aln)
    int_map, int_keys = seq_collection.getIntMap()
    int_map = SequenceCollection(int_map)

    # Collect result
    result = app(int_map.toFasta())

    # Build tree
    tree = DndParser(result["Tree"].read(), constructor=PhyloNode)
    for node in tree.tips():
        node.Name = int_keys[node.Name]

    # Clean up
    result.cleanUp()
    del (seq_collection, app, result, int_map, int_keys)

    return tree

예제 #2

0

파일 보기

def align_unaligned_seqs(seqs, moltype, params=None):
    """Returns an Alignment object from seqs.

    seqs: cogent.core.alignment.SequenceCollection object, or data that can be
    used to build one.
    
    moltype: a MolType object.  DNA, RNA, or PROTEIN.

    params: dict of parameters to pass in to the Clustal app controller.
    
    Result will be a cogent.core.alignment.Alignment object.
    """
    #create SequenceCollection object from seqs
    seq_collection = SequenceCollection(seqs, MolType=moltype)
    #Create mapping between abbreviated IDs and full IDs
    int_map, int_keys = seq_collection.getIntMap()
    #Create SequenceCollection from int_map.
    int_map = SequenceCollection(int_map, MolType=moltype)
    #Create Clustalw app.
    app = Clustalw(InputHandler='_input_as_multiline_string', params=params)
    #Get results using int_map as input to app
    res = app(int_map.toFasta())
    #Get alignment as dict out of results
    alignment = dict(ClustalParser(res['Align'].readlines()))
    #Make new dict mapping original IDs
    new_alignment = {}
    for k, v in alignment.items():
        new_alignment[int_keys[k]] = v
    #Create an Alignment object from alignment dict
    new_alignment = Alignment(new_alignment, MolType=moltype)
    #Clean up
    res.cleanUp()
    del (seq_collection, int_map, int_keys, app, res, alignment)

    return new_alignment

예제 #3

0

파일 보기

파일: clustalw.py 프로젝트: pombredanne/pycogent-1

def align_unaligned_seqs(seqs, moltype, params=None):
    """Returns an Alignment object from seqs.

    seqs: cogent.core.alignment.SequenceCollection object, or data that can be
    used to build one.
    
    moltype: a MolType object.  DNA, RNA, or PROTEIN.

    params: dict of parameters to pass in to the Clustal app controller.
    
    Result will be a cogent.core.alignment.Alignment object.
    """
    # create SequenceCollection object from seqs
    seq_collection = SequenceCollection(seqs, MolType=moltype)
    # Create mapping between abbreviated IDs and full IDs
    int_map, int_keys = seq_collection.getIntMap()
    # Create SequenceCollection from int_map.
    int_map = SequenceCollection(int_map, MolType=moltype)
    # Create Clustalw app.
    app = Clustalw(InputHandler="_input_as_multiline_string", params=params)
    # Get results using int_map as input to app
    res = app(int_map.toFasta())
    # Get alignment as dict out of results
    alignment = dict(ClustalParser(res["Align"].readlines()))
    # Make new dict mapping original IDs
    new_alignment = {}
    for k, v in alignment.items():
        new_alignment[int_keys[k]] = v
    # Create an Alignment object from alignment dict
    new_alignment = Alignment(new_alignment, MolType=moltype)
    # Clean up
    res.cleanUp()
    del (seq_collection, int_map, int_keys, app, res, alignment)

    return new_alignment

예제 #4

0

파일 보기

파일: fasttree.py 프로젝트: pycogent/pycogent

def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Returns a tree from alignment
    
    Will check MolType of aln object
    """
    if params is None:
        params = {}

    if moltype == DNA or moltype == RNA:
        params["-nt"] = True
    elif moltype == PROTEIN:
        params["-nt"] = False
    else:
        raise ValueError, "FastTree does not support moltype: %s" % moltype.label

    if best_tree:
        params["-slow"] = True

    # Create mapping between abbreviated IDs and full IDs
    int_map, int_keys = aln.getIntMap()
    # Create SequenceCollection from int_map.
    int_map = SequenceCollection(int_map, MolType=moltype)

    app = FastTree(params=params)

    result = app(int_map.toFasta())
    tree = DndParser(result["Tree"].read(), constructor=PhyloNode)
    # remap tip names
    for tip in tree.tips():
        tip.Name = int_keys[tip.Name]

    return tree

예제 #5

0

파일 보기

파일: fasttree.py 프로젝트: mikerobeson/pycogent

def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Returns a tree from alignment
    
    Will check MolType of aln object
    """
    if params is None:
        params = {}

    if moltype == DNA or moltype == RNA:
        params['-nt'] = True
    elif moltype == PROTEIN:
        params['-nt'] = False
    else:
        raise ValueError, \
                "FastTree does not support moltype: %s" % moltype.label

    if best_tree:
        params['-slow'] = True

    #Create mapping between abbreviated IDs and full IDs
    int_map, int_keys = aln.getIntMap()
    #Create SequenceCollection from int_map.
    int_map = SequenceCollection(int_map, MolType=moltype)

    app = FastTree(params=params)

    result = app(int_map.toFasta())
    tree = DndParser(result['Tree'].read(), constructor=PhyloNode)
    #remap tip names
    for tip in tree.tips():
        tip.Name = int_keys[tip.Name]

    return tree

예제 #6

0

파일 보기

파일: clustalw.py 프로젝트: pombredanne/pycogent-1

def add_seqs_to_alignment(seqs, aln, moltype, params=None):
    """Returns an Alignment object from seqs and existing Alignment.

    seqs: a cogent.core.alignment.SequenceCollection object, or data that can
    be used to build one.

    aln: a cogent.core.alignment.Alignment object, or data that can be used to
    build one

    params: dict of parameters to pass in to the Clustal app controller.
    """
    # create SequenceCollection object from seqs
    seq_collection = SequenceCollection(seqs, MolType=moltype)
    # Create mapping between abbreviated IDs and full IDs
    seq_int_map, seq_int_keys = seq_collection.getIntMap()
    # Create SequenceCollection from int_map.
    seq_int_map = SequenceCollection(seq_int_map, MolType=moltype)

    # create Alignment object from aln
    aln = Alignment(aln, MolType=moltype)
    # Create mapping between abbreviated IDs and full IDs
    aln_int_map, aln_int_keys = aln.getIntMap(prefix="seqn_")
    # Create SequenceCollection from int_map.
    aln_int_map = Alignment(aln_int_map, MolType=moltype)

    # Update seq_int_keys with aln_int_keys
    seq_int_keys.update(aln_int_keys)

    # Create Mafft app.
    app = Clustalw(InputHandler="_input_as_multiline_string", params=params, SuppressStderr=True)
    app.Parameters["-align"].off()
    app.Parameters["-infile"].off()
    app.Parameters["-sequences"].on()

    # Add aln_int_map as profile1
    app.Parameters["-profile1"].on(app._tempfile_as_multiline_string(aln_int_map.toFasta()))

    # Add seq_int_map as profile2
    app.Parameters["-profile2"].on(app._tempfile_as_multiline_string(seq_int_map.toFasta()))
    # Get results using int_map as input to app
    res = app()

    # Get alignment as dict out of results
    alignment = dict(ClustalParser(res["Align"].readlines()))

    # Make new dict mapping original IDs
    new_alignment = {}
    for k, v in alignment.items():
        new_alignment[seq_int_keys[k]] = v
    # Create an Alignment object from alignment dict
    new_alignment = Alignment(new_alignment, MolType=moltype)
    # Clean up
    res.cleanUp()
    remove(app.Parameters["-profile1"].Value)
    remove(app.Parameters["-profile2"].Value)
    del (seq_collection, seq_int_map, seq_int_keys, aln, aln_int_map, aln_int_keys, app, res, alignment)

    return new_alignment

예제 #7

0

파일 보기

파일: clustalw.py 프로젝트: jairideout/brokit

def bootstrap_tree_from_alignment(aln, seed=None, num_trees=None, params=None):
    """Returns a tree from Alignment object aln with bootstrap support values.

    aln: an cogent.core.alignment.Alignment object, or data that can be used
    to build one.

    seed: an interger, seed value to use

    num_trees: an integer, number of trees to bootstrap against

    params: dict of parameters to pass in to the Clustal app controller.

    The result will be an cogent.core.tree.PhyloNode object, or None if tree
    fails.

    If seed is not specifed in params, a random integer between 0-1000 is used.
    """
    # Create instance of controllor, enable bootstrap, disable alignment,tree
    app = Clustalw(InputHandler='_input_as_multiline_string', params=params, \
                   WorkingDir='/tmp')
    app.Parameters['-align'].off()
    app.Parameters['-tree'].off()

    if app.Parameters['-bootstrap'].isOff():
        if num_trees is None:
            num_trees = 1000

        app.Parameters['-bootstrap'].on(num_trees)

    if app.Parameters['-seed'].isOff():
        if seed is None:
            seed = randint(0,1000)

        app.Parameters['-seed'].on(seed)

    if app.Parameters['-bootlabels'].isOff():
        app.Parameters['-bootlabels'].on("node")

    # Setup mapping. Clustalw clips identifiers. We will need to remap them.
    seq_collection = SequenceCollection(aln)
    int_map, int_keys = seq_collection.getIntMap()
    int_map = SequenceCollection(int_map)

    # Collect result
    result = app(int_map.toFasta())

    # Build tree
    tree = DndParser(result['Tree'].read(), constructor=PhyloNode)
    for node in tree.tips():
        node.Name = int_keys[node.Name]

    # Clean up
    result.cleanUp()
    del(seq_collection, app, result, int_map, int_keys)

    return tree

예제 #8

0

파일 보기

def bootstrap_tree_from_alignment(aln, seed=None, num_trees=None, params=None):
    """Returns a tree from Alignment object aln with bootstrap support values.

    aln: an cogent.core.alignment.Alignment object, or data that can be used
    to build one.

    seed: an interger, seed value to use
    
    num_trees: an integer, number of trees to bootstrap against

    params: dict of parameters to pass in to the Clustal app controller.

    The result will be an cogent.core.tree.PhyloNode object, or None if tree
    fails.

    If seed is not specifed in params, a random integer between 0-1000 is used.
    """
    # Create instance of controllor, enable bootstrap, disable alignment,tree
    app = Clustalw(InputHandler='_input_as_multiline_string', params=params, \
                   WorkingDir='/tmp')
    app.Parameters['-align'].off()
    app.Parameters['-tree'].off()

    if app.Parameters['-bootstrap'].isOff():
        if num_trees is None:
            num_trees = 1000

        app.Parameters['-bootstrap'].on(num_trees)

    if app.Parameters['-seed'].isOff():
        if seed is None:
            seed = randint(0, 1000)

        app.Parameters['-seed'].on(seed)

    if app.Parameters['-bootlabels'].isOff():
        app.Parameters['-bootlabels'].on("node")

    # Setup mapping. Clustalw clips identifiers. We will need to remap them.
    seq_collection = SequenceCollection(aln)
    int_map, int_keys = seq_collection.getIntMap()
    int_map = SequenceCollection(int_map)

    # Collect result
    result = app(int_map.toFasta())

    # Build tree
    tree = DndParser(result['Tree'].read(), constructor=PhyloNode)
    for node in tree.tips():
        node.Name = int_keys[node.Name]

    # Clean up
    result.cleanUp()
    del (seq_collection, app, result, int_map, int_keys)

    return tree

예제 #9

0

파일 보기

파일: cd_hit.py 프로젝트: mikerobeson/pycogent

def cdhit_clusters_from_seqs(seqs, moltype, params=None):
    """Returns the CD-HIT clusters given seqs

    seqs        : dict like collection of sequences
    moltype     : cogent.core.moltype object
    params      : cd-hit parameters

    NOTE: This method will call CD_HIT if moltype is PROTIEN,
        CD_HIT_EST if moltype is RNA/DNA, and raise if any other
        moltype is passed.
    """
    # keys are not remapped. Tested against seq_ids of 100char length
    seqs = SequenceCollection(seqs, MolType=moltype)
    #Create mapping between abbreviated IDs and full IDs
    int_map, int_keys = seqs.getIntMap()
    #Create SequenceCollection from int_map.
    int_map = SequenceCollection(int_map,MolType=moltype)
    
    # setup params and make sure the output argument is set
    if params is None:
        params = {}
    if '-o' not in params:
        params['-o'] = get_tmp_filename()

    # call the correct version of cd-hit base on moltype
    working_dir = get_tmp_filename()
    if moltype is PROTEIN:
        app = CD_HIT(WorkingDir=working_dir, params=params)
    elif moltype is RNA:
        app = CD_HIT_EST(WorkingDir=working_dir, params=params)
    elif moltype is DNA:
        app = CD_HIT_EST(WorkingDir=working_dir, params=params)
    else:
        raise ValueError, "Moltype must be either PROTEIN, RNA, or DNA"

    # grab result
    res = app(int_map.toFasta())
    clusters = parse_cdhit_clstr_file(res['CLSTR'].readlines())

    remapped_clusters = []
    for c in clusters:
        curr = [int_keys[i] for i in c]
        remapped_clusters.append(curr)

    # perform cleanup
    res.cleanUp()
    shutil.rmtree(working_dir)
    remove(params['-o'] + '.bak.clstr')

    return remapped_clusters

예제 #10

0

파일 보기

파일: muscle.py 프로젝트: cxhernandez/pycogent

def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Returns a tree from Alignment object aln.
    
    aln: a cogent.core.alignment.Alignment object, or data that can be used 
    to build one.
    
    moltype: cogent.core.moltype.MolType object

    best_tree: unsupported
    
    params: dict of parameters to pass in to the Muscle app controller.
    
    The result will be an cogent.core.tree.PhyloNode object, or None if tree 
    fails.
    """
    # Create instance of app controller, enable tree, disable alignment
    app = Muscle(InputHandler='_input_as_multiline_string', params=params, \
                   WorkingDir='/tmp')

    app.Parameters['-cluster'].on()
    app.Parameters['-tree1'].on(get_tmp_filename(app.WorkingDir))
    app.Parameters['-seqtype'].on(moltype.label)

    seq_collection = SequenceCollection(aln, MolType=moltype)

    #Create mapping between abbreviated IDs and full IDs
    int_map, int_keys = seq_collection.getIntMap()
    #Create SequenceCollection from int_map.
    int_map = SequenceCollection(int_map,MolType=moltype)


    # Collect result
    result = app(int_map.toFasta())

    # Build tree
    tree = DndParser(result['Tree1Out'].read(), constructor=PhyloNode)
    
    for tip in tree.tips():
        tip.Name = int_keys[tip.Name]

    # Clean up
    result.cleanUp()
    del(seq_collection, app, result)

    return tree

예제 #11

0

파일 보기

파일: muscle_v38.py 프로젝트: miklou/pycogent

def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Returns a tree from Alignment object aln.
    
    aln: a cogent.core.alignment.Alignment object, or data that can be used 
    to build one.
    
    moltype: cogent.core.moltype.MolType object

    best_tree: unsupported
    
    params: dict of parameters to pass in to the Muscle app controller.
    
    The result will be an cogent.core.tree.PhyloNode object, or None if tree 
    fails.
    """
    # Create instance of app controller, enable tree, disable alignment
    app = Muscle(InputHandler='_input_as_multiline_string', params=params, \
                   WorkingDir='/tmp')

    app.Parameters['-clusteronly'].on()
    app.Parameters['-tree1'].on(get_tmp_filename(app.WorkingDir))
    app.Parameters['-seqtype'].on(moltype.label)

    seq_collection = SequenceCollection(aln, MolType=moltype)

    #Create mapping between abbreviated IDs and full IDs
    int_map, int_keys = seq_collection.getIntMap()
    #Create SequenceCollection from int_map.
    int_map = SequenceCollection(int_map,MolType=moltype)


    # Collect result
    result = app(int_map.toFasta())

    # Build tree
    tree = DndParser(result['Tree1Out'].read(), constructor=PhyloNode)
    
    for tip in tree.tips():
        tip.Name = int_keys[tip.Name]

    # Clean up
    result.cleanUp()
    del(seq_collection, app, result)

    return tree

예제 #12

0

파일 보기

def align_unaligned_seqs(seqs,moltype,params=None,accurate=False):
    """Aligns unaligned sequences

    seqs: either list of sequence objects or list of strings
    add_seq_names: boolean. if True, sequence names are inserted in the list
        of sequences. if False, it assumes seqs is a list of lines of some
        proper format that the program can handle
    """
    #create SequenceCollection object from seqs
    seq_collection = SequenceCollection(seqs,MolType=moltype)
    #Create mapping between abbreviated IDs and full IDs
    int_map, int_keys = seq_collection.getIntMap()
    #Create SequenceCollection from int_map.
    int_map = SequenceCollection(int_map,MolType=moltype)
    #Create Mafft app.
    app = Mafft(InputHandler='_input_as_multiline_string',params=params)
    
    #Turn on correct moltype
    moltype_string = moltype.label.upper()
    app.Parameters[MOLTYPE_MAP[moltype_string]].on()
    
    #Do not report progress
    app.Parameters['--quiet'].on()
    
    #More accurate alignment, sacrificing performance.
    if accurate:
        app.Parameters['--globalpair'].on()
        app.Parameters['--maxiterate'].Value=1000
    
    #Get results using int_map as input to app
    res = app(int_map.toFasta())
    #Get alignment as dict out of results
    alignment = dict(MinimalFastaParser(res['StdOut'].readlines()))
    #Make new dict mapping original IDs
    new_alignment = {}
    for k,v in list(alignment.items()):
        new_alignment[int_keys[k]]=v
    #Create an Alignment object from alignment dict
    new_alignment = Alignment(new_alignment,MolType=moltype)
    #Clean up
    res.cleanUp()
    del(seq_collection,int_map,int_keys,app,res,alignment)

    return new_alignment

예제 #13

0

파일 보기

파일: mafft.py 프로젝트: ElDeveloper/brokit

def align_unaligned_seqs(seqs,moltype,params=None,accurate=False):
    """Aligns unaligned sequences

    seqs: either list of sequence objects or list of strings
    add_seq_names: boolean. if True, sequence names are inserted in the list
        of sequences. if False, it assumes seqs is a list of lines of some
        proper format that the program can handle
    """
    #create SequenceCollection object from seqs
    seq_collection = SequenceCollection(seqs,MolType=moltype)
    #Create mapping between abbreviated IDs and full IDs
    int_map, int_keys = seq_collection.getIntMap()
    #Create SequenceCollection from int_map.
    int_map = SequenceCollection(int_map,MolType=moltype)
    #Create Mafft app.
    app = Mafft(InputHandler='_input_as_multiline_string',params=params)
    
    #Turn on correct moltype
    moltype_string = moltype.label.upper()
    app.Parameters[MOLTYPE_MAP[moltype_string]].on()
    
    #Do not report progress
    app.Parameters['--quiet'].on()
    
    #More accurate alignment, sacrificing performance.
    if accurate:
        app.Parameters['--globalpair'].on()
        app.Parameters['--maxiterate'].Value=1000
    
    #Get results using int_map as input to app
    res = app(int_map.toFasta())
    #Get alignment as dict out of results
    alignment = dict(parse_fasta(res['StdOut']))
    #Make new dict mapping original IDs
    new_alignment = {}
    for k,v in alignment.items():
        new_alignment[int_keys[k]]=v
    #Create an Alignment object from alignment dict
    new_alignment = Alignment(new_alignment,MolType=moltype)
    #Clean up
    res.cleanUp()
    del(seq_collection,int_map,int_keys,app,res,alignment)

    return new_alignment

예제 #14

0

파일 보기

파일: cd_hit.py 프로젝트: mikerobeson/pycogent

def cdhit_from_seqs(seqs, moltype, params=None):
    """Returns the CD-HIT results given seqs

    seqs    : dict like collection of sequences
    moltype : cogent.core.moltype object
    params  : cd-hit parameters

    NOTE: This method will call CD_HIT if moltype is PROTIEN,
        CD_HIT_EST if moltype is RNA/DNA, and raise if any other
        moltype is passed.
    """
    # keys are not remapped. Tested against seq_ids of 100char length
    seqs = SequenceCollection(seqs, MolType=moltype)

    # setup params and make sure the output argument is set
    if params is None:
        params = {}
    if '-o' not in params:
        params['-o'] = get_tmp_filename()

    # call the correct version of cd-hit base on moltype
    working_dir = get_tmp_filename()
    if moltype is PROTEIN:
        app = CD_HIT(WorkingDir=working_dir, params=params)
    elif moltype is RNA:
        app = CD_HIT_EST(WorkingDir=working_dir, params=params)
    elif moltype is DNA:
        app = CD_HIT_EST(WorkingDir=working_dir, params=params)
    else:
        raise ValueError, "Moltype must be either PROTEIN, RNA, or DNA"

    # grab result
    res = app(seqs.toFasta())
    new_seqs = dict(MinimalFastaParser(res['FASTA'].readlines()))

    # perform cleanup
    res.cleanUp()
    shutil.rmtree(working_dir)
    remove(params['-o'] + '.bak.clstr')

    return SequenceCollection(new_seqs, MolType=moltype)

예제 #15

0

파일 보기

def align_unaligned_seqs(seqs, moltype, params=None):
    """Returns an Alignment object from seqs.

    seqs: SequenceCollection object, or data that can be used to build one.
    
    moltype: a MolType object.  DNA, RNA, or PROTEIN.

    params: dict of parameters to pass in to the Muscle app controller.
    
    Result will be an Alignment object.
    """
    if not params:
        params = {}
    #create SequenceCollection object from seqs
    seq_collection = SequenceCollection(seqs, MolType=moltype)
    #Create mapping between abbreviated IDs and full IDs
    int_map, int_keys = seq_collection.getIntMap()
    #Create SequenceCollection from int_map.
    int_map = SequenceCollection(int_map, MolType=moltype)
    #get temporary filename
    params.update({'-out': get_tmp_filename()})
    #Create Muscle app.
    app = Muscle(InputHandler='_input_as_multiline_string',\
                 params=params)
    #Get results using int_map as input to app
    res = app(int_map.toFasta())
    #Get alignment as dict out of results
    alignment = dict(MinimalFastaParser(res['MuscleOut'].readlines()))
    #Make new dict mapping original IDs
    new_alignment = {}
    for k, v in alignment.items():
        new_alignment[int_keys[k]] = v
    #Create an Alignment object from alignment dict
    new_alignment = Alignment(new_alignment, MolType=moltype)
    #Clean up
    res.cleanUp()
    del (seq_collection, int_map, int_keys, app, res, alignment, params)

    return new_alignment

예제 #16

0

파일 보기

파일: muscle_v38.py 프로젝트: miklou/pycogent

def align_unaligned_seqs(seqs, moltype, params=None):
    """Returns an Alignment object from seqs.

    seqs: SequenceCollection object, or data that can be used to build one.
    
    moltype: a MolType object.  DNA, RNA, or PROTEIN.

    params: dict of parameters to pass in to the Muscle app controller.
    
    Result will be an Alignment object.
    """
    if not params:
        params = {}
    #create SequenceCollection object from seqs
    seq_collection = SequenceCollection(seqs,MolType=moltype)
    #Create mapping between abbreviated IDs and full IDs
    int_map, int_keys = seq_collection.getIntMap()
    #Create SequenceCollection from int_map.
    int_map = SequenceCollection(int_map,MolType=moltype)
    #get temporary filename
    params.update({'-out':get_tmp_filename()})
    #Create Muscle app.
    app = Muscle(InputHandler='_input_as_multiline_string',\
                 params=params)
    #Get results using int_map as input to app
    res = app(int_map.toFasta())
    #Get alignment as dict out of results
    alignment = dict(MinimalFastaParser(res['MuscleOut'].readlines()))
    #Make new dict mapping original IDs
    new_alignment = {}
    for k,v in alignment.items():
        new_alignment[int_keys[k]]=v
    #Create an Alignment object from alignment dict
    new_alignment = Alignment(new_alignment,MolType=moltype)
    #Clean up
    res.cleanUp()
    del(seq_collection,int_map,int_keys,app,res,alignment,params)

    return new_alignment

예제 #17

0

파일 보기

def create_locarnap_alignment(seqs, moltype, struct=False, params=None):
    """Returns mlocarna results given an unaligned SequenceCollection.
    
        - seqs: A SequenceCollection object or something that behaves like one.
        - moltype: cogent.core.moltype object.
        -struct: Boolean whether or not to also output vienna structure string
    """
    #Construct SequenceCollection object.
    seqs = SequenceCollection(seqs, MolType=moltype)

    #need to make int map.
    int_map, int_keys = seqs.getIntMap()
    #construct SequenceCollection object from int map to use functionality
    int_map = SequenceCollection(int_map, MolType=moltype)

    #Create application.
    app = MLocarna(InputHandler='_input_as_multiline_string', params=params)
    #Get temporary directory to write all mlocarna files.
    mlocarna_dir = get_tmp_filename(suffix='')
    app.Parameters['--tgtdir'].on(mlocarna_dir)

    #set parameters to run locarna-p
    app.Parameters['--write-structure'].on()
    app.Parameters['--probabilistic'].on()
    app.Parameters['--consistency-transformation'].on()
    res = app(int_map.toFasta())
    #get the structure from the results if necessary
    if struct:
        structfile = open(res['ProbabilisticAlignment'].name, 'U')
        structure = ""
        newstrline = True
        for line in structfile:
            line = line.strip()
            #read in structure lines of alignment (--write-structure)
            if len(line) > 0 and (line[0] == "." or line[0] == "("):
                #only append if new structure aspect, since struct is
                #written both above and below blocks in alignment
                if newstrline:
                    structure += line
                    newstrline = not newstrline
                else:
                    newstrline = not newstrline

    aligned = dict(ClustalParser(res['ClustalAlignment']))

    #Make new dict mapping original IDs
    new_alignment = {}
    for k, v in aligned.items():
        new_alignment[int_keys.get(k, k)] = v
    #Create an Alignment object from alignment dict
    new_alignment = Alignment(new_alignment, MolType=moltype)

    #Clean up after MlocARNA
    res.cleanUp()
    shutil.rmtree(mlocarna_dir)

    #output alignment and structure if asked for, else outout just alignment
    if struct:
        return new_alignment, structure
    else:
        return new_alignment

예제 #18

0

파일 보기

파일: muscle_v38.py 프로젝트: biocore/burrito-fillings

def add_seqs_to_alignment(seqs, aln, params=None):
    """Returns an Alignment object from seqs and existing Alignment.

    seqs: a cogent.core.alignment.SequenceCollection object, or data that can
    be used to build one.

    aln: a cogent.core.alignment.Alignment object, or data that can be used
    to build one

    params: dict of parameters to pass in to the Muscle app controller.
    """
    if not params:
        params = {}

    #create SequenceCollection object from seqs
    seqs_collection = SequenceCollection(seqs)
    #Create mapping between abbreviated IDs and full IDs
    seqs_int_map, seqs_int_keys = seqs_collection.getIntMap(prefix='seq_')
    #Create SequenceCollection from int_map.
    seqs_int_map = SequenceCollection(seqs_int_map)

    #create SequenceCollection object from aln
    aln_collection = SequenceCollection(aln)
    #Create mapping between abbreviated IDs and full IDs
    aln_int_map, aln_int_keys = aln_collection.getIntMap(prefix='aln_')
    #Create SequenceCollection from int_map.
    aln_int_map = SequenceCollection(aln_int_map)

    #set output and profile options
    params.update({'-out':get_tmp_filename(), '-profile':True})

    #save seqs to tmp file
    seqs_filename = get_tmp_filename()
    seqs_out = open(seqs_filename,'w')
    seqs_out.write(seqs_int_map.toFasta())
    seqs_out.close()

    #save aln to tmp file
    aln_filename = get_tmp_filename()
    aln_out = open(aln_filename, 'w')
    aln_out.write(aln_int_map.toFasta())
    aln_out.close()

    #Create Muscle app and get results
    app = Muscle(InputHandler='_input_as_multifile', params=params,
                 WorkingDir=tempfile.gettempdir())
    res = app((aln_filename, seqs_filename))

    #Get alignment as dict out of results
    alignment = dict(parse_fasta(res['MuscleOut']))
    #Make new dict mapping original IDs
    new_alignment = {}
    for k,v in alignment.items():
        if k in seqs_int_keys:
            new_alignment[seqs_int_keys[k]] = v
        else:
            new_alignment[aln_int_keys[k]] = v

    #Create an Alignment object from alignment dict
    new_alignment = Alignment(new_alignment)

    #Clean up
    res.cleanUp()
    del(seqs_collection, seqs_int_map, seqs_int_keys)
    del(aln_collection, aln_int_map, aln_int_keys)
    del(app, res, alignment, params)
    remove(seqs_filename)
    remove(aln_filename)

    return new_alignment

예제 #19

0

파일 보기

파일: muscle_v38.py 프로젝트: miklou/pycogent

def align_two_alignments(aln1, aln2, params=None):
    """Returns an Alignment object from two existing Alignments.
    
    aln1, aln2: cogent.core.alignment.Alignment objects, or data that can be 
    used to build them.
    
    params: dict of parameters to pass in to the Muscle app controller.
    """
    if not params:
        params = {}

    #create SequenceCollection object from aln1
    aln1_collection = SequenceCollection(aln1)
    #Create mapping between abbreviated IDs and full IDs
    aln1_int_map, aln1_int_keys = aln1_collection.getIntMap(prefix='aln1_')
    #Create SequenceCollection from int_map.
    aln1_int_map = SequenceCollection(aln1_int_map)

    #create SequenceCollection object from aln2
    aln2_collection = SequenceCollection(aln2)
    #Create mapping between abbreviated IDs and full IDs
    aln2_int_map, aln2_int_keys = aln2_collection.getIntMap(prefix='aln2_')
    #Create SequenceCollection from int_map.
    aln2_int_map = SequenceCollection(aln2_int_map)

    #set output and profile options
    params.update({'-out':get_tmp_filename(), '-profile':True})

    #save aln1 to tmp file
    aln1_filename = get_tmp_filename()
    aln1_out = open(aln1_filename,'w')
    aln1_out.write(aln1_int_map.toFasta())
    aln1_out.close()

    #save aln2 to tmp file
    aln2_filename = get_tmp_filename()
    aln2_out = open(aln2_filename, 'w')
    aln2_out.write(aln2_int_map.toFasta())
    aln2_out.close()

    #Create Muscle app and get results
    app = Muscle(InputHandler='_input_as_multifile', params=params)
    res = app((aln1_filename, aln2_filename))

    #Get alignment as dict out of results
    alignment = dict(MinimalFastaParser(res['MuscleOut'].readlines()))

    #Make new dict mapping original IDs
    new_alignment = {}
    for k,v in alignment.items():
        if k in aln1_int_keys:
            new_alignment[aln1_int_keys[k]] = v
        else:
            new_alignment[aln2_int_keys[k]] = v

    #Create an Alignment object from alignment dict
    new_alignment = Alignment(new_alignment)

    #Clean up
    res.cleanUp()
    del(aln1_collection, aln1_int_map, aln1_int_keys)
    del(aln2_collection, aln2_int_map, aln2_int_keys)
    del(app, res, alignment, params)
    remove(aln1_filename)
    remove(aln2_filename)

    return new_alignment

예제 #20

0

파일 보기

파일: mafft.py 프로젝트: ElDeveloper/brokit

def add_seqs_to_alignment(seqs, aln, moltype, params=None, accurate=False):
    """Returns an Alignment object from seqs and existing Alignment.

    seqs: a cogent.core.sequence.Sequence object, or data that can be used
    to build one.

    aln: an cogent.core.alignment.Alignment object, or data that can be used
    to build one

    params: dict of parameters to pass in to the Mafft app controller.
    """
    #create SequenceCollection object from seqs
    seq_collection = SequenceCollection(seqs,MolType=moltype)
    #Create mapping between abbreviated IDs and full IDs
    seq_int_map, seq_int_keys = seq_collection.getIntMap()
    #Create SequenceCollection from int_map.
    seq_int_map = SequenceCollection(seq_int_map,MolType=moltype)
    
    #create Alignment object from aln
    aln = Alignment(aln,MolType=moltype)
    #Create mapping between abbreviated IDs and full IDs
    aln_int_map, aln_int_keys = aln.getIntMap(prefix='seqn_')
    #Create SequenceCollection from int_map.
    aln_int_map = Alignment(aln_int_map,MolType=moltype)
    
    #Update seq_int_keys with aln_int_keys
    seq_int_keys.update(aln_int_keys)
    
    #Create Mafft app.
    app = Mafft(InputHandler='_input_as_multiline_string',\
        params=params,
        SuppressStderr=True)
    
    #Turn on correct moltype
    moltype_string = moltype.label.upper()
    app.Parameters[MOLTYPE_MAP[moltype_string]].on()
    
    #Do not report progress
    app.Parameters['--quiet'].on()
    
    #Add aln_int_map as seed alignment
    app.Parameters['--seed'].on(\
        app._tempfile_as_multiline_string(aln_int_map.toFasta()))
        
    #More accurate alignment, sacrificing performance.
    if accurate:
        app.Parameters['--globalpair'].on()
        app.Parameters['--maxiterate'].Value=1000
    
    #Get results using int_map as input to app
    res = app(seq_int_map.toFasta())
    #Get alignment as dict out of results
    alignment = dict(parse_fasta(res['StdOut']))
    
    #Make new dict mapping original IDs
    new_alignment = {}
    for k,v in alignment.items():
        key = k.replace('_seed_','')
        new_alignment[seq_int_keys[key]]=v
    #Create an Alignment object from alignment dict
    new_alignment = Alignment(new_alignment,MolType=moltype)
    #Clean up
    res.cleanUp()
    remove(app.Parameters['--seed'].Value)
    del(seq_collection,seq_int_map,seq_int_keys,\
        aln,aln_int_map,aln_int_keys,app,res,alignment)

    return new_alignment

예제 #21

0

파일 보기

def add_seqs_to_alignment(seqs, aln, moltype, params=None, accurate=False):
    """Returns an Alignment object from seqs and existing Alignment.

    seqs: a cogent.core.sequence.Sequence object, or data that can be used
    to build one.

    aln: an cogent.core.alignment.Alignment object, or data that can be used
    to build one

    params: dict of parameters to pass in to the Mafft app controller.
    """
    #create SequenceCollection object from seqs
    seq_collection = SequenceCollection(seqs,MolType=moltype)
    #Create mapping between abbreviated IDs and full IDs
    seq_int_map, seq_int_keys = seq_collection.getIntMap()
    #Create SequenceCollection from int_map.
    seq_int_map = SequenceCollection(seq_int_map,MolType=moltype)
    
    #create Alignment object from aln
    aln = Alignment(aln,MolType=moltype)
    #Create mapping between abbreviated IDs and full IDs
    aln_int_map, aln_int_keys = aln.getIntMap(prefix='seqn_')
    #Create SequenceCollection from int_map.
    aln_int_map = Alignment(aln_int_map,MolType=moltype)
    
    #Update seq_int_keys with aln_int_keys
    seq_int_keys.update(aln_int_keys)
    
    #Create Mafft app.
    app = Mafft(InputHandler='_input_as_multiline_string',\
        params=params,
        SuppressStderr=True)
    
    #Turn on correct moltype
    moltype_string = moltype.label.upper()
    app.Parameters[MOLTYPE_MAP[moltype_string]].on()
    
    #Do not report progress
    app.Parameters['--quiet'].on()
    
    #Add aln_int_map as seed alignment
    app.Parameters['--seed'].on(\
        app._tempfile_as_multiline_string(aln_int_map.toFasta()))
        
    #More accurate alignment, sacrificing performance.
    if accurate:
        app.Parameters['--globalpair'].on()
        app.Parameters['--maxiterate'].Value=1000
    
    #Get results using int_map as input to app
    res = app(seq_int_map.toFasta())
    #Get alignment as dict out of results
    alignment = dict(MinimalFastaParser(res['StdOut'].readlines()))
    
    #Make new dict mapping original IDs
    new_alignment = {}
    for k,v in list(alignment.items()):
        key = k.replace('_seed_','')
        new_alignment[seq_int_keys[key]]=v
    #Create an Alignment object from alignment dict
    new_alignment = Alignment(new_alignment,MolType=moltype)
    #Clean up
    res.cleanUp()
    remove(app.Parameters['--seed'].Value)
    del(seq_collection,seq_int_map,seq_int_keys,\
        aln,aln_int_map,aln_int_keys,app,res,alignment)

    return new_alignment

예제 #22

0

파일 보기

def align_two_alignments(aln1, aln2, params=None):
    """Returns an Alignment object from two existing Alignments.
    
    aln1, aln2: cogent.core.alignment.Alignment objects, or data that can be 
    used to build them.
    
    params: dict of parameters to pass in to the Muscle app controller.
    """
    if not params:
        params = {}

    #create SequenceCollection object from aln1
    aln1_collection = SequenceCollection(aln1)
    #Create mapping between abbreviated IDs and full IDs
    aln1_int_map, aln1_int_keys = aln1_collection.getIntMap(prefix='aln1_')
    #Create SequenceCollection from int_map.
    aln1_int_map = SequenceCollection(aln1_int_map)

    #create SequenceCollection object from aln2
    aln2_collection = SequenceCollection(aln2)
    #Create mapping between abbreviated IDs and full IDs
    aln2_int_map, aln2_int_keys = aln2_collection.getIntMap(prefix='aln2_')
    #Create SequenceCollection from int_map.
    aln2_int_map = SequenceCollection(aln2_int_map)

    #set output and profile options
    params.update({'-out': get_tmp_filename(), '-profile': True})

    #save aln1 to tmp file
    aln1_filename = get_tmp_filename()
    aln1_out = open(aln1_filename, 'w')
    aln1_out.write(aln1_int_map.toFasta())
    aln1_out.close()

    #save aln2 to tmp file
    aln2_filename = get_tmp_filename()
    aln2_out = open(aln2_filename, 'w')
    aln2_out.write(aln2_int_map.toFasta())
    aln2_out.close()

    #Create Muscle app and get results
    app = Muscle(InputHandler='_input_as_multifile', params=params)
    res = app((aln1_filename, aln2_filename))

    #Get alignment as dict out of results
    alignment = dict(MinimalFastaParser(res['MuscleOut'].readlines()))

    #Make new dict mapping original IDs
    new_alignment = {}
    for k, v in alignment.items():
        if k in aln1_int_keys:
            new_alignment[aln1_int_keys[k]] = v
        else:
            new_alignment[aln2_int_keys[k]] = v

    #Create an Alignment object from alignment dict
    new_alignment = Alignment(new_alignment)

    #Clean up
    res.cleanUp()
    del (aln1_collection, aln1_int_map, aln1_int_keys)
    del (aln2_collection, aln2_int_map, aln2_int_keys)
    del (app, res, alignment, params)
    remove(aln1_filename)
    remove(aln2_filename)

    return new_alignment

예제 #23

0

파일 보기

def add_seqs_to_alignment(seqs, aln, moltype, params=None):
    """Returns an Alignment object from seqs and existing Alignment.

    seqs: a cogent.core.alignment.SequenceCollection object, or data that can
    be used to build one.

    aln: a cogent.core.alignment.Alignment object, or data that can be used to
    build one

    params: dict of parameters to pass in to the Clustal app controller.
    """
    #create SequenceCollection object from seqs
    seq_collection = SequenceCollection(seqs, MolType=moltype)
    #Create mapping between abbreviated IDs and full IDs
    seq_int_map, seq_int_keys = seq_collection.getIntMap()
    #Create SequenceCollection from int_map.
    seq_int_map = SequenceCollection(seq_int_map, MolType=moltype)

    #create Alignment object from aln
    aln = Alignment(aln, MolType=moltype)
    #Create mapping between abbreviated IDs and full IDs
    aln_int_map, aln_int_keys = aln.getIntMap(prefix='seqn_')
    #Create SequenceCollection from int_map.
    aln_int_map = Alignment(aln_int_map, MolType=moltype)

    #Update seq_int_keys with aln_int_keys
    seq_int_keys.update(aln_int_keys)

    #Create Mafft app.
    app = Clustalw(InputHandler='_input_as_multiline_string',\
        params=params,
        SuppressStderr=True)
    app.Parameters['-align'].off()
    app.Parameters['-infile'].off()
    app.Parameters['-sequences'].on()

    #Add aln_int_map as profile1
    app.Parameters['-profile1'].on(\
        app._tempfile_as_multiline_string(aln_int_map.toFasta()))

    #Add seq_int_map as profile2
    app.Parameters['-profile2'].on(\
        app._tempfile_as_multiline_string(seq_int_map.toFasta()))
    #Get results using int_map as input to app
    res = app()

    #Get alignment as dict out of results
    alignment = dict(ClustalParser(res['Align'].readlines()))

    #Make new dict mapping original IDs
    new_alignment = {}
    for k, v in alignment.items():
        new_alignment[seq_int_keys[k]] = v
    #Create an Alignment object from alignment dict
    new_alignment = Alignment(new_alignment, MolType=moltype)
    #Clean up
    res.cleanUp()
    remove(app.Parameters['-profile1'].Value)
    remove(app.Parameters['-profile2'].Value)
    del(seq_collection,seq_int_map,seq_int_keys,\
        aln,aln_int_map,aln_int_keys,app,res,alignment)

    return new_alignment

예제 #24

0

파일 보기

파일: locarna.py 프로젝트: squirrelo/SelexTrace2

def create_locarnap_alignment(seqs,moltype,struct=False,params=None):
    """Returns mlocarna results given an unaligned SequenceCollection.
    
        - seqs: A SequenceCollection object or something that behaves like one.
        - moltype: cogent.core.moltype object.
        -struct: Boolean whether or not to also output vienna structure string
    """
    #Construct SequenceCollection object.
    seqs = SequenceCollection(seqs,MolType=moltype)

    #need to make int map.
    int_map, int_keys = seqs.getIntMap()
    #construct SequenceCollection object from int map to use functionality
    int_map = SequenceCollection(int_map, MolType=moltype)
    
    #Create application.
    app = MLocarna(InputHandler='_input_as_multiline_string',params=params)
    #Get temporary directory to write all mlocarna files.
    mlocarna_dir = get_tmp_filename(suffix='')
    app.Parameters['--tgtdir'].on(mlocarna_dir)

    #set parameters to run locarna-p
    app.Parameters['--write-structure'].on()
    app.Parameters['--probabilistic'].on()
    app.Parameters['--consistency-transformation'].on()
    res = app(int_map.toFasta())
    #get the structure from the results if necessary
    if struct:
        structfile = open(res['ProbabilisticAlignment'].name, 'U')
        structure = ""
        newstrline = True
        for line in structfile:
            line = line.strip()
            #read in structure lines of alignment (--write-structure)
            if len(line) > 0 and (line[0] == "." or line[0] == "("):
                #only append if new structure aspect, since struct is 
                #written both above and below blocks in alignment
                if newstrline:
                    structure += line
                    newstrline = not newstrline
                else:
                    newstrline = not newstrline
        


    aligned = dict(ClustalParser(res['ClustalAlignment']))
    
    #Make new dict mapping original IDs
    new_alignment={}
    for k,v in aligned.items():
        new_alignment[int_keys.get(k,k)]=v
    #Create an Alignment object from alignment dict
    new_alignment = Alignment(new_alignment,MolType=moltype)

    #Clean up after MlocARNA
    res.cleanUp()
    shutil.rmtree(mlocarna_dir)

    #output alignment and structure if asked for, else outout just alignment
    if struct:
        return new_alignment, structure
    else:
        return new_alignment

예제 #25

0

파일 보기

def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Returns a tree from Alignment object aln.

    aln: an cogent.core.alignment.Alignment object, or data that can be used
    to build one.

    moltype: cogent.core.moltype.MolType object

    best_tree: if True (default:False), uses a slower but more accurate
    algorithm to build the tree.

    params: dict of parameters to pass in to the Clustal app controller.

    The result will be an cogent.core.tree.PhyloNode object, or None if tree
    fails.
    """
    # Create instance of app controller, enable tree, disable alignment
    app = Clustalw(InputHandler='_input_as_multiline_string', params=params, \
                   WorkingDir='/tmp')
    app.Parameters['-align'].off()

    #Set params to empty dict if None.
    if params is None:
        params = {}

    if moltype == DNA or moltype == RNA:
        params['-type'] = 'd'
    elif moltype == PROTEIN:
        params['-type'] = 'p'
    else:
        raise ValueError, "moltype must be DNA, RNA, or PROTEIN"

    # best_tree -> bootstrap
    if best_tree:
        if '-bootstrap' not in params:
            app.Parameters['-bootstrap'].on(1000)
        if '-seed' not in params:
            app.Parameters['-seed'].on(randint(0, 1000))
        if '-bootlabels' not in params:
            app.Parameters['-bootlabels'].on('nodes')
    else:
        app.Parameters['-tree'].on()

    # Setup mapping. Clustalw clips identifiers. We will need to remap them.
    seq_collection = SequenceCollection(aln)
    int_map, int_keys = seq_collection.getIntMap()
    int_map = SequenceCollection(int_map)

    # Collect result
    result = app(int_map.toFasta())

    # Build tree
    tree = DndParser(result['Tree'].read(), constructor=PhyloNode)
    for node in tree.tips():
        node.Name = int_keys[node.Name]

    # Clean up
    result.cleanUp()
    del (seq_collection, app, result, int_map, int_keys)

    return tree