Esempio n. 1
0
def GroupFastaParser(data,
                     label_to_name,
                     group_key="Group",
                     aligned=False,
                     moltype=ASCII,
                     done_groups=None,
                     DEBUG=False):
    """Yield each run of related sequences as a separate seq collection.

    Records are read with MinimalFastaParser (using XmfaFinder) and grouped
    on ``label.Info[group_key]``. A new group starts when a record carries a
    group key that has not been seen before; a record whose key was seen at
    any earlier point is added to the collection currently being built.

    Arguments:
        - data: line iterable data source
        - label_to_name: LabelParser callback
        - group_key: name of group key in RichLabel.Info object
        - aligned: whether sequences are to be considered aligned
        - moltype: default is ASCII
        - done_groups: series of group keys to be excluded
        - DEBUG: when true, print each label and the keys of every
          collection that is emitted

    Yields:
        The object returned by ``cogent.LoadSeqs`` for each group, with its
        ``Info`` attribute set to ``Info(Group=<group id>)``. Groups listed
        in ``done_groups`` are skipped (including the last group in the
        data), and nothing is yielded when ``data`` holds no records.
    """
    if done_groups is None:
        done_groups = []
    parser = MinimalFastaParser(data,
                                label_to_name=label_to_name,
                                finder=XmfaFinder)
    group_ids = []
    current_collection = {}
    for label, seq in parser:
        seq = moltype.makeSequence(seq, Name=label, Info=label.Info)
        if DEBUG:
            print("str(label)  %s repr(label) %s" % (str(label), repr(label)))
        group_id = label.Info[group_key]
        if not group_ids:
            # very first record opens the first group
            group_ids.append(group_id)
            current_collection[label] = seq
        elif group_id in group_ids:
            current_collection[label] = seq
        else:
            # an unseen group key: emit the finished group before starting
            # to collect the new one
            if group_ids[-1] not in done_groups:
                yield _build_group_collection(current_collection,
                                              group_ids[-1], moltype,
                                              aligned, DEBUG)
            current_collection = {label: seq}
            group_ids.append(group_id)

    # Emit the trailing group. Guarded so that empty input does not index an
    # empty list and so that done_groups applies to the last group as well.
    if group_ids and group_ids[-1] not in done_groups:
        yield _build_group_collection(current_collection, group_ids[-1],
                                      moltype, aligned, DEBUG)


def _build_group_collection(collection, group_id, moltype, aligned, debug):
    """Load ``collection`` via cogent.LoadSeqs and tag it with its group id."""
    if debug:
        print("GroupParser collection keys %s" % (collection.keys(),))
    seqs = cogent.LoadSeqs(data=collection,
                           moltype=moltype,
                           aligned=aligned)
    seqs.Info = Info(Group=group_id)
    return seqs
Esempio n. 2
0
def get_alignment_tree(fname):
    """Build, print and return a neighbour joining tree for a FASTA file.

    The sequences in ``fname`` are loaded with ``cogent.LoadSeqs``, pairwise
    distances are estimated with an F81 substitution model, and a neighbour
    joining tree is built from those distances and then balanced. An ASCII
    rendering of the tree followed by a blank line is written to stdout.

    Arguments:
        - fname: path of the FASTA file to load

    Returns:
        The balanced tree object produced by ``nj.nj(...).balanced()``.
    """
    from cogent.phylo import distance, nj
    from cogent.evolve.models import F81

    al = cogent.LoadSeqs(fname, format='fasta')
    d = distance.EstimateDistances(al, submodel=F81())
    d.run(show_progress=False)
    mytree = nj.nj(d.getPairwiseDistances())
    mytree = mytree.balanced()
    print(mytree.asciiArt())
    # explicit empty string so a blank line is emitted on Python 2 and 3
    print("")
    # NOTE: rendering the tree to 'tree-scaled.pdf' with
    # cogent.draw.dendrogram.SquareDendrogram was disabled in the original
    # code and remains disabled here.
    return mytree
Esempio n. 3
0
def main():
    """Build one tree per ``*.fasta`` file in a directory.

    Command line arguments:
        - sys.argv[1]: directory searched (non-recursively) for ``*.fasta``
        - sys.argv[2]: output directory, created with ``mkdir_p``

    For every input file the sequences are loaded as unaligned protein,
    aligned with ``align_unaligned_seqs``, and a tree is built with
    ``build_tree_from_alignment``. The tree's string form, with single
    quotes removed, is written to ``<output dir>/<group name>.nwk`` where the
    group name is the input file name without its ``.fasta`` extension.

    If a file cannot be loaded the error is printed and the process exits
    with status 1.
    """
    fdir = sys.argv[1]
    odir = sys.argv[2]
    mkdir_p(odir)

    for fname in glob.iglob("{0}/*.fasta".format(fdir)):
        # splitext removes exactly the extension; str.rstrip(".fasta") would
        # strip any trailing run of the characters '.', 'f', 'a', 's', 't'
        # and so mangle names such as "data.fasta" (-> "d").
        groupName = os.path.splitext(os.path.basename(fname))[0]
        print("group {0}".format(groupName))
        try:
            seqs = cogent.LoadSeqs(fname,
                                   moltype=cogent.PROTEIN,
                                   aligned=False)
        except Exception as e:
            print(e)
            # a failed load is an error: report it through the exit status
            sys.exit(1)
        aln = align_unaligned_seqs(seqs, cogent.PROTEIN)
        t = build_tree_from_alignment(aln, cogent.PROTEIN)
        print("tree for group {0}".format(str(t)))
        # text mode: the payload is a str, not bytes
        with open(os.path.join(odir, groupName + ".nwk"), 'w') as ofile:
            ofile.write(str(t).replace("'", ""))
Esempio n. 4
0
def reconstruct(lf, aln, tree, locus=None):
    """Recursively score the subtree below ``tree`` and record best states.

    NOTE(review): this looks like a max-product (joint) ancestral state
    reconstruction over a single alignment column -- confirm with callers.

    Arguments:
        - lf: object providing ``getPsubForEdge(name, locus=...)``,
          ``getMotifProbs()`` and ``model.getAlphabet()``
        - aln: object whose ``NamedSeqs[name]`` gives the observed sequence
          for a tip
        - tree: the node to process; its ``Children`` are processed first
        - locus: passed through to ``lf.getPsubForEdge``

    Side effects:
        Every non-root node gets an attribute ``C``, a callable mapping a
        state ``i`` to the state chosen for that node. The root gets an
        attribute ``anc`` holding its chosen state.

    Returns:
        For a tip or an internal non-root node, a callable mapping a state
        ``i`` to the best score of the subtree given ``i``. For the root,
        the result of ``cogent.LoadSeqs`` on the ``(name, state)`` pairs
        gathered by ``_get_anc``.
    """
    if tree.isTip():
        edge = tree.Name
        psub = lf.getPsubForEdge(edge, locus=locus)
        observed = str(aln.NamedSeqs[edge])

        if 'N' not in observed:
            # unambiguous observation: the tip state is fixed
            def tip_score(i):
                return psub[i, observed]

            def tip_choice(i):
                return observed
        else:
            # ambiguous observation: consider every state it may stand for
            candidates = lf.model.getAlphabet().resolveAmbiguity(observed)

            def tip_score(i):
                return max(psub[i, state] for state in candidates)

            def tip_choice(i):
                best = argmax([psub[i, state] for state in candidates])
                return candidates[best]

        tree.C = tip_choice
        return tip_score

    child_scores = [
        reconstruct(lf, aln, child, locus=locus) for child in tree.Children
    ]
    states = list(lf.model.getAlphabet())

    # for each state of this node, the product of the children's best scores
    subtree = {}
    for state in states:
        subtree[state] = prod([score(state) for score in child_scores])

    if tree.isRoot():
        priors = lf.getMotifProbs()
        best = argmax([priors[state] * subtree[state] for state in states])
        tree.anc = states[best]
        result = [(tree.Name, tree.anc)]
        for child in tree.Children:
            _get_anc(child, result)
        return cogent.LoadSeqs(data=result)

    psub = lf.getPsubForEdge(tree.Name, locus=locus)

    def node_score(i):
        return max(psub[i, state] * subtree[state] for state in states)

    def node_choice(i):
        best = argmax([psub[i, state] * subtree[state] for state in states])
        return states[best]

    tree.C = node_choice
    return node_score
Esempio n. 5
0
def main():
    """Build a single tree from a sequence file and write it to disk.

    Options are parsed with docopt from the module docstring; the values
    read are ``<network>``, ``<seqfile>`` and ``-o``.

    The sequences are loaded as unaligned protein, aligned with
    ``align_unaligned_seqs`` and turned into a tree with
    ``build_tree_from_alignment``. The tree's string form, with single
    quotes removed, is written to the path given by ``-o``; when ``-o``
    carries no usable string value the previous hard-coded file name
    ``all`` is used.
    NOTE(review): assumes ``-o`` takes a file name argument in the usage
    text -- confirm against the module docstring.

    If the sequence file cannot be loaded the error is printed and the
    process exits with status 1.
    """
    arguments = docopt(__doc__, version='BuildSingleTree v1.0')
    print(arguments)
    netname = arguments['<network>']
    seqname = arguments['<seqfile>']
    ofname = arguments['-o']

    print("creating tree for {0}".format(netname))
    print("using sequences from {0}".format(seqname))

    # The parsed network is not used below; the call is kept so that a
    # missing or unreadable network file still fails early.
    nx.read_adjlist(netname)
    try:
        seqs = cogent.LoadSeqs(seqname, moltype=cogent.PROTEIN, aligned=False)
    except Exception as e:
        print(e)
        # a failed load is an error: report it through the exit status
        sys.exit(1)

    aln = align_unaligned_seqs(seqs, cogent.PROTEIN)
    t = build_tree_from_alignment(aln, cogent.PROTEIN)
    print("tree  = {0}".format(str(t)))

    # Only accept a real string for the output path: a boolean flag value
    # must never reach open(), where True would be taken as a descriptor.
    if isinstance(ofname, str) and ofname:
        out_path = ofname
    else:
        out_path = "all"
    # text mode: the payload is a str, not bytes
    with open(out_path, 'w') as ofile:
        ofile.write(str(t).replace("'", ""))