Ejemplo n.º 1
0
def assign_amino_acid_muts_vcf(prots, path):
    """Add per-gene amino acid mutations to the tree meta-data and save it.

    For every gene and every listed position, each sequence receives a
    mutation string of the form ``<reference residue><1-based position>
    <sequence residue>`` (empty when the sequence has no entry at that
    position). Positions where every sequence carries the identical string
    are dropped, since they only differ from the reference and not across
    the tree. The surviving strings are comma-joined per sequence and stored
    under ``<gene>_mutations`` in the tree meta-data.

    Args:
        prots: dict keyed by gene name. Each value is a dict with
            ``'sequences'`` (sequence name -> mapping of position to
            residue), ``'reference'`` (indexable by position) and
            ``'positions'`` (0-based positions to inspect).
        path: handed unchanged to ``read_tree_meta_data`` and
            ``write_tree_meta_data``.

    Returns:
        None. When ``prots`` is empty nothing is read or written.
    """
    if not prots:
        # No genes means nothing to annotate (indexing the first gene would
        # otherwise fail).
        return

    tree_meta = read_tree_meta_data(path)

    # The sequence names of the first gene define which nodes are annotated.
    # The same explicit order is used for every gene so a mutation can never
    # be attributed to the wrong node because two dicts iterate differently.
    seq_names = list(next(iter(prots.values()))['sequences'])
    excluded = []

    # go through every gene in the prots nested dict
    for fname, prot in prots.items():
        sequences = prot['sequences']
        ref = prot['reference']

        pats = []
        for pi in prot['positions']:
            # [ancestral][position][mutation]; output numbering starts at 1
            prefix = ref[pi] + str(pi + 1)
            # NOTE(review): a name missing from this gene's sequences is
            # treated as "no mutation" -- confirm that is the desired handling.
            pattern = [prefix + sequences[name][pi]
                       if pi in sequences.get(name, ()) else ""
                       for name in seq_names]

            # if the exact same mutation in all sequences, don't include!
            # (only mutant against ref..)
            if len(set(pattern)) != 1:
                pats.append(pattern)

        if not pats:
            # every position was identical across sequences for this gene
            excluded.append(fname)
            continue

        # for every sequence, assign its non-empty mutations in tree_meta
        for col, node_name in enumerate(seq_names):
            tree_meta[node_name][fname+'_mutations'] = ",".join(
                row[col] for row in pats if row[col] != '')

    if excluded:
        # Parenthesised single argument: valid as a Python 2 print statement
        # and as a Python 3 print call.
        print("{} genes do not differ across the tree. They will not be added to tree meta-data or shown in auspice".format(len(excluded)))

    # write it out!
    write_tree_meta_data(path, tree_meta)
Ejemplo n.º 2
0
def add_drm_tree_meta(path, seqDRM):
    """Record drug-resistance mutations in the tree meta-data and save it.

    Args:
        path: handed unchanged to ``read_tree_meta_data`` and
            ``write_tree_meta_data``.
        seqDRM: dict mapping a sequence name to a dict of
            ``mutation -> drug names``, where several drug names are
            separated by ``';'``.

    Returns:
        dict mapping each translated drug name to the list of distinct
        mutation strings recorded for it, plus ``"Drug_Resistance"`` mapping
        to the list of distinct per-sequence resistance counts (as strings,
        always including ``'0'``).

    Raises:
        KeyError: if a sequence name in ``seqDRM`` is absent from the tree
            meta-data.
    """
    tree_meta = read_tree_meta_data(path)

    # add drug resistance to tree_meta, & make list for colouring
    drugMuts = {"Drug_Resistance": ['0']}

    # .items() keeps this working on both Python 2 and Python 3.
    for seq, mut_to_drugs in seqDRM.items():
        node_meta = tree_meta[seq]
        # in case re-running, don't add mutations to old ones!
        remove_old_DRM(node_meta)

        resisted = set()  # distinct drugs this sequence is resistant to
        for mut, drug_field in mut_to_drugs.items():
            for drug_name in drug_field.split(';'):
                trDrug = drugTranslate(drug_name)
                if trDrug in node_meta:
                    node_meta[trDrug] = ",".join([node_meta[trDrug], mut])
                else:
                    node_meta[trDrug] = mut

                # NOTE(review): the value is recorded after every append, so
                # partial strings (e.g. "A" before it becomes "A,B") also end
                # up in the colouring list -- confirm this is intended.
                recorded = drugMuts.setdefault(trDrug, [])
                if node_meta[trDrug] not in recorded:
                    recorded.append(node_meta[trDrug])

                resisted.add(trDrug)

        numResist = str(len(resisted))
        node_meta["Drug_Resistance"] = numResist
        if numResist not in drugMuts["Drug_Resistance"]:
            drugMuts["Drug_Resistance"].append(numResist)

    # for any with no resistance, add a 0 to tree_meta
    for node_meta in tree_meta.values():
        node_meta.setdefault("Drug_Resistance", '0')

    write_tree_meta_data(path, tree_meta)

    return drugMuts
Ejemplo n.º 3
0
    # NOTE(review): the enclosing function header is not visible here; `T`,
    # `args` and `path` are expected to be bound before this point.
    #T = tree_newick(path)
    meta = read_sequence_meta_data(path)
    # Node attributes to collect into the tree meta-data; extended below
    # depending on which inference was run.
    fields = ['branchlength', 'clade']

    if args.timetree:
        # Time-tree inference; the tree is rerooted with 'best' unless
        # --keeproot was given.
        tt = timetree(tree=T, aln=ref_alignment(path), confidence=args.confidence,
                      seq_meta=meta, reroot=None if args.keeproot else 'best',Tc=args.Tc)
        T = tt.tree
        fields.extend(['mutations', 'mutation_length', 'num_date', 'clock_length'])
        if args.confidence:
            fields.append('num_date_confidence')
    elif args.ancestral:
        # Ancestral sequence inference only (no dates).
        tt = ancestral_sequence_inference(tree=T, aln=ref_alignment(path))
        T = tt.tree
        fields.extend(['mutations', 'mutation_length'])

    # Number every clade consecutively in preorder, starting at 0.
    clade_index = 0
    for n in T.find_clades(order='preorder'):
        n.clade = clade_index
        clade_index+=1

    # Persist the (possibly replaced) tree and the collected node attributes.
    Phylo.write(T, tree_newick(path), 'newick')
    meta_dic = collect_tree_meta_data(T, fields)
    write_tree_meta_data(path, meta_dic)

    # NOTE(review): in the visible code `tt` is only assigned inside the
    # timetree/ancestral branches above, yet it is used unconditionally here.
    # Unless it is bound earlier (outside this view), running with neither
    # option would raise NameError -- confirm.
    with open(sequence_gtr_model(path),'w') as ofile:
        ofile.write(str(tt.gtr))

    # Sequences are exported only when one of the two inferences was run.
    if args.timetree or args.ancestral:
        export_sequence_fasta(T, path)
Ejemplo n.º 4
0
        seqs[seq.name] = seq

    # Map of node name -> comma-separated mutation string relative to the
    # node's parent. The root has no parent, so it gets an empty string.
    muts = {}
    muts[T.root.name]=''
    for node in T.get_nonterminals():
        pseq = seqs[node.name]
        # Iterating over `node` presumably yields its direct children
        # (Bio.Phylo clade behaviour) -- confirm.
        for c in node:
            cseq = seqs[c.name]
            # One entry per differing site: <parent residue><1-based
            # position><child residue>. zip() stops at the shorter sequence.
            muts[c.name]=','.join([anc+str(pos+1)+der
                        for pos, (anc, der) in enumerate(zip(pseq, cseq))
                        if anc!=der])

    return muts


if __name__ == '__main__':
    # Script entry point: compute amino acid mutations for every translated
    # gene alignment and store them per node in the tree meta-data.
    parser = generic_argparse("Assign amino acid mutations to the tree")
    args = parser.parse_args()
    path = args.path

    tree_meta = read_tree_meta_data(path)
    T = Phylo.read(tree_newick(path), 'newick')

    for gene, aln_fname in get_genes_and_alignments(path, tree=True):
        # The nucleotide alignment has no amino acid mutations. It must be
        # skipped entirely: running the assignment below for 'nuc' would use
        # an unbound `muts` (if 'nuc' comes first) or store another gene's
        # mutations under 'nuc_mutations'.
        if gene == 'nuc':
            continue

        muts = get_amino_acid_mutations(T, aln_fname)
        for node_name in tree_meta:
            tree_meta[node_name][gene+'_mutations'] = muts[node_name]

    write_tree_meta_data(path, tree_meta)