Example #1
0
def assign_amino_acid_muts_vcf(prots, path):
    """Store per-gene amino-acid mutations in the tree meta-data and save it.

    For every gene in ``prots`` and every position listed for that gene, a
    mutation string ``<reference residue><1-based position><residue>`` is
    built for each sequence that has an entry at that position.  Positions
    whose pattern is identical in every sequence are skipped.  For each gene
    with at least one remaining position, the mutations of each sequence are
    joined with "," and written to ``tree_meta[name][gene + '_mutations']``
    (an empty string if that sequence has none).  Genes without any remaining
    position are left out and only counted in a printed summary.

    Args:
        prots: mapping of gene name to a dict with the keys
            ``'sequences'`` (sequence name -> mapping of 0-based position
            to residue), ``'reference'`` (indexable by 0-based position) and
            ``'positions'`` (iterable of 0-based positions).
        path: forwarded unchanged to ``read_tree_meta_data`` and
            ``write_tree_meta_data``.
    """
    tree_meta = read_tree_meta_data(path)
    excluded = []

    # go through every gene in the prots nested dict
    # (.items() and print(...) with one argument behave the same on
    # Python 2 and Python 3)
    for fname, prot in prots.items():
        sequences = prot['sequences']
        ref = prot['reference']

        # Fix the sequence order once per gene and use the same list both to
        # build the patterns and to assign them.  Taking the names from a
        # different gene's dict does not guarantee the same order or the same
        # set of sequences.
        names = list(sequences)
        muts_by_seq = {name: [] for name in names}
        differs = False

        # for each position, get the mutation in the right format
        # [ancestral][position][mutation]
        for pi in prot['positions']:
            # pi + 1: convert to standard numbering for output (starts at 1)
            prefix = ref[pi] + str(pi + 1)
            pattern = [prefix + sequences[name][pi] if pi in sequences[name]
                       else "" for name in names]

            # if the exact same mutation in all sequences, don't include!
            # (only mutant against ref..)
            if len(set(pattern)) <= 1:
                continue

            differs = True
            for name, mut in zip(names, pattern):
                if mut:
                    muts_by_seq[name].append(mut)

        if differs:
            # for every sequence, assign the mutations in tree_meta
            for name in names:
                tree_meta[name][fname + '_mutations'] = ",".join(muts_by_seq[name])
        else:
            # don't include if all the mutations identical across sequences
            excluded.append(fname)

    if excluded:
        print("{} genes do not differ across the tree. They will not be added to tree meta-data or shown in auspice".format(len(excluded)))

    # write it out!
    write_tree_meta_data(path, tree_meta)
Example #2
0
def add_drm_tree_meta(path, seqDRM):
    """Add drug-resistance mutations to the tree meta-data and save it.

    For every sequence in ``seqDRM`` the previous drug-resistance entries are
    cleared with ``remove_old_DRM``; then each mutation is appended
    (comma-separated) to the entry of every drug it is listed for, using the
    name returned by ``drugTranslate`` as key.  Each such sequence also gets a
    ``"Drug_Resistance"`` entry holding the number of distinct drugs as a
    string.  Every meta-data entry still lacking ``"Drug_Resistance"``
    afterwards gets ``'0'``.

    Args:
        path: forwarded unchanged to ``read_tree_meta_data`` and
            ``write_tree_meta_data``.
        seqDRM: mapping of sequence name to a mapping of mutation to a
            ';'-separated string of drug names.

    Returns:
        dict mapping each translated drug name (plus ``"Drug_Resistance"``)
        to the list of distinct values recorded for it while processing.
    """
    tree_meta = read_tree_meta_data(path)

    # add drug resistance to tree_meta, & make list for colouring
    drugMuts = {"Drug_Resistance": ['0']}

    # .items()/.values() are used instead of iteritems() so the function runs
    # on both Python 2 and Python 3.
    for seq, muts in seqDRM.items():
        node = tree_meta[seq]
        # in case re-running, don't add mutations to old ones!
        remove_old_DRM(node)

        resistant_to = set()
        for mut, drug_field in muts.items():
            for drug in drug_field.split(';'):
                trDrug = drugTranslate(drug)
                if trDrug in node:
                    node[trDrug] = ",".join([node[trDrug], mut])
                else:
                    node[trDrug] = mut

                # NOTE(review): the value is recorded after every single
                # mutation, so intermediate strings (e.g. "a" before "a,b")
                # also end up in drugMuts -- confirm this is intended.
                recorded = drugMuts.setdefault(trDrug, [])
                if node[trDrug] not in recorded:
                    recorded.append(node[trDrug])

                resistant_to.add(trDrug)

        numResist = str(len(resistant_to))
        node["Drug_Resistance"] = numResist
        if numResist not in drugMuts["Drug_Resistance"]:
            drugMuts["Drug_Resistance"].append(numResist)

    # for any with no resistance, add a 0 to tree_meta
    for node in tree_meta.values():
        if 'Drug_Resistance' not in node:
            node["Drug_Resistance"] = '0'

    write_tree_meta_data(path, tree_meta)

    return drugMuts
if __name__ == '__main__':
    # Script entry point: read the tree and its meta-data found under `path`
    # and write the tree, sequence and diversity JSON files for auspice.
    parser = generic_argparse("Export precomputed data as auspice jsons")
    parser.add_argument(
        '--prefix',
        required=True,
        help=
        "prefix for json files that are passed on to auspice (e.g., zika.fasta)"
    )
    parser.add_argument(
        '--reference',
        required=True,
        help="reference sequence needed for entropy feature export")

    args = parser.parse_args()
    # NOTE(review): presumably generic_argparse registers the path option;
    # it is not added in this block -- confirm.
    path = args.path

    T = Phylo.read(tree_newick(path), 'newick')
    # seq_meta is not used further in this block; the call is kept as is.
    seq_meta = read_sequence_meta_data(path)
    tree_meta = read_tree_meta_data(path)
    attach_tree_meta_data(T, tree_meta)
    tree_layout(T)

    # The meta-data fields to export are taken from the first entry of
    # tree_meta.  next(iter(...), {}) works on Python 2 and 3 (dict views
    # cannot be indexed or concatenated with a list on Python 3) and falls
    # back to no meta-data fields when tree_meta is empty instead of raising
    # IndexError.
    # NOTE(review): keys present only on other entries are not exported --
    # confirm that all entries share the same keys.
    first_node_meta = next(iter(tree_meta.values()), {})
    fields_to_export = list(first_node_meta.keys()) + [
        "tvalue", "yvalue", "xvalue", "attr", "muts", "aa_muts"
    ]
    tjson = tree_to_json(T.root, extra_attr=fields_to_export)
    write_json(tjson, tree_json(path, args.prefix))

    export_sequence_json(T, path, args.prefix)

    export_diversity(path, args.prefix, args.reference)