def assign_amino_acid_muts_vcf(prots, path):
    """Store each sequence's amino-acid mutations per gene in the tree meta-data.

    The tree meta-data is loaded with ``read_tree_meta_data(path)``, updated
    in place and saved again with ``write_tree_meta_data(path, tree_meta)``.

    For every gene in ``prots`` a mutation is written as
    ``<reference residue><1-based position><residue in the sequence>`` and all
    mutations of one sequence are comma-joined under the key
    ``<gene>_mutations`` of that sequence's tree meta-data entry.

    Positions at which every sequence carries the same entry are skipped
    (they only differ from the reference, not across the tree).  A gene with
    no remaining positions is not added to the meta-data at all; the number
    of such genes is printed.

    Parameters
    ----------
    prots : dict
        Maps gene name to a dict with the keys ``'sequences'`` (sequence name
        -> mapping of 0-based position -> residue), ``'reference'`` (indexable
        by 0-based position) and ``'positions'`` (iterable of 0-based
        positions).
    path : str
        Passed unchanged to ``read_tree_meta_data`` / ``write_tree_meta_data``.

    Raises
    ------
    KeyError
        If a sequence name of a gene with varying positions has no entry in
        the tree meta-data.
    """
    tree_meta = read_tree_meta_data(path)
    excluded = []

    # go through every gene in the prots nested dict
    for fname, prot in prots.items():
        sequences = prot['sequences']
        ref = prot['reference']

        # Freeze ONE name order for this gene and use it both to build the
        # per-position patterns and to write them back.  Taking the names from
        # a different gene's dict could pair a column with the wrong sequence.
        names = list(sequences)

        # for each position, get the mutation in the right format
        # [ancestral][position][mutation]
        pats = []
        for pi in prot['positions']:
            # convert to standard numbering for output (starts at 1)
            label = ref[pi] + str(pi + 1)
            pattern = [
                label + sequences[name][pi] if pi in sequences[name] else ""
                for name in names
            ]
            # if the exact same entry in all sequences, don't include!
            # (only mutant against ref..)
            if len(set(pattern)) != 1:
                pats.append(pattern)

        # don't include the gene if nothing varies across sequences
        if not pats:
            excluded.append(fname)
            continue

        # for every sequence, assign its non-empty mutations in tree_meta
        for col, name in enumerate(names):
            muts = [pattern[col] for pattern in pats if pattern[col] != '']
            tree_meta[name][fname + '_mutations'] = ",".join(muts)

    if excluded:
        # Single-argument call form: prints the same text under Python 2's
        # print statement and Python 3's print function.
        print("{} genes do not differ across the tree. They will not be added to tree meta-data or shown in auspice".format(len(excluded)))

    # write it out!
    write_tree_meta_data(path, tree_meta)
def add_drm_tree_meta(path, seqDRM):
    """Record drug-resistance mutations in the tree meta-data.

    The tree meta-data is loaded with ``read_tree_meta_data(path)``, updated
    in place and saved again with ``write_tree_meta_data(path, tree_meta)``.

    ``seqDRM`` maps a sequence name to a dict of ``mutation -> drugs``, where
    ``drugs`` is a ';'-separated string.  Every drug name is passed through
    ``drugTranslate`` and the mutation is appended (comma-joined) to the entry
    stored under the translated name for that sequence.  Each sequence also
    gets a ``"Drug_Resistance"`` entry holding, as a string, the number of
    distinct translated drugs it has mutations for; sequences that end up
    without that entry get ``'0'``.

    Returns a dict mapping each translated drug name (plus
    ``"Drug_Resistance"``) to the list of distinct values that were assigned
    for it, in order of first appearance.  For drugs this includes every
    intermediate joined string produced while mutations were appended.
    """
    tree_meta = read_tree_meta_data(path)

    # values seen per field, used for colouring; '0' is always present
    drugMuts = {"Drug_Resistance": ['0']}

    for seq_name, mut_to_drugs in seqDRM.items():
        node_meta = tree_meta[seq_name]
        # in case re-running, don't add mutations to old ones!
        remove_old_DRM(node_meta)

        resisted = set()
        for mut, drug_field in mut_to_drugs.items():
            for raw_drug in drug_field.split(';'):
                drug_name = drugTranslate(raw_drug)

                # append to what this sequence already has for the drug
                if drug_name in node_meta:
                    value = ",".join([node_meta[drug_name], mut])
                else:
                    value = mut
                node_meta[drug_name] = value

                # remember each distinct value once
                seen_values = drugMuts.setdefault(drug_name, [])
                if value not in seen_values:
                    seen_values.append(value)

                resisted.add(drug_name)

        resist_count = str(len(resisted))
        node_meta["Drug_Resistance"] = resist_count
        if resist_count not in drugMuts["Drug_Resistance"]:
            drugMuts["Drug_Resistance"].append(resist_count)

    # for any with no resistance, add a 0 to tree_meta
    for node_meta in tree_meta.values():
        if 'Drug_Resistance' not in node_meta:
            node_meta["Drug_Resistance"] = '0'

    write_tree_meta_data(path, tree_meta)
    return drugMuts
# Script entry point: export a tree (and related data) as auspice JSON files.
# NOTE(review): every name assigned below is a module-level global when the
# file is run as a script; other code in this file may read them -- confirm
# before renaming or removing any of them.
if __name__ == '__main__':
    parser = generic_argparse("Export precomputed data as auspice jsons")
    parser.add_argument(
        '--prefix',
        required=True,
        help=
        "prefix for json files that are passed on to auspice (e.g., zika.fasta)"
    )
    parser.add_argument(
        '--reference',
        required=True,
        help="reference sequence needed for entropy feature export")
    args = parser.parse_args()
    # `--path` is not added here; presumably generic_argparse defines it --
    # TODO confirm.
    path = args.path

    # Read the tree in newick format from the location tree_newick(path) gives.
    T = Phylo.read(tree_newick(path), 'newick')
    # NOTE(review): seq_meta is not referenced again in this block -- confirm
    # whether it is needed elsewhere before removing the call.
    seq_meta = read_sequence_meta_data(path)
    tree_meta = read_tree_meta_data(path)
    attach_tree_meta_data(T, tree_meta)
    tree_layout(T)

    # Export the attribute names found on the first tree_meta entry plus a
    # fixed set of extra attributes.  Indexing `.values()` and adding `.keys()`
    # to a list relies on Python 2 returning lists from both; an empty
    # tree_meta raises IndexError here.
    fields_to_export = tree_meta.values()[0].keys() + [
        "tvalue", "yvalue", "xvalue", "attr", "muts", "aa_muts"
    ]
    # Serialise the tree starting at its root and write it to the file name
    # built by tree_json(path, args.prefix).
    tjson = tree_to_json(T.root, extra_attr=fields_to_export)
    write_json(tjson, tree_json(path, args.prefix))

    # Remaining exports; the reference is only passed to the diversity export.
    export_sequence_json(T, path, args.prefix)
    export_diversity(path, args.prefix, args.reference)