def load_from_file(self, tree_fname=None, virus_fname=None):
    """Restore tree, virus and frequency data from JSON files found on disk.

    Every file is optional: a path that is not an existing file is skipped
    and the corresponding attribute is left untouched.

    tree_fname  -- tree JSON to read; ``self.intermediate_tree_fname`` is
                   used when None. Stored (via json_to_dendropy) in
                   ``self.tree``.
    virus_fname -- virus JSON to read; ``self.clean_virus_fname`` is used
                   when None. Stored in ``self.viruses``.

    ``self.frequency_fname`` is always used for ``self.frequencies``.
    """
    tree_path = self.intermediate_tree_fname if tree_fname is None else tree_fname
    if os.path.isfile(tree_path):
        self.tree = json_to_dendropy(read_json(tree_path))

    virus_path = self.clean_virus_fname if virus_fname is None else virus_fname
    if os.path.isfile(virus_path):
        self.viruses = read_json(virus_path)

    freq_path = self.frequency_fname
    if os.path.isfile(freq_path):
        self.frequencies = read_json(freq_path)
def export_HI_mutation_effects(self):
    """Write the HI mutation effects to a TSV table and to two JSON files.

    Outputs:
      * ``output_path + prefix + resolution_prefix + 'HI_mutation_effects.tsv'``
        -- one tab-separated row per entry of ``self.mutation_effects``:
        cluster label, effect rounded to 4 decimals, and
        ``self.mutation_counter[mut]``.
      * ``self.auspice_HI_fname`` -- effects greater than 0.01, keyed by
        ``mut[0] + ':' + mut[1]``.
      * ``self.auspice_HI_display_mutations`` -- a JSON file shared by several
        lineages and resolutions. Only the entry for the current
        ``self.virus_type`` / ``self.resolution`` is replaced; it lists
        (label, effect rounded to 2 decimals) for effects greater than 0.001,
        largest first.
    """
    from io_util import write_json, read_json

    # make a tab delimited file with the mutation effects
    table_effects = []
    HI_mutation_effects_fname = (self.output_path + self.prefix
                                 + self.resolution_prefix
                                 + 'HI_mutation_effects.tsv')
    with open(HI_mutation_effects_fname, 'w') as ofile:
        for mut, val in self.mutation_effects.iteritems():
            # label of a cluster: second field of each member, joined by '/'
            mut_str = '/'.join([x[1] for x in self.mutation_clusters[mut]])
            ofile.write(mut_str + '\t' + str(np.round(val, 4)) + '\t'
                        + str(self.mutation_counter[mut]) + '\n')
            if val > 0.001:
                table_effects.append((mut_str, round(val, 2)))

    # export mutation effects to JSON; they are added to a larger JSON that
    # holds different lineages and resolutions
    try:
        display_effects = read_json(self.auspice_HI_display_mutations)
    except Exception:
        # Best effort: if the file doesn't exist yet (or cannot be read),
        # start from an empty dictionary. ``Exception`` instead of a bare
        # ``except`` so that KeyboardInterrupt/SystemExit still propagate.
        display_effects = {}

    # effects for use in the js are indexed by first mutation in cluster
    model_effects = {
        mut[0] + ':' + mut[1]: val
        for mut, val in self.mutation_effects.iteritems()
        if val > 0.01
    }
    write_json(model_effects, self.auspice_HI_fname)

    if self.virus_type not in display_effects:
        display_effects[self.virus_type] = {}
    table_effects.sort(key=lambda x: x[1], reverse=True)
    display_effects[self.virus_type][self.resolution] = table_effects
    write_json(display_effects, self.auspice_HI_display_mutations)
def export_HI_mutation_effects(self):
    """Export the HI mutation effects as a TSV table and as auspice JSON.

    Writes three files:
      * ``output_path + prefix + resolution_prefix + 'HI_mutation_effects.tsv'``
        with one tab-separated row per entry of ``self.mutation_effects``
        (cluster label, effect rounded to 4 decimals,
        ``self.mutation_counter[mut]``);
      * ``self.auspice_HI_fname`` with the effects greater than 0.01, keyed by
        ``mut[0] + ':' + mut[1]``;
      * ``self.auspice_HI_display_mutations``, a JSON shared between lineages
        and resolutions, in which the entry for ``self.virus_type`` /
        ``self.resolution`` is replaced by the (label, effect rounded to 2
        decimals) pairs with effect greater than 0.001, sorted descending.
    """
    from io_util import write_json, read_json

    # make a tab delimited file with the mutation effects
    table_effects = []
    HI_mutation_effects_fname = (self.output_path + self.prefix
                                 + self.resolution_prefix
                                 + 'HI_mutation_effects.tsv')
    with open(HI_mutation_effects_fname, 'w') as ofile:
        for mut, val in self.mutation_effects.iteritems():
            mut_str = '/'.join([x[1] for x in self.mutation_clusters[mut]])
            ofile.write(mut_str + '\t' + str(np.round(val, 4)) + '\t'
                        + str(self.mutation_counter[mut]) + '\n')
            if val > 0.001:
                table_effects.append((mut_str, round(val, 2)))

    # export mutation effects to JSON
    try:
        # they are added to a larger JSON for different lineages and resolutions
        display_effects = read_json(self.auspice_HI_display_mutations)
    except Exception:
        # if the file doesn't exist yet (or is unreadable), create an empty
        # dictionary; a bare ``except`` would also have hidden
        # KeyboardInterrupt and SystemExit
        display_effects = {}

    # effects for use in the js are indexed by first mutation in cluster
    model_effects = {
        mut[0] + ':' + mut[1]: val
        for mut, val in self.mutation_effects.iteritems()
        if val > 0.01
    }
    write_json(model_effects, self.auspice_HI_fname)

    table_effects.sort(key=lambda x: x[1], reverse=True)
    display_effects.setdefault(self.virus_type, {})[self.resolution] = table_effects
    write_json(display_effects, self.auspice_HI_display_mutations)
def test(params):
    """Run the fitness model on the bundled example tree and return the model.

    params -- mapping with 'predictors' (forwarded to ``fitness_model``) and
              'niter' (forwarded to ``predict``)
    """
    from io_util import read_json
    from tree_util import json_to_dendropy, to_Biopython, color_BioTree_by_attribute
    from Bio import Phylo

    example_tree = json_to_dendropy(read_json('data/tree_refine_10y_50v.json'))
    model = fitness_model(example_tree, predictors=params['predictors'], verbose=2)
    model.predict(niter=params['niter'])

    # Drawing is disabled. The imported helpers allow a manual visual check:
    # convert with to_Biopython, color by 'fitness', then Phylo.draw.
    return model
def test(params):
    """Fit the fitness model to a fixed example tree; return the fitted model.

    params -- mapping; ``params['predictors']`` is handed to
              ``fitness_model`` and ``params['niter']`` to ``predict``
    """
    from io_util import read_json
    from tree_util import json_to_dendropy, to_Biopython, color_BioTree_by_attribute
    from Bio import Phylo

    source_json = read_json('data/tree_refine_10y_50v.json')
    phylogeny = json_to_dendropy(source_json)

    predictor = fitness_model(phylogeny, predictors=params['predictors'], verbose=2)
    predictor.predict(niter=params['niter'])

    # Plotting is switched off; to inspect by eye, convert the tree with
    # to_Biopython, color it by 'fitness' and pass it to Phylo.draw.
    return predictor
def test():
    """Compute delta-LBI on 'auspice/tree.json' and draw the tree.

    The memory time scale is the average T2 (from ``get_average_T2`` with
    365) scaled by 2**-4. The drawn tree is colored with the 'date'
    attribute.
    """
    from Bio import Phylo

    phylogeny = json_to_dendropy(read_json('auspice/tree.json'))

    print("calculate local branching index")
    mean_t2 = get_average_T2(phylogeny, 365)
    memory_scale = mean_t2 * 2 ** -4
    # "%s %s" reproduces the space-separated output of a two-item print
    print("%s %s" % ("avg pairwise distance:", mean_t2))
    print("%s %s" % ("memory time scale:", memory_scale))

    reference_date = datetime.datetime(2014, 1, 1)
    calc_delta_LBI(phylogeny, memory_scale, reference_date)

    drawable = to_Biopython(phylogeny)
    color_BioTree_by_attribute(drawable, 'date')
    Phylo.draw(drawable)
def main():
    """Compute the local branching index on the refined tree and save it.

    Reads 'data/tree_refine.json', uses the average T2 (``get_average_T2``
    with 365) scaled by 2**-4 as the memory time scale, and writes the
    result to 'data/tree_LBI.json'.
    """
    print("--- Tree LBI at " + time.strftime("%H:%M:%S") + " ---")

    phylogeny = json_to_dendropy(read_json('data/tree_refine.json'))

    print("calculate local branching index")
    mean_t2 = get_average_T2(phylogeny, 365)
    memory_scale = mean_t2 * 2 ** -4
    # "%s %s" reproduces the space-separated output of a two-item print
    print("%s %s" % ("avg pairwise distance:", mean_t2))
    print("%s %s" % ("memory time scale:", memory_scale))

    calc_LBI(phylogeny, tau=memory_scale)

    serialized = dendropy_to_json(phylogeny.seed_node)
    write_json(serialized, "data/tree_LBI.json")
def main(params):
    """Fit the fitness model to the refined tree and write the result.

    params -- mapping with 'predictors' (forwarded to ``fitness_model``) and
              'niter' (forwarded to ``predict``)

    Reads 'data/tree_refine.json' and returns the name of the file written,
    'data/tree_fitness.json'.
    """
    import time
    from io_util import read_json, write_json
    from tree_util import json_to_dendropy, dendropy_to_json

    print("--- Start fitness model optimization at " + time.strftime("%H:%M:%S") + " ---")

    phylogeny = json_to_dendropy(read_json("data/tree_refine.json"))

    model = fitness_model(phylogeny, predictors=params["predictors"], verbose=1)
    model.predict(niter=params["niter"])

    out_fname = "data/tree_fitness.json"
    serialized = dendropy_to_json(phylogeny.seed_node)
    write_json(serialized, out_fname)
    return out_fname
def main(params):
    """Optimize the fitness model on the refined tree and save the tree.

    params -- mapping; ``params['predictors']`` is handed to
              ``fitness_model`` and ``params['niter']`` to ``predict``

    Reads 'data/tree_refine.json' and returns the output file name,
    'tree_fitness.json' (written relative to the working directory).
    """
    import time
    from io_util import read_json, write_json
    from tree_util import json_to_dendropy, dendropy_to_json

    print("--- Start fitness model optimization at " + time.strftime("%H:%M:%S") + " ---")

    source_json = read_json('data/tree_refine.json')
    phylogeny = json_to_dendropy(source_json)

    predictor = fitness_model(phylogeny, predictors=params['predictors'], verbose=1)
    predictor.predict(niter=params['niter'])

    out_fname = "tree_fitness.json"
    write_json(dendropy_to_json(phylogeny.seed_node), out_fname)
    return out_fname
def main(in_fname='data/tree_refine.json', tree=True):
    """Assign mutational-tolerance fitness to viruses and write them out.

    in_fname -- JSON file to read
    tree     -- when true, the JSON is converted with ``json_to_dendropy``
                before ``assign_fitness`` is applied and the result is
                written to 'data/tree_tolerance.json'; otherwise the loaded
                data is used as is and written to
                'data/virus_tolerance.json'

    Returns ``(out_fname, viruses)``, where ``viruses`` is the object that
    was passed to ``assign_fitness``.
    """
    print("--- Mutational tolerance at " + time.strftime("%H:%M:%S") + " ---")

    viruses = read_json(in_fname)
    if tree:
        viruses = json_to_dendropy(viruses)

    assign_fitness(viruses)

    # pick the destination and the JSON-serializable payload in one place
    if tree:
        out_fname = "data/tree_tolerance.json"
        payload = dendropy_to_json(viruses.seed_node)
    else:
        out_fname = "data/virus_tolerance.json"
        payload = viruses
    write_json(payload, out_fname)

    return out_fname, viruses
def main(in_fname='tree_refine.json', tree=True):
    """Run ``assign_fitness`` on the loaded viruses and save the result.

    in_fname -- JSON file to read
    tree     -- when true, the JSON is turned into a tree with
                ``json_to_dendropy`` first and the output goes to
                'tree_tolerance.json'; otherwise the loaded data is used
                directly and the output goes to 'virus_tolerance.json'

    Returns the tuple ``(out_fname, viruses)``.
    """
    print("--- Mutational tolerance at " + time.strftime("%H:%M:%S") + " ---")

    loaded = read_json(in_fname)
    viruses = json_to_dendropy(loaded) if tree else loaded

    assign_fitness(viruses)

    if not tree:
        out_fname = "virus_tolerance.json"
        write_json(viruses, out_fname)
        return out_fname, viruses

    out_fname = "tree_tolerance.json"
    write_json(dendropy_to_json(viruses.seed_node), out_fname)
    return out_fname, viruses
def main(tree_fname='data/tree_refine.json'):
    """Decorate a tree with the predictor values and write it to disk.

    tree_fname -- JSON file holding the tree to decorate

    Applies ``calc_epitope_distance`` and ``calc_nonepitope_distance`` and
    returns the output file name, 'data/tree_predictors.json'.
    """
    print("--- Testing predictor evaluations ---")
    phylogeny = json_to_dendropy(read_json(tree_fname))

    print("Calculating epitope distances")
    calc_epitope_distance(phylogeny)

    print("Calculating nonepitope distances")
    calc_nonepitope_distance(phylogeny)

    # The LBI step is announced, but the calc_LBI call itself is disabled.
    print("Calculating LBI")

    print("Writing decorated tree")
    out_fname = "data/tree_predictors.json"
    serialized = dendropy_to_json(phylogeny.seed_node)
    write_json(serialized, out_fname)
    return out_fname
def export_to_auspice(self, tree_fields=[], tree_pop_list=[], annotations=[], seq='aa'):
    """Write the sequence, tree, frequency and metadata JSON files for auspice.

    tree_fields   -- forwarded to ``dendropy_to_json`` when the tree is
                     serialized
    tree_pop_list -- keys removed from every node of the serialized tree
    annotations   -- names of the clades in ``self.clade_designations`` that
                     get annotated
    seq           -- nucleotide entropy is exported only when this is 'nuc'
                     and the instance has no ``aa_entropy``

    Files written: ``self.auspice_sequences_fname``,
    ``self.auspice_tree_fname``, ``self.auspice_frequency_fname`` (only when
    ``self.frequencies`` exists) and ``self.auspice_meta_fname``.
    ``self.tree_json`` is (re)built, ``self.frequencies`` gains the 'clades',
    'entropy' and 'location' entries where applicable, and
    ``self.export_accession_numbers()`` is called last.
    """
    from tree_util import dendropy_to_json, all_descendants
    from io_util import write_json, read_json

    print("--- Streamline at " + time.strftime("%H:%M:%S") + " ---")
    root = self.tree.seed_node

    # Move sequence data to separate file. For every clade only the
    # positions that differ from the root sequence are stored.
    print("Writing sequences")
    elems = {}
    for node in self.tree:
        if hasattr(node, "clade") and hasattr(node, "seq"):
            elems[node.clade] = {}
            elems[node.clade]['nuc'] = {
                pos: state
                for pos, (state, ancstate) in enumerate(izip(node.seq, root.seq))
                if state != ancstate
            }
            for anno, aa_seq in node.aa_seq.iteritems():
                elems[node.clade][anno] = {
                    pos: state
                    for pos, (state, ancstate) in enumerate(
                        izip(aa_seq, root.aa_seq[anno]))
                    if state != ancstate
                }
    elems['root'] = {}
    elems['root']['nuc'] = root.seq
    for anno, aa_seq in root.aa_seq.iteritems():
        elems['root'][anno] = aa_seq
    write_json(elems, self.auspice_sequences_fname, indent=None)

    print("Writing tree")
    self.tree_json = dendropy_to_json(root, tree_fields)
    for node in all_descendants(self.tree_json):
        for attr in tree_pop_list:
            node.pop(attr, None)
        if "freq" in node:
            for reg in node["freq"]:
                try:
                    node["freq"][reg] = [round(x, 3) for x in node["freq"][reg]]
                except Exception:
                    # best effort: anything that cannot be rounded is
                    # exported as the marker string
                    node["freq"][reg] = "undefined"

    if hasattr(self, "clade_designations"):
        # find basal node of clade and assign clade x and y values based on
        # this basal node
        clade_present = {}
        clade_xval = {}
        clade_yval = {}
        if hasattr(root, "freq"):
            self.frequencies['clades'] = {
                reg: {"pivots": list(root.pivots)} for reg in root.freq
            }
        for clade, gt in self.clade_designations.iteritems():
            if clade not in annotations:
                continue
            print("%s %s" % ("Annotating clade", clade))
            # internal nodes carrying the full genotype, smallest xvalue first
            tmp_nodes = sorted(
                (node for node in self.tree.postorder_node_iter()
                 if not node.is_leaf()
                 and all(node.aa_seq[gene][pos - 1] == aa
                         for gene, pos, aa in gt)),
                key=lambda node: node.xvalue)
            if tmp_nodes:
                clade_present[clade] = True
                base_node = tmp_nodes[0]
                clade_xval[clade] = base_node.xvalue
                clade_yval[clade] = base_node.yvalue
                if hasattr(base_node, 'freq'):
                    for region in base_node.freq:
                        try:
                            self.frequencies["clades"][region][clade.lower()] = [
                                round(x, 3) for x in base_node.freq[region]
                            ]
                            print("%s %s %s" % ("added frequencies", region, clade))
                        except Exception:
                            # best effort: show the offending value and go on
                            print(base_node.freq[region])
            else:
                clade_present[clade] = False
                print("%s %s %s %s" % ("clade", clade, gt, "not in tree"))

        # append clades, coordinates and genotype to meta
        self.tree_json["clade_annotations"] = [
            (clade, clade_xval[clade], clade_yval[clade],
             "/".join([gene + ':' + str(pos) + aa for gene, pos, aa in gt]))
            for clade, gt in self.clade_designations.iteritems()
            if clade in annotations and clade_present[clade]
        ]

    write_json(self.tree_json, self.auspice_tree_fname, indent=None)
    try:
        # read the file back to make sure it parses
        read_json(self.auspice_tree_fname)
    except Exception:
        print("Read failed, rewriting with indents")
        write_json(self.tree_json, self.auspice_tree_fname, indent=1)

    # Write out frequencies
    if hasattr(self, 'frequencies'):
        if not hasattr(self, 'aa_entropy') and not hasattr(self, 'nuc_entropy'):
            self.determine_variable_positions()

        if hasattr(self, 'aa_entropy'):
            self.frequencies["entropy"] = {}
            self.frequencies["location"] = {}
            for anno, alnS in self.aa_entropy.iteritems():
                location = self.cds[anno].location
                # Fixed: the start coordinate used to be stored twice.
                # NOTE(review): assumes a Biopython-style location that also
                # exposes ``end`` -- confirm against how self.cds is built.
                self.frequencies["location"][anno] = [int(location.start),
                                                      int(location.end)]
                self.frequencies["entropy"][anno] = [
                    [pos, S, muts]
                    for pos, S, muts in izip(xrange(alnS.shape[0]), alnS,
                                             self.variable_aa_identities[anno])
                ]
        elif seq == 'nuc' and hasattr(self, 'nuc_entropy'):
            self.frequencies["entropy"] = [
                [pos, S, muts]
                for pos, S, muts in izip(xrange(self.nuc_entropy.shape[0]),
                                         self.nuc_entropy,
                                         self.variable_nuc_identities)
            ]

        write_json(self.frequencies, self.auspice_frequency_fname)

    # Write out metadata
    print("Writing out metadata")
    meta = {}
    # the 'X' marker strips a leading zero from the day of the month
    meta["updated"] = time.strftime("X%d %b %Y").replace('X0', 'X').replace('X', '')
    try:
        from pygit2 import Repository, discover_repository
        current_working_directory = os.getcwd()
        repository_path = discover_repository(current_working_directory)
        repo = Repository(repository_path)
        commit_id = repo[repo.head.target].id
        meta["commit"] = str(commit_id)
    except ImportError:
        # pygit2 is optional
        meta["commit"] = "unknown"

    if hasattr(self, "date_region_count"):
        meta["regions"] = self.regions
        meta["virus_stats"] = [
            [str(y) + '-' + str(m)]
            + [self.date_region_count[(y, m)][reg] for reg in self.regions]
            for y, m in sorted(self.date_region_count.keys())
        ]
    write_json(meta, self.auspice_meta_fname, indent=None)
    self.export_accession_numbers()
def export_to_auspice(self, tree_fields=[], tree_pop_list=[], annotations=[], seq='aa'):
    """Write the sequence, tree, frequency and metadata JSON files for auspice.

    tree_fields   -- forwarded to ``dendropy_to_json`` when the tree is
                     serialized
    tree_pop_list -- keys removed from every node of the serialized tree
    annotations   -- names of the clades in ``self.clade_designations`` that
                     get annotated
    seq           -- nucleotide entropy is exported only when this is 'nuc'
                     and the instance has no ``aa_entropy``

    Files written: ``self.auspice_sequences_fname``,
    ``self.auspice_tree_fname``, ``self.auspice_frequency_fname`` (only when
    ``self.frequencies`` exists) and ``self.auspice_meta_fname``.
    ``self.tree_json`` is (re)built and ``self.frequencies`` gains the
    'clades', 'entropy' and 'location' entries where applicable.

    Frequency data on the tree is optional: clade frequencies are exported
    only for nodes that carry a ``freq`` attribute.
    """
    from tree_util import dendropy_to_json, all_descendants
    from io_util import write_json, read_json

    print(time.strftime("%H:%M:%S") + " ---")
    root = self.tree.seed_node

    # Move sequence data to separate file. For every clade only the
    # positions that differ from the root sequence are stored.
    print("Writing sequences")
    elems = {}
    for node in self.tree:
        if hasattr(node, "clade") and hasattr(node, "seq"):
            elems[node.clade] = {}
            elems[node.clade]['nuc'] = {
                pos: state
                for pos, (state, ancstate) in enumerate(izip(node.seq, root.seq))
                if state != ancstate
            }
            for anno, aa_seq in node.aa_seq.iteritems():
                elems[node.clade][anno] = {
                    pos: state
                    for pos, (state, ancstate) in enumerate(
                        izip(aa_seq, root.aa_seq[anno]))
                    if state != ancstate
                }
    elems['root'] = {}
    elems['root']['nuc'] = root.seq
    for anno, aa_seq in root.aa_seq.iteritems():
        elems['root'][anno] = aa_seq
    write_json(elems, self.auspice_sequences_fname, indent=None)

    print("Writing tree")
    self.tree_json = dendropy_to_json(root, tree_fields)
    for node in all_descendants(self.tree_json):
        for attr in tree_pop_list:
            node.pop(attr, None)
        if "freq" in node:
            for reg in node["freq"]:
                try:
                    node["freq"][reg] = [round(x, 3) for x in node["freq"][reg]]
                except Exception:
                    # best effort: anything that cannot be rounded is
                    # exported as the marker string
                    node["freq"][reg] = "undefined"

    if hasattr(self, "clade_designations"):
        # find basal node of clade and assign clade x and y values based on
        # this basal node
        clade_present = {}
        clade_xval = {}
        clade_yval = {}
        # Guarded: the frequency export further down is optional
        # (hasattr(self, 'frequencies')), so a tree without frequency
        # estimates must not raise AttributeError here.
        if hasattr(root, "freq"):
            self.frequencies['clades'] = {
                reg: {"pivots": list(root.pivots)} for reg in root.freq
            }
        for clade, gt in self.clade_designations.iteritems():
            if clade not in annotations:
                continue
            print("%s %s" % ("Annotating clade", clade))
            # internal nodes carrying the full genotype, smallest xvalue first
            tmp_nodes = sorted(
                (node for node in self.tree.postorder_node_iter()
                 if not node.is_leaf()
                 and all(node.aa_seq[gene][pos - 1] == aa
                         for gene, pos, aa in gt)),
                key=lambda node: node.xvalue)
            if tmp_nodes:
                clade_present[clade] = True
                base_node = tmp_nodes[0]
                clade_xval[clade] = base_node.xvalue
                clade_yval[clade] = base_node.yvalue
                # Guarded for the same reason as above: the basal node may
                # have no frequency estimate.
                if hasattr(base_node, 'freq'):
                    for region in base_node.freq:
                        try:
                            self.frequencies["clades"][region][clade.lower()] = [
                                round(x, 3) for x in base_node.freq[region]
                            ]
                            print("%s %s %s" % ("added frequencies", region, clade))
                        except Exception:
                            # best effort: show the offending value and go on
                            print(base_node.freq[region])
            else:
                clade_present[clade] = False
                print("%s %s %s %s" % ("clade", clade, gt, "not in tree"))

        # append clades, coordinates and genotype to meta
        self.tree_json["clade_annotations"] = [
            (clade, clade_xval[clade], clade_yval[clade],
             "/".join([gene + ':' + str(pos) + aa for gene, pos, aa in gt]))
            for clade, gt in self.clade_designations.iteritems()
            if clade in annotations and clade_present[clade]
        ]

    write_json(self.tree_json, self.auspice_tree_fname, indent=None)
    try:
        # read the file back to make sure it parses
        read_json(self.auspice_tree_fname)
    except Exception:
        print("Read failed, rewriting with indents")
        write_json(self.tree_json, self.auspice_tree_fname, indent=1)

    # Include genotype frequencies
    if hasattr(self, 'frequencies'):
        if not hasattr(self, 'aa_entropy') and not hasattr(self, 'nuc_entropy'):
            self.determine_variable_positions()

        if hasattr(self, 'aa_entropy'):
            self.frequencies["entropy"] = {}
            self.frequencies["location"] = {}
            for anno, alnS in self.aa_entropy.iteritems():
                location = self.cds[anno].location
                # Fixed: the start coordinate used to be stored twice.
                # NOTE(review): assumes a Biopython-style location that also
                # exposes ``end`` -- confirm against how self.cds is built.
                self.frequencies["location"][anno] = [int(location.start),
                                                      int(location.end)]
                self.frequencies["entropy"][anno] = [
                    [pos, S, muts]
                    for pos, S, muts in izip(xrange(alnS.shape[0]), alnS,
                                             self.variable_aa_identities[anno])
                ]
        elif seq == 'nuc' and hasattr(self, 'nuc_entropy'):
            self.frequencies["entropy"] = [
                [pos, S, muts]
                for pos, S, muts in izip(xrange(self.nuc_entropy.shape[0]),
                                         self.nuc_entropy,
                                         self.variable_nuc_identities)
            ]

        write_json(self.frequencies, self.auspice_frequency_fname)

    # Write out metadata
    print("Writing out metadata")
    meta = {}
    # the 'X' marker strips a leading zero from the day of the month
    meta["updated"] = time.strftime("X%d %b %Y").replace('X0', 'X').replace('X', '')
    try:
        from pygit2 import Repository, discover_repository
        current_working_directory = os.getcwd()
        repository_path = discover_repository(current_working_directory)
        repo = Repository(repository_path)
        commit_id = repo[repo.head.target].id
        meta["commit"] = str(commit_id)
    except ImportError:
        # pygit2 is optional
        meta["commit"] = "unknown"

    if hasattr(self, "date_region_count"):
        meta["regions"] = self.regions
        meta["virus_stats"] = [
            [str(y) + '-' + str(m)]
            + [self.date_region_count[(y, m)][reg] for reg in self.regions]
            for y, m in sorted(self.date_region_count.keys())
        ]
    write_json(meta, self.auspice_meta_fname, indent=0)