def main(params): import time from io_util import read_json from io_util import write_json from tree_util import json_to_dendropy, dendropy_to_json print "--- Start fitness model optimization at " + time.strftime("%H:%M:%S") + " ---" tree_fname = "data/tree_refine.json" tree = json_to_dendropy(read_json(tree_fname)) fm = fitness_model(tree, predictors=params["predictors"], verbose=1) fm.predict(niter=params["niter"]) out_fname = "data/tree_fitness.json" write_json(dendropy_to_json(tree.seed_node), out_fname) return out_fname
def main(in_fname='data/tree_refine.json', tree=True): print "--- Mutational tolerance at " + time.strftime("%H:%M:%S") + " ---" viruses = read_json(in_fname) if tree: viruses = json_to_dendropy(viruses) assign_fitness(viruses) if tree: out_fname = "data/tree_tolerance.json" write_json(dendropy_to_json(viruses.seed_node), out_fname) else: out_fname = "data/virus_tolerance.json" write_json(viruses, out_fname) return out_fname, viruses
def main(params): import time from io_util import read_json from io_util import write_json from tree_util import json_to_dendropy, dendropy_to_json print "--- Start fitness model optimization at " + time.strftime( "%H:%M:%S") + " ---" tree_fname = 'data/tree_refine.json' tree = json_to_dendropy(read_json(tree_fname)) fm = fitness_model(tree, predictors=params['predictors'], verbose=1) fm.predict(niter=params['niter']) out_fname = "data/tree_fitness.json" write_json(dendropy_to_json(tree.root), out_fname) return out_fname
def main(tree_fname = 'data/tree_refine.json'): print "--- Testing predictor evaluations ---" tree = json_to_dendropy(read_json(tree_fname)) print "Calculating epitope distances" calc_epitope_distance(tree) print "Calculating nonepitope distances" calc_nonepitope_distance(tree) print "Calculating LBI" # calc_LBI(tree) print "Writing decorated tree" out_fname = "data/tree_predictors.json" write_json(dendropy_to_json(tree.seed_node), out_fname) return out_fname
def export_to_auspice(self, tree_fields=[], tree_pop_list=[], annotations=[], seq='aa'): from tree_util import dendropy_to_json, all_descendants from io_util import write_json, read_json print "--- Streamline at " + time.strftime("%H:%M:%S") + " ---" # Move sequence data to separate file print "Writing sequences" elems = {} for node in self.tree: if hasattr(node, "clade") and hasattr(node, "seq"): elems[node.clade] = {} elems[node.clade]['nuc'] = { pos: state for pos, (state, ancstate) in enumerate( izip(node.seq, self.tree.seed_node.seq)) if state != ancstate } for anno, aa_seq in node.aa_seq.iteritems(): elems[node.clade][anno] = { pos: state for pos, (state, ancstate) in enumerate( izip(aa_seq, self.tree.seed_node.aa_seq[anno])) if state != ancstate } elems['root'] = {} elems['root']['nuc'] = self.tree.seed_node.seq for anno, aa_seq in self.tree.seed_node.aa_seq.iteritems(): elems['root'][anno] = aa_seq write_json(elems, self.auspice_sequences_fname, indent=None) print "Writing tree" self.tree_json = dendropy_to_json(self.tree.seed_node, tree_fields) for node in all_descendants(self.tree_json): for attr in tree_pop_list: if attr in node: node.pop(attr, None) if "freq" in node: for reg in node["freq"]: try: node["freq"][reg] = [ round(x, 3) for x in node["freq"][reg] ] except: node["freq"][reg] = "undefined" if hasattr(self, "clade_designations"): # find basal node of clade and assign clade x and y values based on this basal node clade_present = {} clade_xval = {} clade_yval = {} if hasattr(self.tree.seed_node, "freq"): self.frequencies['clades'] = { reg: { "pivots": list(self.tree.seed_node.pivots) } for reg in self.tree.seed_node.freq } for clade, gt in self.clade_designations.iteritems(): if clade in annotations: print "Annotating clade", clade tmp_nodes = sorted( (node for node in self.tree.postorder_node_iter() if not node.is_leaf() and all([ node.aa_seq[gene][pos - 1] == aa for gene, pos, aa in gt ])), key=lambda node: node.xvalue) if len(tmp_nodes): 
clade_present[clade] = True base_node = tmp_nodes[0] clade_xval[clade] = base_node.xvalue clade_yval[clade] = base_node.yvalue if hasattr(base_node, 'freq'): for region in base_node.freq: try: self.frequencies["clades"][region][ clade.lower()] = [ round(x, 3) for x in base_node.freq[region] ] print "added frequencies", region, clade except: print base_node.freq[region] else: clade_present[clade] = False print "clade", clade, gt, "not in tree" # append clades, coordinates and genotype to meta self.tree_json["clade_annotations"] = [ (clade, clade_xval[clade], clade_yval[clade], "/".join([gene + ':' + str(pos) + aa for gene, pos, aa in gt])) for clade, gt in self.clade_designations.iteritems() if clade in annotations and clade_present[clade] == True ] write_json(self.tree_json, self.auspice_tree_fname, indent=None) try: read_json(self.auspice_tree_fname) except: print "Read failed, rewriting with indents" write_json(self.tree_json, self.auspice_tree_fname, indent=1) # Write out frequencies if hasattr(self, 'frequencies'): if not hasattr(self, 'aa_entropy') and not hasattr( self, 'nuc_entropy'): self.determine_variable_positions() if hasattr(self, 'aa_entropy'): self.frequencies["entropy"] = {} self.frequencies["location"] = {} for anno, alnS in self.aa_entropy.iteritems(): self.frequencies["location"][anno] = [int(self.cds[anno].location.start),\ int(self.cds[anno].location.start)] self.frequencies["entropy"][anno] = [[ pos, S, muts ] for pos, S, muts in izip( xrange(alnS.shape[0]), alnS, self.variable_aa_identities[anno])] elif seq == 'nuc' and hasattr(self, 'nuc_entropy'): self.frequencies["entropy"] = [ [pos, S, muts] for pos, S, muts in izip( xrange(self.nuc_entropy.shape[0]), self.nuc_entropy, self.variable_nuc_identities) ] write_json(self.frequencies, self.auspice_frequency_fname) # Write out metadata print "Writing out metadata" meta = {} meta["updated"] = time.strftime("X%d %b %Y").replace('X0', 'X').replace( 'X', '') try: from pygit2 import Repository, 
discover_repository current_working_directory = os.getcwd() repository_path = discover_repository(current_working_directory) repo = Repository(repository_path) commit_id = repo[repo.head.target].id meta["commit"] = str(commit_id) except ImportError: meta["commit"] = "unknown" if hasattr(self, "date_region_count"): meta["regions"] = self.regions meta["virus_stats"] = [ [str(y) + '-' + str(m)] + [self.date_region_count[(y, m)][reg] for reg in self.regions] for y, m in sorted(self.date_region_count.keys()) ] write_json(meta, self.auspice_meta_fname, indent=None) self.export_accession_numbers()
def export_to_auspice(self, tree_fields = [], tree_pop_list = [], annotations = [], seq='aa'): from tree_util import dendropy_to_json, all_descendants from io_util import write_json, read_json print time.strftime("%H:%M:%S") + " ---" # Move sequence data to separate file print "Writing sequences" elems = {} for node in self.tree: if hasattr(node, "clade") and hasattr(node, "seq"): elems[node.clade] = {} elems[node.clade]['nuc'] = {pos:state for pos, (state, ancstate) in enumerate(izip(node.seq, self.tree.seed_node.seq)) if state!=ancstate} for anno, aa_seq in node.aa_seq.iteritems(): elems[node.clade][anno] = {pos:state for pos, (state, ancstate) in enumerate(izip(aa_seq, self.tree.seed_node.aa_seq[anno])) if state!=ancstate} elems['root'] = {} elems['root']['nuc'] = self.tree.seed_node.seq for anno, aa_seq in self.tree.seed_node.aa_seq.iteritems(): elems['root'][anno] = aa_seq write_json(elems, self.auspice_sequences_fname, indent=None) print "Writing tree" self.tree_json = dendropy_to_json(self.tree.seed_node, tree_fields) for node in all_descendants(self.tree_json): for attr in tree_pop_list: if attr in node: node.pop(attr, None) if "freq" in node: for reg in node["freq"]: try: node["freq"][reg] = [round(x,3) for x in node["freq"][reg]] except: node["freq"][reg] = "undefined" if hasattr(self,"clade_designations"): # find basal node of clade and assign clade x and y values based on this basal node clade_present = {} clade_xval = {} clade_yval = {} self.frequencies['clades'] = {reg:{"pivots":list(self.tree.seed_node.pivots)} for reg in self.tree.seed_node.freq} for clade, gt in self.clade_designations.iteritems(): if clade in annotations: print "Annotating clade", clade tmp_nodes = sorted((node for node in self.tree.postorder_node_iter() if not node.is_leaf() and all([node.aa_seq[gene][pos-1]==aa for gene, pos, aa in gt])), key=lambda node: node.xvalue) if len(tmp_nodes): clade_present[clade] = True base_node = tmp_nodes[0] clade_xval[clade] = base_node.xvalue 
clade_yval[clade] = base_node.yvalue for region in base_node.freq: try: self.frequencies["clades"][region][clade.lower()] = [round(x,3) for x in base_node.freq[region]] print "added frequencies",region, clade except: print base_node.freq[region] else: clade_present[clade] = False print "clade",clade, gt, "not in tree" # append clades, coordinates and genotype to meta self.tree_json["clade_annotations"] = [(clade, clade_xval[clade],clade_yval[clade], "/".join([gene+':'+str(pos)+aa for gene, pos, aa in gt])) for clade, gt in self.clade_designations.iteritems() if clade in annotations and clade_present[clade] == True] write_json(self.tree_json, self.auspice_tree_fname, indent=None) try: read_json(self.auspice_tree_fname) except: print "Read failed, rewriting with indents" write_json(self.tree_json, self.auspice_tree_fname, indent=1) # Include genotype frequencies if hasattr(self, 'frequencies'): if not hasattr(self, 'aa_entropy') and not hasattr(self, 'nuc_entropy'): self.determine_variable_positions() if hasattr(self, 'aa_entropy'): self.frequencies["entropy"] = {} self.frequencies["location"] = {} for anno, alnS in self.aa_entropy.iteritems(): self.frequencies["location"][anno] = [int(self.cds[anno].location.start),\ int(self.cds[anno].location.start)] self.frequencies["entropy"][anno] = [ [pos, S, muts] for pos,S,muts in izip(xrange(alnS.shape[0]), alnS,self.variable_aa_identities[anno]) ] elif seq=='nuc' and hasattr(self, 'nuc_entropy'): self.frequencies["entropy"] = [ [pos, S, muts] for pos,S,muts in izip(xrange(self.nuc_entropy.shape[0]), self.nuc_entropy,self.variable_nuc_identities) ] write_json(self.frequencies, self.auspice_frequency_fname) print("WRITEEN") # Write out metadata print "Writing out metadata" meta = {} meta["updated"] = time.strftime("X%d %b %Y").replace('X0','X').replace('X','') try: from pygit2 import Repository, discover_repository current_working_directory = os.getcwd() repository_path = discover_repository(current_working_directory) repo = 
Repository(repository_path) commit_id = repo[repo.head.target].id meta["commit"] = str(commit_id) except ImportError: meta["commit"] = "unknown" if hasattr(self,"date_region_count"): meta["regions"] = self.regions meta["virus_stats"] = [ [str(y)+'-'+str(m)] + [self.date_region_count[(y,m)][reg] for reg in self.regions] for y,m in sorted(self.date_region_count.keys()) ] write_json(meta, self.auspice_meta_fname, indent=0)
def refine_tree(self):
    """Run ``tree_refine.main`` on the tree and save the result.

    ``tree_refine.main`` receives ``self.tree``, ``self.viruses`` and the
    ``'outgroup'`` and ``'cds'`` entries of the module-level ``config``.
    Afterwards the tree, serialized from its seed node, is written to
    ``self.intermediate_tree_fname``.
    """
    import tree_refine

    refine_args = (self.tree, self.viruses, config['outgroup'], config['cds'])
    tree_refine.main(*refine_args)

    refined_json = dendropy_to_json(self.tree.seed_node)
    write_json(refined_json, self.intermediate_tree_fname)
def export_to_auspice(self):
    """Serialize the tree and pass it with ``self.frequencies`` to streamline.

    The tree is converted with ``dendropy_to_json`` starting from its seed
    node; the export itself is done by ``streamline.main``.
    """
    import streamline

    seed = self.tree.seed_node
    serialized_tree = dendropy_to_json(seed)
    streamline.main(serialized_tree, self.frequencies)