def export_HI_mutation_effects(self):
    """Write the HI mutation effects to a TSV table and to the auspice JSON files.

    Three files are written:
      * ``<output_path><prefix><resolution_prefix>HI_mutation_effects.tsv``:
        one row per entry of ``self.mutation_effects`` holding the joined
        mutation cluster, the effect rounded to 4 decimals and the matching
        entry of ``self.mutation_counter``.
      * ``self.auspice_HI_fname``: all effects larger than 0.01, keyed by
        ``mut[0] + ':' + mut[1]``.
      * ``self.auspice_HI_display_mutations``: a JSON shared between
        lineages and resolutions; its ``[self.virus_type][self.resolution]``
        entry is replaced by the list of (cluster, effect rounded to 2
        decimals) pairs with effect > 0.001, sorted by decreasing effect.
    """
    from io_util import write_json, read_json

    # make a tab delimited file with the mutation effects
    table_effects = []
    HI_mutation_effects_fname = (self.output_path + self.prefix
                                 + self.resolution_prefix
                                 + 'HI_mutation_effects.tsv')
    with open(HI_mutation_effects_fname, 'w') as ofile:
        for mut, val in self.mutation_effects.iteritems():
            mut_str = '/'.join([x[1] for x in self.mutation_clusters[mut]])
            ofile.write(mut_str + '\t' + str(np.round(val, 4)) + '\t'
                        + str(self.mutation_counter[mut]) + '\n')
            if val > 0.001:
                table_effects.append((mut_str, round(val, 2)))

    # export mutation effects to JSON
    try:
        # they are added to a larger json for different lineages and resolutions
        display_effects = read_json(self.auspice_HI_display_mutations)
    except Exception:
        # The file may not exist yet (or be unreadable): start from an empty
        # dictionary. ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not swallowed here.
        display_effects = {}

    # effects for use in the js are indexed by first mutation in cluster
    model_effects = {mut[0] + ':' + mut[1]: val
                     for mut, val in self.mutation_effects.iteritems()
                     if val > 0.01}
    write_json(model_effects, self.auspice_HI_fname)

    if self.virus_type not in display_effects:
        display_effects[self.virus_type] = {}
    table_effects.sort(key=lambda x: x[1], reverse=True)
    display_effects[self.virus_type][self.resolution] = table_effects
    write_json(display_effects, self.auspice_HI_display_mutations)
def export(self, path='', extra_attr=['aa_muts']):
    """Write the tree and the per-node sequence differences as JSON files.

    Produces ``<path>tree.json`` (built with ``tree_to_json``) and
    ``<path>sequences.json``. In the latter, ``'root'`` holds the root
    nucleotide sequence and one translated string per entry of
    ``self.proteins``; every other node is stored under its ``clade``
    attribute as ``{position: state}`` dictionaries that list only the
    0-based positions at which the node differs from the root.

    parameters:
    path       -- prefix prepended to the two output file names
    extra_attr -- node attributes handed on to ``tree_to_json``
    """
    from Bio import Seq
    from itertools import izip

    def changes_from(seq, reference):
        # positions at which seq deviates from reference, mapped to the state in seq
        return {pos: state
                for pos, (state, ancstate) in enumerate(izip(seq, reference))
                if state != ancstate}

    timetree_fname = path + 'tree.json'
    sequence_fname = path + 'sequences.json'
    write_json(tree_to_json(self.tree.root, extra_attr=extra_attr),
               timetree_fname, indent=None)

    # root entry: full nucleotide string plus the translation of every protein
    root_nuc = "".join(self.tree.root.sequence)
    elems = {'root': {'nuc': root_nuc}}
    for prot in self.proteins:
        coding = str(self.proteins[prot].extract(Seq.Seq(root_nuc)))
        # '-' is replaced by 'N' before translating; 'X' in the result is written as '-'
        translated = str(Seq.translate(coding.replace('-', 'N')))
        elems['root'][prot] = translated.replace('X', '-')

    # first pass: nucleotide differences of every labelled node
    for node in self.tree.find_clades():
        if not (hasattr(node, "clade") and hasattr(node, "sequence")):
            continue
        elems[node.clade] = {}
        elems[node.clade]['nuc'] = changes_from(node.sequence,
                                                self.tree.root.sequence)

    # second pass: amino acid differences for every protein
    for node in self.tree.find_clades():
        if not (hasattr(node, "clade") and hasattr(node, "translations")):
            continue
        for prot in self.proteins:
            elems[node.clade][prot] = changes_from(node.translations[prot],
                                                   elems['root'][prot])

    write_json(elems, sequence_fname, indent=None)
def load_viruses(self, aln_fname=None, years_back=3, viruses_per_month=50): if config['virus']: from H3N2_filter import H3N2_filter as virus_filter fasta_fields = config['fasta_fields'] if 'force_include' in config and os.path.isfile( config['force_include']): with open(config['force_include']) as force_include_file: force_include_strains = [ line.strip() for line in force_include_file ] print "found ", len( force_include_strains), "strains to include" else: force_include_strains = [] else: from virus_filter import virus_filter as virus_filter fasta_fields = {0: 'strain'} if aln_fname is None: aln_fname = config['alignment_file'] my_filter = virus_filter(aln_fname, fasta_fields) my_filter.filter() my_filter.subsample(years_back, viruses_per_month, prioritize=force_include_strains, all_priority=True, region_specific=config['max_global']) self.viruses = my_filter.virus_subsample write_json(self.viruses, self.initial_virus_fname)
def estimate_frequencies(self, tasks = ['mutations','genotypes', 'clades', 'tree']): import bernoulli_frequency as freq_est plot=False freq_est.flu_stiffness = config['frequency_stiffness'] freq_est.time_interval = config['time_interval'] freq_est.pivots_per_year = config['pivots_per_year'] freq_est.relevant_pos_cutoff = 0.1 if 'mutations' in tasks or 'genotypes' in tasks: self.frequencies['mutations'], relevant_pos = freq_est.all_mutations(self.tree, config['aggregate_regions'], threshold = config['min_mutation_count'], plot=plot) if 'genotypes' in tasks: self.frequencies['genotypes'] = freq_est.all_genotypes(self.tree, config['aggregate_regions'], relevant_pos) #if 'specieshost' in tasks: #self.frequencies['specieshost'] = freq_est.all_genotypes(self.tree, config['aggregate_hosts'], relevant_pos) if 'clades' in tasks: self.frequencies['clades'] = freq_est.all_clades(self.tree, config['clade_designations'], config['aggregate_regions'], plot) if any(x in tasks for x in ['mutations','clades', 'genotypes']): write_json(self.frequencies, self.frequency_fname) if 'tree' in tasks: for region_label, regions in config['aggregate_regions']: print "--- "+"adding frequencies to tree "+region_label+ " " + time.strftime("%H:%M:%S") + " ---" freq_est.estimate_tree_frequencies(self.tree, threshold = 10, regions=regions, region_name=region_label)
def export_HI_mutation_effects(self):
    """Export the HI mutation effects as a TSV file and as JSON for auspice.

    Outputs:
      * a tab-delimited file named
        ``<output_path><prefix><resolution_prefix>HI_mutation_effects.tsv``
        with, per entry of ``self.mutation_effects``: the '/'-joined mutation
        cluster, the effect (4 decimals) and ``self.mutation_counter[mut]``;
      * ``self.auspice_HI_fname`` containing the effects above 0.01, keyed by
        ``mut[0] + ':' + mut[1]``;
      * ``self.auspice_HI_display_mutations``, which is read (if possible),
        updated at ``[self.virus_type][self.resolution]`` with the effects
        above 0.001 (2 decimals, largest first) and written back.
    """
    from io_util import write_json, read_json

    # make a tab delimited file with the mutation effects
    table_effects = []
    HI_mutation_effects_fname = ''.join([self.output_path, self.prefix,
                                         self.resolution_prefix,
                                         'HI_mutation_effects.tsv'])
    with open(HI_mutation_effects_fname, 'w') as ofile:
        for mut, val in self.mutation_effects.iteritems():
            mut_str = '/'.join([x[1] for x in self.mutation_clusters[mut]])
            row = [mut_str, str(np.round(val, 4)), str(self.mutation_counter[mut])]
            ofile.write('\t'.join(row) + '\n')
            if val > 0.001:
                table_effects.append((mut_str, round(val, 2)))

    # export mutation effects to JSON
    try:
        # they are added to a larger json for different lineages and resolutions
        display_effects = read_json(self.auspice_HI_display_mutations)
    except Exception:
        # If the file doesn't exist yet (or cannot be parsed), create an empty
        # dictionary. Catching ``Exception`` instead of using a bare
        # ``except`` lets Ctrl-C and interpreter shutdown propagate.
        display_effects = {}

    # effects for use in the js are indexed by first mutation in cluster
    model_effects = {
        mut[0] + ':' + mut[1]: val
        for mut, val in self.mutation_effects.iteritems() if val > 0.01
    }
    write_json(model_effects, self.auspice_HI_fname)

    if self.virus_type not in display_effects:
        display_effects[self.virus_type] = {}
    table_effects.sort(key=lambda x: x[1], reverse=True)
    display_effects[self.virus_type][self.resolution] = table_effects
    write_json(display_effects, self.auspice_HI_display_mutations)
def export_diversity(self, fname='entropy.json', indent=None):
    '''
    Export the per-position entropy of every alignment in self.entropy
    (nucleotide alignment and translations) as json.

    parameters:
    fname  -- name of the output file
    indent -- indentation handed on to write_json
    '''
    # compute the statistics on demand if they are not available yet
    if not hasattr(self, "entropy"):
        self.diversity_statistics()

    entropy_json = {}
    for feat in self.entropy:
        # round to 4 digits and clip negative values to zero
        values = [max(0, round(x, 4)) for x in self.entropy[feat]]
        if feat == 'nuc':
            positions = list(range(len(values)))
            codons = [p // 3 for p in positions]
        else:
            feature = self.proteins[feat]
            # one entry per codon: every third position of the feature
            positions = [x for x in feature][::3]
            codons = [(x - feature.start) // 3 for x in positions]
        entropy_json[feat] = {'pos': positions, 'codon': codons, 'val': values}

    write_json(entropy_json, fname, indent=indent)
def export(self, path='', extra_attr=['aa_muts', 'clade'], plain_export=10, indent=None, write_seqs_json=True):
    '''
    Export the tree and the sequences of all nodes as json files for
    display in web browsers.

    parameters:
    path            -- path (incl prefix) of the output files; the file names
                       are always <path>_tree.json and <path>_sequences.json
    extra_attr      -- attributes of tree nodes that are exported to json
    plain_export    -- a sequence is stored as a plain string instead of as
                       differences to the root if the number of differences
                       exceeds len(seq)/plain_export
    indent          -- indentation handed on to write_json
    write_seqs_json -- if False, the sequence json is not written
    '''
    from Bio import Seq
    from itertools import izip

    timetree_fname = path + '_tree.json'
    sequence_fname = path + '_sequences.json'
    write_json(tree_to_json(self.tree.root, extra_attr=extra_attr),
               timetree_fname, indent=indent)

    # The root is stored in full: nucleotide sequence and all translations.
    elems = {'root': {'nuc': "".join(self.tree.root.sequence)}}
    elems['root'].update(self.tree.root.translations)

    # Every labelled node is stored relative to the root, either as a
    # {position: state} dictionary or, if too divergent, as the full string.
    for node in self.tree.find_clades():
        if not hasattr(node, "clade"):
            continue
        node_entry = elems[node.clade] = {}
        labelled_seqs = [('nuc', "".join(node.sequence))] + node.translations.items()
        for prot, seq in labelled_seqs:
            root_seq = elems['root'][prot]
            differences = {}
            for pos, (state, ancstate) in enumerate(izip(seq, root_seq)):
                if state != ancstate:
                    differences[pos] = state
            too_divergent = plain_export * len(differences) > len(seq)
            node_entry[prot] = seq if too_divergent else differences

    if write_seqs_json:
        write_json(elems, sequence_fname, indent=indent)
def export_diversity(self, fname='entropy.json'):
    """Write the entropy of every alignment in ``self.entropy`` to ``fname``.

    For each feature the json holds three parallel lists: ``pos``, ``codon``
    and ``val`` (entropy rounded to 4 digits, negative values set to 0).
    ``self.diversity_statistics()`` is called first if ``self.entropy`` does
    not exist yet.
    """
    if not hasattr(self, "entropy"):
        self.diversity_statistics()

    def nucleotide_entry(values):
        # nucleotide alignment: consecutive positions, three per codon
        count = len(values)
        return {'pos': list(range(0, count)),
                'codon': [i // 3 for i in range(0, count)],
                'val': values}

    def protein_entry(feature, values):
        # translations: every third position of the feature, codon index
        # counted from the start of the feature
        return {'pos': [x for x in feature][::3],
                'codon': [(x - feature.start) // 3 for x in feature][::3],
                'val': values}

    entropy_json = {}
    for feat in self.entropy:
        S = [max(0, round(x, 4)) for x in self.entropy[feat]]
        if feat == 'nuc':
            entropy_json[feat] = nucleotide_entry(S)
        else:
            entropy_json[feat] = protein_entry(self.proteins[feat], S)
    write_json(entropy_json, fname, indent=None)
def to_json(self, filename):
    """Write model parameters, data and accuracy statistics to a JSON file.

    The exported dictionary contains the per-predictor parameters, the
    predictor keyword arguments (without titers, transform as string), the
    predicted-vs-true records, accuracy statistics, timing settings and the
    predictor and frequency arrays converted to lists.

    parameters:
    filename -- name of the JSON file handed to ``write_json``
    """
    # One record per predictor, built through a data frame for convenience.
    params_df = pd.DataFrame({
        "predictor": self.predictors,
        "param": self.model_params.tolist(),
        "global_sd": self.global_sds.tolist(),
    })

    # Only the third value returned by get_correlation() is exported.
    _, _, correlation_rel = self.get_correlation()
    mcc = get_matthews_correlation_coefficient_for_data_frame(
        self.pred_vs_true_df)

    # Work on a copy: titer data is not exported and the transform is
    # replaced by its string representation.
    predictor_kwargs = self.predictor_kwargs.copy()
    if "transform" in predictor_kwargs:
        predictor_kwargs["transform"] = str(predictor_kwargs["transform"])
    predictor_kwargs.pop("titers", None)

    accuracy = {
        "clade_error": self.clade_fit(self.model_params),
        "correlation_rel": correlation_rel[0],
        "mcc": mcc,
    }

    data = {
        "params": params_df.to_dict(orient="records"),
        "predictor_kwargs": predictor_kwargs,
        "data": self.pred_vs_true_df.to_dict(orient="records"),
        "accuracy": accuracy,
        "delta_time": self.delta_time,
        "step_size": self.timepoint_step_size,
        "end_date": self.end_date,
    }

    # Arrays are converted to plain lists for serialization.
    data["predictor_arrays"] = {
        key: self.predictor_arrays[key].tolist()
        for key in self.predictor_arrays
    }
    data["freq_arrays"] = {
        key: self.freq_arrays[key].tolist() for key in self.freq_arrays
    }

    write_json(data, filename)
def main(): print "--- Tree LBI at " + time.strftime("%H:%M:%S") + " ---" tree = json_to_dendropy(read_json('data/tree_refine.json')) print "calculate local branching index" T2 = get_average_T2(tree, 365) tau = T2 * 2**-4 print "avg pairwise distance:", T2 print "memory time scale:", tau calc_LBI(tree, tau=tau) write_json(dendropy_to_json(tree.seed_node), "data/tree_LBI.json")
def to_json(self, filename):
    """Export fitness model parameters, data, and accuracy statistics to JSON.

    parameters:
    filename -- output file passed to ``write_json``
    """
    # Predictor parameters go through a data frame so that they can be
    # exported as a list of records.
    param_columns = {
        "predictor": self.predictors,
        "param": self.model_params.tolist(),
        "global_sd": self.global_sds.tolist()
    }
    params_df = pd.DataFrame(param_columns)

    correlations = self.get_correlation()
    correlation_null, correlation_raw, correlation_rel = correlations
    mcc = get_matthews_correlation_coefficient_for_data_frame(self.pred_vs_true_df)

    # Titer data is never exported; the transform is exported as a string.
    exported_kwargs = self.predictor_kwargs.copy()
    if "transform" in exported_kwargs:
        exported_kwargs["transform"] = str(exported_kwargs["transform"])
    if "titers" in exported_kwargs:
        exported_kwargs.pop("titers")

    data = {}
    data["params"] = params_df.to_dict(orient="records")
    data["predictor_kwargs"] = exported_kwargs
    data["data"] = self.pred_vs_true_df.to_dict(orient="records")
    data["accuracy"] = {
        "clade_error": self.clade_fit(self.model_params),
        "correlation_rel": correlation_rel[0],
        "mcc": mcc
    }
    data["delta_time"] = self.delta_time
    data["step_size"] = self.timepoint_step_size
    data["end_date"] = self.end_date

    # Convert every array to a plain list before writing.
    for label, arrays in (("predictor_arrays", self.predictor_arrays),
                          ("freq_arrays", self.freq_arrays)):
        converted = {}
        for key in arrays:
            converted[key] = arrays[key].tolist()
        data[label] = converted

    write_json(data, filename)
def main(params): import time from io_util import read_json from io_util import write_json from tree_util import json_to_dendropy, dendropy_to_json print "--- Start fitness model optimization at " + time.strftime("%H:%M:%S") + " ---" tree_fname = "data/tree_refine.json" tree = json_to_dendropy(read_json(tree_fname)) fm = fitness_model(tree, predictors=params["predictors"], verbose=1) fm.predict(niter=params["niter"]) out_fname = "data/tree_fitness.json" write_json(dendropy_to_json(tree.seed_node), out_fname) return out_fname
def main(params): import time from io_util import read_json from io_util import write_json from tree_util import json_to_dendropy, dendropy_to_json print "--- Start fitness model optimization at " + time.strftime("%H:%M:%S") + " ---" tree_fname='data/tree_refine.json' tree = json_to_dendropy(read_json(tree_fname)) fm = fitness_model(tree, predictors = params['predictors'], verbose=1) fm.predict(niter = params['niter']) out_fname = "tree_fitness.json" write_json(dendropy_to_json(tree.seed_node), out_fname) return out_fname
def main(in_fname='tree_refine.json', tree=True): print "--- Mutational tolerance at " + time.strftime("%H:%M:%S") + " ---" viruses = read_json(in_fname) if tree: viruses = json_to_dendropy(viruses) assign_fitness(viruses) if tree: out_fname = "tree_tolerance.json" write_json(dendropy_to_json(viruses.seed_node), out_fname) else: out_fname = "virus_tolerance.json" write_json(viruses, out_fname) return out_fname, viruses
def main(in_fname='data/tree_refine.json', tree=True): print "--- Mutational tolerance at " + time.strftime("%H:%M:%S") + " ---" viruses = read_json(in_fname) if tree: viruses = json_to_dendropy(viruses) assign_fitness(viruses) if tree: out_fname = "data/tree_tolerance.json" write_json(dendropy_to_json(viruses.seed_node), out_fname) else: out_fname = "data/virus_tolerance.json" write_json(viruses, out_fname) return out_fname, viruses
def main(tree_fname = 'data/tree_refine.json'): print "--- Testing predictor evaluations ---" tree = json_to_dendropy(read_json(tree_fname)) print "Calculating epitope distances" calc_epitope_distance(tree) print "Calculating nonepitope distances" calc_nonepitope_distance(tree) print "Calculating LBI" # calc_LBI(tree) print "Writing decorated tree" out_fname = "data/tree_predictors.json" write_json(dendropy_to_json(tree.seed_node), out_fname) return out_fname
def export_diversity(self, fname='entropy.json'):
    """Export the alignment entropies stored in ``self.entropy`` as json.

    ``self.diversity_statistics()`` is run first when no entropy is
    available. Each feature is exported with the keys 'pos', 'codon' and
    'val'; values are rounded to 4 digits and never negative.
    """
    if not hasattr(self, "entropy"):
        self.diversity_statistics()

    entropy_json = {}
    for feat in self.entropy:
        rounded = []
        for x in self.entropy[feat]:
            rounded.append(max(0, round(x, 4)))

        entry = {'val': rounded}
        if feat == 'nuc':
            # plain nucleotide coordinates, three positions per codon
            n = len(rounded)
            entry['pos'] = list(range(0, n))
            entry['codon'] = [x // 3 for x in range(0, n)]
        else:
            # first position of every codon of the feature
            codon_starts = [x for x in self.proteins[feat]][::3]
            offset = self.proteins[feat].start
            entry['pos'] = codon_starts
            entry['codon'] = [(x - offset) // 3 for x in codon_starts]
        entropy_json[feat] = entry

    write_json(entropy_json, fname, indent=None)
def load_viruses(self, aln_fname = None, years_back=3, viruses_per_month=50): if config['virus']: from H9_filter import H9_filter as virus_filter fasta_fields = config['fasta_fields'] if 'force_include' in config and os.path.isfile(config['force_include']): with open(config['force_include']) as force_include_file: force_include_strains = [line.strip() for line in force_include_file] print "found ",len(force_include_strains),"strains to include" else: force_include_strains = [] else: from virus_filter import virus_filter as virus_filter fasta_fields = {0:'strain'} if aln_fname is None: aln_fname = config['alignment_file'] my_filter = virus_filter(aln_fname, fasta_fields) my_filter.filter() my_filter.subsample(years_back, viruses_per_month, prioritize = force_include_strains, all_priority = True, region_specific=config['max_global']) self.viruses = my_filter.virus_subsample write_json(self.viruses, self.initial_virus_fname)
def export_to_auspice(self, tree_fields=[], tree_pop_list=[], annotations=[], seq='aa'):
    """Export sequences, tree, frequencies and metadata as JSON files for auspice.

    Files written (all names are attributes of ``self``):
      * ``auspice_sequences_fname`` -- per-node sequence differences to the
        seed node plus the full seed node sequences under 'root'
      * ``auspice_tree_fname``      -- the tree as produced by
        ``dendropy_to_json``, optionally with clade annotations
      * ``auspice_frequency_fname`` -- ``self.frequencies`` (only if present)
      * ``auspice_meta_fname``      -- update date, commit id, virus counts
    Finally ``self.export_accession_numbers()`` is called.

    parameters:
    tree_fields   -- passed to ``dendropy_to_json``
    tree_pop_list -- keys removed from every node of the tree json
    annotations   -- clade names for which an annotation is attempted
    seq           -- 'nuc' selects nucleotide entropy when no amino acid
                     entropy is available
    """
    from tree_util import dendropy_to_json, all_descendants
    from io_util import write_json, read_json
    print "--- Streamline at " + time.strftime("%H:%M:%S") + " ---"
    # Move sequence data to separate file
    print "Writing sequences"
    elems = {}
    for node in self.tree:
        if hasattr(node, "clade") and hasattr(node, "seq"):
            elems[node.clade] = {}
            # only positions that differ from the seed node are stored
            elems[node.clade]['nuc'] = {
                pos: state
                for pos, (state, ancstate) in enumerate(
                    izip(node.seq, self.tree.seed_node.seq))
                if state != ancstate
            }
            for anno, aa_seq in node.aa_seq.iteritems():
                elems[node.clade][anno] = {
                    pos: state
                    for pos, (state, ancstate) in enumerate(
                        izip(aa_seq, self.tree.seed_node.aa_seq[anno]))
                    if state != ancstate
                }
    # the seed node itself is exported with its full sequences
    elems['root'] = {}
    elems['root']['nuc'] = self.tree.seed_node.seq
    for anno, aa_seq in self.tree.seed_node.aa_seq.iteritems():
        elems['root'][anno] = aa_seq
    write_json(elems, self.auspice_sequences_fname, indent=None)
    print "Writing tree"
    self.tree_json = dendropy_to_json(self.tree.seed_node, tree_fields)
    for node in all_descendants(self.tree_json):
        for attr in tree_pop_list:
            if attr in node:
                node.pop(attr, None)
        if "freq" in node:
            for reg in node["freq"]:
                # entries that cannot be rounded are exported as "undefined"
                try:
                    node["freq"][reg] = [
                        round(x, 3) for x in node["freq"][reg]
                    ]
                except:
                    node["freq"][reg] = "undefined"
    if hasattr(self, "clade_designations"):
        # find basal node of clade and assign clade x and y values based on this basal node
        clade_present = {}
        clade_xval = {}
        clade_yval = {}
        if hasattr(self.tree.seed_node, "freq"):
            # NOTE(review): assumes self.frequencies already exists here,
            # although its presence is only checked further below -- confirm.
            self.frequencies['clades'] = {
                reg: {
                    "pivots": list(self.tree.seed_node.pivots)
                }
                for reg in self.tree.seed_node.freq
            }
        for clade, gt in self.clade_designations.iteritems():
            if clade in annotations:
                print "Annotating clade", clade
                # internal nodes matching every (gene, pos, aa) of the
                # genotype (pos is 1-based), sorted by xvalue
                tmp_nodes = sorted(
                    (node for node in self.tree.postorder_node_iter()
                     if not node.is_leaf() and all([
                         node.aa_seq[gene][pos - 1] == aa
                         for gene, pos, aa in gt
                     ])),
                    key=lambda node: node.xvalue)
                if len(tmp_nodes):
                    clade_present[clade] = True
                    # the matching node with the smallest xvalue represents the clade
                    base_node = tmp_nodes[0]
                    clade_xval[clade] = base_node.xvalue
                    clade_yval[clade] = base_node.yvalue
                    if hasattr(base_node, 'freq'):
                        for region in base_node.freq:
                            # any failure (including a missing 'clades' entry
                            # in self.frequencies) only prints the raw values
                            try:
                                self.frequencies["clades"][region][
                                    clade.lower()] = [
                                        round(x, 3)
                                        for x in base_node.freq[region]
                                    ]
                                print "added frequencies", region, clade
                            except:
                                print base_node.freq[region]
                else:
                    clade_present[clade] = False
                    print "clade", clade, gt, "not in tree"
        # append clades, coordinates and genotype to meta
        self.tree_json["clade_annotations"] = [
            (clade, clade_xval[clade], clade_yval[clade],
             "/".join([gene + ':' + str(pos) + aa for gene, pos, aa in gt]))
            for clade, gt in self.clade_designations.iteritems()
            if clade in annotations and clade_present[clade] == True
        ]
    write_json(self.tree_json, self.auspice_tree_fname, indent=None)
    # read the file back as a check; on failure it is rewritten with indentation
    try:
        read_json(self.auspice_tree_fname)
    except:
        print "Read failed, rewriting with indents"
        write_json(self.tree_json, self.auspice_tree_fname, indent=1)
    # Write out frequencies
    if hasattr(self, 'frequencies'):
        if not hasattr(self, 'aa_entropy') and not hasattr(
                self, 'nuc_entropy'):
            self.determine_variable_positions()
        if hasattr(self, 'aa_entropy'):
            self.frequencies["entropy"] = {}
            self.frequencies["location"] = {}
            for anno, alnS in self.aa_entropy.iteritems():
                # NOTE(review): both entries are location.start; the second
                # one looks like it was meant to be location.end -- confirm
                # against the consumer of this file before changing.
                self.frequencies["location"][anno] = [int(self.cds[anno].location.start),\
                                                      int(self.cds[anno].location.start)]
                self.frequencies["entropy"][anno] = [[
                    pos, S, muts
                ] for pos, S, muts in izip(
                    xrange(alnS.shape[0]), alnS,
                    self.variable_aa_identities[anno])]
        elif seq == 'nuc' and hasattr(self, 'nuc_entropy'):
            self.frequencies["entropy"] = [
                [pos, S, muts] for pos, S, muts in izip(
                    xrange(self.nuc_entropy.shape[0]), self.nuc_entropy,
                    self.variable_nuc_identities)
            ]
        write_json(self.frequencies, self.auspice_frequency_fname)
    # Write out metadata
    print "Writing out metadata"
    meta = {}
    # the 'X' marker is used to strip a leading zero from the day of month
    meta["updated"] = time.strftime("X%d %b %Y").replace('X0', 'X').replace(
        'X', '')
    try:
        from pygit2 import Repository, discover_repository
        current_working_directory = os.getcwd()
        repository_path = discover_repository(current_working_directory)
        repo = Repository(repository_path)
        commit_id = repo[repo.head.target].id
        meta["commit"] = str(commit_id)
    except ImportError:
        # NOTE(review): only a missing pygit2 is handled; errors raised while
        # locating or opening the repository propagate -- confirm intended.
        meta["commit"] = "unknown"
    if hasattr(self, "date_region_count"):
        meta["regions"] = self.regions
        # one row per (year, month): label followed by the count per region
        meta["virus_stats"] = [
            [str(y) + '-' + str(m)] +
            [self.date_region_count[(y, m)][reg] for reg in self.regions]
            for y, m in sorted(self.date_region_count.keys())
        ]
    write_json(meta, self.auspice_meta_fname, indent=None)
    self.export_accession_numbers()
def export_to_auspice(self, tree_fields = [], tree_pop_list = [], annotations = [], seq='aa'): from tree_util import dendropy_to_json, all_descendants from io_util import write_json, read_json print time.strftime("%H:%M:%S") + " ---" # Move sequence data to separate file print "Writing sequences" elems = {} for node in self.tree: if hasattr(node, "clade") and hasattr(node, "seq"): elems[node.clade] = {} elems[node.clade]['nuc'] = {pos:state for pos, (state, ancstate) in enumerate(izip(node.seq, self.tree.seed_node.seq)) if state!=ancstate} for anno, aa_seq in node.aa_seq.iteritems(): elems[node.clade][anno] = {pos:state for pos, (state, ancstate) in enumerate(izip(aa_seq, self.tree.seed_node.aa_seq[anno])) if state!=ancstate} elems['root'] = {} elems['root']['nuc'] = self.tree.seed_node.seq for anno, aa_seq in self.tree.seed_node.aa_seq.iteritems(): elems['root'][anno] = aa_seq write_json(elems, self.auspice_sequences_fname, indent=None) print "Writing tree" self.tree_json = dendropy_to_json(self.tree.seed_node, tree_fields) for node in all_descendants(self.tree_json): for attr in tree_pop_list: if attr in node: node.pop(attr, None) if "freq" in node: for reg in node["freq"]: try: node["freq"][reg] = [round(x,3) for x in node["freq"][reg]] except: node["freq"][reg] = "undefined" if hasattr(self,"clade_designations"): # find basal node of clade and assign clade x and y values based on this basal node clade_present = {} clade_xval = {} clade_yval = {} self.frequencies['clades'] = {reg:{"pivots":list(self.tree.seed_node.pivots)} for reg in self.tree.seed_node.freq} for clade, gt in self.clade_designations.iteritems(): if clade in annotations: print "Annotating clade", clade tmp_nodes = sorted((node for node in self.tree.postorder_node_iter() if not node.is_leaf() and all([node.aa_seq[gene][pos-1]==aa for gene, pos, aa in gt])), key=lambda node: node.xvalue) if len(tmp_nodes): clade_present[clade] = True base_node = tmp_nodes[0] clade_xval[clade] = base_node.xvalue 
clade_yval[clade] = base_node.yvalue for region in base_node.freq: try: self.frequencies["clades"][region][clade.lower()] = [round(x,3) for x in base_node.freq[region]] print "added frequencies",region, clade except: print base_node.freq[region] else: clade_present[clade] = False print "clade",clade, gt, "not in tree" # append clades, coordinates and genotype to meta self.tree_json["clade_annotations"] = [(clade, clade_xval[clade],clade_yval[clade], "/".join([gene+':'+str(pos)+aa for gene, pos, aa in gt])) for clade, gt in self.clade_designations.iteritems() if clade in annotations and clade_present[clade] == True] write_json(self.tree_json, self.auspice_tree_fname, indent=None) try: read_json(self.auspice_tree_fname) except: print "Read failed, rewriting with indents" write_json(self.tree_json, self.auspice_tree_fname, indent=1) # Include genotype frequencies if hasattr(self, 'frequencies'): if not hasattr(self, 'aa_entropy') and not hasattr(self, 'nuc_entropy'): self.determine_variable_positions() if hasattr(self, 'aa_entropy'): self.frequencies["entropy"] = {} self.frequencies["location"] = {} for anno, alnS in self.aa_entropy.iteritems(): self.frequencies["location"][anno] = [int(self.cds[anno].location.start),\ int(self.cds[anno].location.start)] self.frequencies["entropy"][anno] = [ [pos, S, muts] for pos,S,muts in izip(xrange(alnS.shape[0]), alnS,self.variable_aa_identities[anno]) ] elif seq=='nuc' and hasattr(self, 'nuc_entropy'): self.frequencies["entropy"] = [ [pos, S, muts] for pos,S,muts in izip(xrange(self.nuc_entropy.shape[0]), self.nuc_entropy,self.variable_nuc_identities) ] write_json(self.frequencies, self.auspice_frequency_fname) print("WRITEEN") # Write out metadata print "Writing out metadata" meta = {} meta["updated"] = time.strftime("X%d %b %Y").replace('X0','X').replace('X','') try: from pygit2 import Repository, discover_repository current_working_directory = os.getcwd() repository_path = discover_repository(current_working_directory) repo = 
Repository(repository_path) commit_id = repo[repo.head.target].id meta["commit"] = str(commit_id) except ImportError: meta["commit"] = "unknown" if hasattr(self,"date_region_count"): meta["regions"] = self.regions meta["virus_stats"] = [ [str(y)+'-'+str(m)] + [self.date_region_count[(y,m)][reg] for reg in self.regions] for y,m in sorted(self.date_region_count.keys()) ] write_json(meta, self.auspice_meta_fname, indent=0)
def refine_tree(self):
    """Run ``tree_refine.main`` on the tree and write the tree as JSON.

    The refinement receives the tree, the viruses and the 'outgroup' and
    'cds' entries of the config; the tree is then written to
    ``self.intermediate_tree_fname``.
    """
    import tree_refine

    outgroup = config['outgroup']
    cds = config['cds']
    tree_refine.main(self.tree, self.viruses, outgroup, cds)

    refined_json = dendropy_to_json(self.tree.seed_node)
    write_json(refined_json, self.intermediate_tree_fname)
def align(self):
    """Replace ``self.viruses`` by the result of ``virus_align.main``.

    The result is also written to 'virus_align.json'.
    """
    import virus_align

    self.viruses = virus_align.main(self.viruses)
    write_json(self.viruses, 'virus_align.json')
def clean_viruses(self):
    """Replace ``self.viruses`` by the result of ``virus_clean.main``.

    The result is also written to ``self.clean_virus_fname``.
    """
    import virus_clean

    cleaned = virus_clean.main(self.viruses)
    self.viruses = cleaned
    write_json(self.viruses, self.clean_virus_fname)