def load_from_file(self, tree_fname=None, virus_fname = None):
		"""Restore previously saved results from JSON files, where present.

		Each file is optional: the matching attribute is assigned only when
		the file exists and is left untouched otherwise.

		tree_fname  -- tree JSON; defaults to self.intermediate_tree_fname.
		               Loaded into self.tree via json_to_dendropy.
		virus_fname -- virus JSON; defaults to self.clean_virus_fname.
		               Loaded into self.viruses.

		The frequency file is always self.frequency_fname and is loaded
		into self.frequencies.
		"""
		tree_path = self.intermediate_tree_fname if tree_fname is None else tree_fname
		if os.path.isfile(tree_path):
			tree_json = read_json(tree_path)
			self.tree = json_to_dendropy(tree_json)

		virus_path = self.clean_virus_fname if virus_fname is None else virus_fname
		if os.path.isfile(virus_path):
			self.viruses = read_json(virus_path)

		if os.path.isfile(self.frequency_fname):
			frequency_json = read_json(self.frequency_fname)
			self.frequencies = frequency_json
Exemple #2
0
    def export_HI_mutation_effects(self):
        """Write the HI mutation effects to one TSV and two JSON files.

        Outputs:
          * <output_path><prefix><resolution_prefix>HI_mutation_effects.tsv:
            one tab-separated row per entry of self.mutation_effects, holding
            the '/'-joined second elements of self.mutation_clusters[mut],
            the effect rounded to 4 decimals and self.mutation_counter[mut].
          * self.auspice_HI_fname: JSON dict mapping mut[0] + ':' + mut[1]
            to the effect, for effects larger than 0.01.
          * self.auspice_HI_display_mutations: JSON dict shared between
            lineages and resolutions. Only the entry
            [self.virus_type][self.resolution] is replaced, by a list of
            (mutation string, effect rounded to 2 decimals) pairs for
            effects larger than 0.001, largest effect first.
        """
        from io_util import write_json, read_json
        # make a tab delimited file with the mutation effects
        table_effects = []
        HI_mutation_effects_fname = (self.output_path + self.prefix +
                                     self.resolution_prefix +
                                     'HI_mutation_effects.tsv')
        with open(HI_mutation_effects_fname, 'w') as ofile:
            for mut, val in self.mutation_effects.iteritems():
                mut_str = '/'.join([x[1] for x in self.mutation_clusters[mut]])
                row = [mut_str, str(np.round(val, 4)),
                       str(self.mutation_counter[mut])]
                ofile.write('\t'.join(row) + '\n')
                if val > 0.001:
                    table_effects.append((mut_str, round(val, 2)))

        # The display file collects results for different lineages and
        # resolutions, so start from its current content when it is readable.
        try:
            display_effects = read_json(self.auspice_HI_display_mutations)
        except Exception:
            # File does not exist yet (or cannot be read): start empty.
            # Deliberately not a bare except, so that KeyboardInterrupt and
            # SystemExit abort instead of overwriting the shared file below.
            display_effects = {}

        # effects for use in the js are indexed by first mutation in cluster
        model_effects = {
            mut[0] + ':' + mut[1]: val
            for mut, val in self.mutation_effects.iteritems() if val > 0.01
        }
        write_json(model_effects, self.auspice_HI_fname)

        table_effects.sort(key=lambda x: x[1], reverse=True)
        lineage_effects = display_effects.setdefault(self.virus_type, {})
        lineage_effects[self.resolution] = table_effects
        write_json(display_effects, self.auspice_HI_display_mutations)
Exemple #3
0
	def export_HI_mutation_effects(self):
		"""Write the HI mutation effects to one TSV and two JSON files.

		Outputs:
		  * <output_path><prefix><resolution_prefix>HI_mutation_effects.tsv:
		    one tab-separated row per entry of self.mutation_effects, holding
		    the '/'-joined second elements of self.mutation_clusters[mut],
		    the effect rounded to 4 decimals and self.mutation_counter[mut].
		  * self.auspice_HI_fname: JSON dict mapping mut[0] + ':' + mut[1]
		    to the effect, for effects larger than 0.01.
		  * self.auspice_HI_display_mutations: JSON dict shared between
		    lineages and resolutions. Only the entry
		    [self.virus_type][self.resolution] is replaced, by a list of
		    (mutation string, effect rounded to 2 decimals) pairs for
		    effects larger than 0.001, largest effect first.
		"""
		from io_util import write_json, read_json
		# make a tab delimited file with the mutation effects
		table_effects = []
		HI_mutation_effects_fname = self.output_path+self.prefix+self.resolution_prefix+'HI_mutation_effects.tsv'
		with open(HI_mutation_effects_fname, 'w') as ofile:
			for mut, val in self.mutation_effects.iteritems():
				mut_str = '/'.join([x[1] for x in self.mutation_clusters[mut]])
				ofile.write(mut_str+'\t'+str(np.round(val,4))+'\t'+str(self.mutation_counter[mut])+'\n')
				if val>0.001:
					table_effects.append((mut_str,round(val,2)))

		# The display file collects results for different lineages and
		# resolutions, so start from its current content when it is readable.
		try:
			display_effects = read_json(self.auspice_HI_display_mutations)
		except Exception:
			# File does not exist yet (or cannot be read): start empty.
			# Deliberately not a bare except, so that KeyboardInterrupt and
			# SystemExit abort instead of overwriting the shared file below.
			display_effects = {}

		# effects for use in the js are indexed by first mutation in cluster
		model_effects = {mut[0]+':'+mut[1]:val for mut, val in
						 self.mutation_effects.iteritems() if val>0.01}
		write_json(model_effects, self.auspice_HI_fname)

		if self.virus_type not in display_effects:
			display_effects[self.virus_type] = {}
		table_effects.sort(key = lambda x:x[1], reverse=True)
		display_effects[self.virus_type][self.resolution] = table_effects
		write_json(display_effects, self.auspice_HI_display_mutations)
def test(params, tree_fname='data/tree_refine_10y_50v.json'):
    """Fit the fitness model on a stored tree and return the model.

    params     -- mapping providing 'predictors' (passed to fitness_model)
                  and 'niter' (passed to fm.predict).
    tree_fname -- tree JSON file to load; the default is the file this
                  function previously had hard-coded.

    Returns the fitness_model instance after predict() has been called.
    """
    # Only the helpers that are actually used are imported, so running this
    # no longer requires Biopython to be installed.
    from io_util import read_json
    from tree_util import json_to_dendropy
    tree = json_to_dendropy(read_json(tree_fname))
    fm = fitness_model(tree, predictors=params['predictors'], verbose=2)
    fm.predict(niter=params['niter'])
    # To inspect the result visually, convert the tree with
    # tree_util.to_Biopython, color it with
    # tree_util.color_BioTree_by_attribute(btree, 'fitness') and draw it with
    # Bio.Phylo.draw(btree, label_func=lambda x: '').
    return fm
def test(params, tree_fname='data/tree_refine_10y_50v.json'):
	"""Fit the fitness model on a stored tree and return the model.

	params     -- mapping providing 'predictors' (passed to fitness_model)
	              and 'niter' (passed to fm.predict).
	tree_fname -- tree JSON file to load; the default is the file this
	              function previously had hard-coded.

	Returns the fitness_model instance after predict() has been called.
	"""
	# Only the helpers that are actually used are imported, so running this
	# no longer requires Biopython to be installed.
	from io_util import read_json
	from tree_util import json_to_dendropy
	tree = json_to_dendropy(read_json(tree_fname))
	fm = fitness_model(tree, predictors = params['predictors'], verbose=2)
	fm.predict(niter = params['niter'])
	# To inspect the result visually, convert the tree with
	# tree_util.to_Biopython, color it with
	# tree_util.color_BioTree_by_attribute(btree, 'fitness') and draw it with
	# Bio.Phylo.draw(btree, label_func=lambda x: '').
	return fm
Exemple #6
0
def test():
    """Run calc_delta_LBI on 'auspice/tree.json' and draw the result.

    The memory time scale tau handed to calc_delta_LBI is 2**-4 times the
    value returned by get_average_T2(tree, 365). The tree is then converted
    with to_Biopython, colored by its 'date' attribute and drawn with
    Bio.Phylo.draw.
    """
    from Bio import Phylo
    tree_json = read_json('auspice/tree.json')
    tree = json_to_dendropy(tree_json)

    # Single-argument print() behaves the same under Python 2 and 3.
    print("calculate local branching index")
    mean_pair_dist = get_average_T2(tree, 365)
    memory_scale = mean_pair_dist * 2**-4
    print("%s %s" % ("avg pairwise distance:", mean_pair_dist))
    print("%s %s" % ("memory time scale:", memory_scale))

    reference_date = datetime.datetime(2014, 1, 1)
    calc_delta_LBI(tree, memory_scale, reference_date)

    bio_tree = to_Biopython(tree)
    color_BioTree_by_attribute(bio_tree, 'date')
    Phylo.draw(bio_tree)
Exemple #7
0
def main():
    """Annotate 'data/tree_refine.json' with LBI and save the result.

    The memory time scale tau handed to calc_LBI is 2**-4 times the value
    returned by get_average_T2(tree, 365). The annotated tree is written to
    'data/tree_LBI.json'.
    """
    # Single-argument print() behaves the same under Python 2 and 3.
    print("--- Tree LBI at " + time.strftime("%H:%M:%S") + " ---")

    tree_json = read_json('data/tree_refine.json')
    tree = json_to_dendropy(tree_json)

    print("calculate local branching index")
    mean_pair_dist = get_average_T2(tree, 365)
    memory_scale = mean_pair_dist * 2**-4
    print("%s %s" % ("avg pairwise distance:", mean_pair_dist))
    print("%s %s" % ("memory time scale:", memory_scale))
    calc_LBI(tree, tau=memory_scale)

    annotated_json = dendropy_to_json(tree.seed_node)
    write_json(annotated_json, "data/tree_LBI.json")
def main(params):
    """Fit the fitness model on 'data/tree_refine.json' and save the tree.

    params -- mapping providing 'predictors' (passed to fitness_model) and
              'niter' (passed to fm.predict).

    Returns the name of the written file, 'data/tree_fitness.json'.
    """
    import time
    from io_util import read_json, write_json
    from tree_util import json_to_dendropy, dendropy_to_json

    # Single-argument print() behaves the same under Python 2 and 3.
    started_at = time.strftime("%H:%M:%S")
    print("--- Start fitness model optimization at " + started_at + " ---")

    tree_json = read_json("data/tree_refine.json")
    tree = json_to_dendropy(tree_json)

    fm = fitness_model(tree, predictors=params["predictors"], verbose=1)
    fm.predict(niter=params["niter"])

    out_fname = "data/tree_fitness.json"
    annotated_json = dendropy_to_json(tree.seed_node)
    write_json(annotated_json, out_fname)
    return out_fname
def main(params):
	"""Fit the fitness model on 'data/tree_refine.json' and save the tree.

	params -- mapping providing 'predictors' (passed to fitness_model) and
	          'niter' (passed to fm.predict).

	Returns the name of the written file, 'tree_fitness.json'.
	"""
	import time
	from io_util import read_json, write_json
	from tree_util import json_to_dendropy, dendropy_to_json

	# Single-argument print() behaves the same under Python 2 and 3.
	started_at = time.strftime("%H:%M:%S")
	print("--- Start fitness model optimization at " + started_at + " ---")

	tree_json = read_json('data/tree_refine.json')
	tree = json_to_dendropy(tree_json)

	fm = fitness_model(tree, predictors = params['predictors'], verbose=1)
	fm.predict(niter = params['niter'])

	# NOTE(review): the input is read from data/ but the output goes to the
	# working directory -- confirm this is intended.
	out_fname = "tree_fitness.json"
	annotated_json = dendropy_to_json(tree.seed_node)
	write_json(annotated_json, out_fname)
	return out_fname
Exemple #10
0
def main(in_fname='data/tree_refine.json', tree=True):
	"""Run assign_fitness on the content of a JSON file and save the result.

	in_fname -- JSON file to read.
	tree     -- when true, the loaded JSON is converted with
	            json_to_dendropy before assign_fitness is called and the
	            result is written to 'data/tree_tolerance.json'; otherwise
	            the loaded data is used as is and written to
	            'data/virus_tolerance.json'.

	Returns the tuple (output file name, annotated data).
	"""
	# Single-argument print() behaves the same under Python 2 and 3.
	print("--- Mutational tolerance at " + time.strftime("%H:%M:%S") + " ---")
	loaded = read_json(in_fname)
	viruses = json_to_dendropy(loaded) if tree else loaded

	assign_fitness(viruses)

	if not tree:
		out_fname = "data/virus_tolerance.json"
		payload = viruses
	else:
		out_fname = "data/tree_tolerance.json"
		payload = dendropy_to_json(viruses.seed_node)
	write_json(payload, out_fname)
	return out_fname, viruses
def main(in_fname='tree_refine.json', tree=True):
	"""Run assign_fitness on the content of a JSON file and save the result.

	in_fname -- JSON file to read.
	tree     -- when true, the loaded JSON is converted with
	            json_to_dendropy before assign_fitness is called and the
	            result is written to 'tree_tolerance.json'; otherwise the
	            loaded data is used as is and written to
	            'virus_tolerance.json'.

	Returns the tuple (output file name, annotated data).
	"""
	# Single-argument print() behaves the same under Python 2 and 3.
	print("--- Mutational tolerance at " + time.strftime("%H:%M:%S") + " ---")
	loaded = read_json(in_fname)
	viruses = json_to_dendropy(loaded) if tree else loaded

	assign_fitness(viruses)

	if not tree:
		out_fname = "virus_tolerance.json"
		payload = viruses
	else:
		out_fname = "tree_tolerance.json"
		payload = dendropy_to_json(viruses.seed_node)
	write_json(payload, out_fname)
	return out_fname, viruses
Exemple #12
0
def main(tree_fname = 'data/tree_refine.json'):
	"""Decorate a stored tree with predictor values and save it.

	tree_fname -- tree JSON file to load.

	Runs calc_epitope_distance and calc_nonepitope_distance on the tree and
	writes the result to 'data/tree_predictors.json'. The LBI step is
	currently disabled.

	Returns the name of the written file.
	"""
	# Single-argument print() behaves the same under Python 2 and 3.
	print("--- Testing predictor evaluations ---")
	tree = json_to_dendropy(read_json(tree_fname))

	print("Calculating epitope distances")
	calc_epitope_distance(tree)

	print("Calculating nonepitope distances")
	calc_nonepitope_distance(tree)

	# The LBI call is commented out, so report it as skipped rather than
	# claiming it is being calculated. Re-enable the call (and restore the
	# message) to include LBI in the written tree.
	print("Skipping LBI calculation (disabled)")
	# calc_LBI(tree)

	print("Writing decorated tree")
	out_fname = "data/tree_predictors.json"
	write_json(dendropy_to_json(tree.seed_node), out_fname)
	return out_fname
Exemple #13
0
    def export_to_auspice(self,
                          tree_fields=[],
                          tree_pop_list=[],
                          annotations=[],
                          seq='aa'):
        """Write the sequence, tree, frequency and metadata JSON for auspice.

        tree_fields   -- passed as second argument to dendropy_to_json
                         (presumably extra node attributes to export --
                         confirm against tree_util).
        tree_pop_list -- keys removed from every node of the tree JSON.
        annotations   -- names of the clades in self.clade_designations that
                         should be annotated on the tree.
        seq           -- only consulted when self has no aa_entropy: 'nuc'
                         selects export of the nucleotide entropy.

        Files written: self.auspice_sequences_fname, self.auspice_tree_fname,
        self.auspice_frequency_fname (only if self.frequencies exists) and
        self.auspice_meta_fname. Finally self.export_accession_numbers() is
        called.

        The list defaults are not mutated by this method and are kept for
        interface compatibility.
        """
        from tree_util import dendropy_to_json, all_descendants
        from io_util import write_json, read_json
        # Single-argument print() behaves the same under Python 2 and 3.
        print("--- Streamline at " + time.strftime("%H:%M:%S") + " ---")

        # --- sequences -----------------------------------------------------
        # Move sequence data to a separate file. For every node that has a
        # clade id only the positions differing from the root are stored.
        print("Writing sequences")
        root = self.tree.seed_node
        elems = {}
        for node in self.tree:
            if hasattr(node, "clade") and hasattr(node, "seq"):
                elems[node.clade] = {}
                elems[node.clade]['nuc'] = {
                    pos: state
                    for pos, (state, ancstate) in enumerate(
                        izip(node.seq, root.seq))
                    if state != ancstate
                }
                for anno, aa_seq in node.aa_seq.iteritems():
                    elems[node.clade][anno] = {
                        pos: state
                        for pos, (state, ancstate) in enumerate(
                            izip(aa_seq, root.aa_seq[anno]))
                        if state != ancstate
                    }

        # The root is stored in full so the differences can be resolved.
        elems['root'] = {}
        elems['root']['nuc'] = root.seq
        for anno, aa_seq in root.aa_seq.iteritems():
            elems['root'][anno] = aa_seq
        write_json(elems, self.auspice_sequences_fname, indent=None)

        # --- tree ----------------------------------------------------------
        print("Writing tree")
        self.tree_json = dendropy_to_json(self.tree.seed_node, tree_fields)
        for node in all_descendants(self.tree_json):
            for attr in tree_pop_list:
                node.pop(attr, None)
            if "freq" in node:
                for reg in node["freq"]:
                    try:
                        node["freq"][reg] = [
                            round(x, 3) for x in node["freq"][reg]
                        ]
                    except Exception:
                        # Entry cannot be rounded (e.g. not a sequence of
                        # numbers): export a placeholder instead.
                        node["freq"][reg] = "undefined"

        if hasattr(self, "clade_designations"):
            # find basal node of clade and assign clade x and y values based
            # on this basal node
            clade_present = {}
            clade_xval = {}
            clade_yval = {}
            if hasattr(self.tree.seed_node, "freq"):
                self.frequencies['clades'] = {
                    reg: {
                        "pivots": list(self.tree.seed_node.pivots)
                    }
                    for reg in self.tree.seed_node.freq
                }

            for clade, gt in self.clade_designations.iteritems():
                if clade not in annotations:
                    continue
                print("%s %s" % ("Annotating clade", clade))
                # Internal nodes carrying the complete genotype, ordered by
                # xvalue; the first one is used as basal node of the clade.
                tmp_nodes = sorted(
                    (node for node in self.tree.postorder_node_iter()
                     if not node.is_leaf() and all([
                         node.aa_seq[gene][pos - 1] == aa
                         for gene, pos, aa in gt
                     ])),
                    key=lambda node: node.xvalue)
                if not tmp_nodes:
                    clade_present[clade] = False
                    print("%s %s %s %s" % ("clade", clade, gt, "not in tree"))
                    continue

                clade_present[clade] = True
                base_node = tmp_nodes[0]
                clade_xval[clade] = base_node.xvalue
                clade_yval[clade] = base_node.yvalue
                if hasattr(base_node, 'freq'):
                    for region in base_node.freq:
                        try:
                            self.frequencies["clades"][region][
                                clade.lower()] = [
                                    round(x, 3)
                                    for x in base_node.freq[region]
                                ]
                            print("%s %s %s" % ("added frequencies", region,
                                                clade))
                        except Exception:
                            # Best effort: show the offending entry and
                            # continue with the next region.
                            print(base_node.freq[region])

            # append clades, coordinates and genotype to meta
            self.tree_json["clade_annotations"] = [
                (clade, clade_xval[clade], clade_yval[clade],
                 "/".join([gene + ':' + str(pos) + aa
                           for gene, pos, aa in gt]))
                for clade, gt in self.clade_designations.iteritems()
                if clade in annotations and clade_present[clade]
            ]

        write_json(self.tree_json, self.auspice_tree_fname, indent=None)
        # Verify that the file just written can be read back; if not, write
        # it again with indentation.
        try:
            read_json(self.auspice_tree_fname)
        except Exception:
            print("Read failed, rewriting with indents")
            write_json(self.tree_json, self.auspice_tree_fname, indent=1)

        # --- frequencies ---------------------------------------------------
        if hasattr(self, 'frequencies'):
            if not hasattr(self, 'aa_entropy') and not hasattr(
                    self, 'nuc_entropy'):
                self.determine_variable_positions()

            if hasattr(self, 'aa_entropy'):
                self.frequencies["entropy"] = {}
                self.frequencies["location"] = {}
                for anno, alnS in self.aa_entropy.iteritems():
                    cds_location = self.cds[anno].location
                    # [start, end] of the coding region. The second entry
                    # used to repeat the start coordinate.
                    self.frequencies["location"][anno] = [
                        int(cds_location.start),
                        int(cds_location.end)
                    ]
                    self.frequencies["entropy"][anno] = [[
                        pos, S, muts
                    ] for pos, S, muts in izip(
                        xrange(alnS.shape[0]), alnS,
                        self.variable_aa_identities[anno])]
            elif seq == 'nuc' and hasattr(self, 'nuc_entropy'):
                self.frequencies["entropy"] = [
                    [pos, S, muts] for pos, S, muts in izip(
                        xrange(self.nuc_entropy.shape[0]), self.nuc_entropy,
                        self.variable_nuc_identities)
                ]

            write_json(self.frequencies, self.auspice_frequency_fname)

        # --- metadata ------------------------------------------------------
        print("Writing out metadata")
        meta = {}
        # Day of month without leading zero: mark the day with 'X', drop a
        # zero directly following the mark, then drop the mark itself.
        meta["updated"] = time.strftime("X%d %b %Y").replace(
            'X0', 'X').replace('X', '')
        try:
            from pygit2 import Repository, discover_repository
            current_working_directory = os.getcwd()
            repository_path = discover_repository(current_working_directory)
            repo = Repository(repository_path)
            commit_id = repo[repo.head.target].id
            meta["commit"] = str(commit_id)
        except ImportError:
            meta["commit"] = "unknown"
        except Exception as err:
            # The commit id is informational only. Running outside a git
            # checkout must not abort the export after all other files
            # have already been written.
            print("could not determine git commit: %s" % (err,))
            meta["commit"] = "unknown"

        if hasattr(self, "date_region_count"):
            meta["regions"] = self.regions
            meta["virus_stats"] = [
                [str(y) + '-' + str(m)] +
                [self.date_region_count[(y, m)][reg] for reg in self.regions]
                for y, m in sorted(self.date_region_count)
            ]
        write_json(meta, self.auspice_meta_fname, indent=None)
        self.export_accession_numbers()
Exemple #14
0
	def export_to_auspice(self, tree_fields = [], tree_pop_list = [], annotations = [], seq='aa'):
		"""Write the sequence, tree, frequency and metadata JSON for auspice.

		tree_fields   -- passed as second argument to dendropy_to_json
		                 (presumably extra node attributes to export --
		                 confirm against tree_util).
		tree_pop_list -- keys removed from every node of the tree JSON.
		annotations   -- names of the clades in self.clade_designations that
		                 should be annotated on the tree.
		seq           -- only consulted when self has no aa_entropy: 'nuc'
		                 selects export of the nucleotide entropy.

		Files written: self.auspice_sequences_fname, self.auspice_tree_fname,
		self.auspice_frequency_fname (only if self.frequencies exists) and
		self.auspice_meta_fname.

		The list defaults are not mutated by this method and are kept for
		interface compatibility.
		"""
		from tree_util import dendropy_to_json, all_descendants
		from io_util import write_json, read_json
		# Single-argument print() behaves the same under Python 2 and 3.
		print(time.strftime("%H:%M:%S") + " ---")

		# --- sequences -----------------------------------------------------
		# Move sequence data to a separate file. For every node that has a
		# clade id only the positions differing from the root are stored.
		print("Writing sequences")
		root = self.tree.seed_node
		elems = {}
		for node in self.tree:
			if hasattr(node, "clade") and hasattr(node, "seq"):
				elems[node.clade] = {}
				elems[node.clade]['nuc'] = {pos:state for pos, (state, ancstate) in
								enumerate(izip(node.seq, root.seq)) if state!=ancstate}
				for anno, aa_seq in node.aa_seq.iteritems():
					elems[node.clade][anno] = {pos:state for pos, (state, ancstate) in
								enumerate(izip(aa_seq, root.aa_seq[anno])) if state!=ancstate}

		# The root is stored in full so the differences can be resolved.
		elems['root'] = {}
		elems['root']['nuc'] = root.seq
		for anno, aa_seq in root.aa_seq.iteritems():
			elems['root'][anno] = aa_seq
		write_json(elems, self.auspice_sequences_fname, indent=None)

		# --- tree ----------------------------------------------------------
		print("Writing tree")
		self.tree_json = dendropy_to_json(self.tree.seed_node, tree_fields)
		for node in all_descendants(self.tree_json):
			for attr in tree_pop_list:
				node.pop(attr, None)
			if "freq" in node:
				for reg in node["freq"]:
					try:
						node["freq"][reg] = [round(x,3) for x in node["freq"][reg]]
					except Exception:
						# Entry cannot be rounded (e.g. not a sequence of
						# numbers): export a placeholder instead.
						node["freq"][reg] = "undefined"

		if hasattr(self,"clade_designations"):
			# find basal node of clade and assign clade x and y values based on this basal node
			clade_present = {}
			clade_xval = {}
			clade_yval = {}
			# Trees without frequency estimates have no freq/pivots on the
			# root; skip the clade frequency table instead of failing.
			if hasattr(self.tree.seed_node, "freq"):
				self.frequencies['clades'] = {reg:{"pivots":list(self.tree.seed_node.pivots)}
												for reg in self.tree.seed_node.freq}

			for clade, gt in self.clade_designations.iteritems():
				if clade not in annotations:
					continue
				print("%s %s" % ("Annotating clade", clade))
				# Internal nodes carrying the complete genotype, ordered by
				# xvalue; the first one is used as basal node of the clade.
				tmp_nodes = sorted((node for node in self.tree.postorder_node_iter()
					if not node.is_leaf() and all([node.aa_seq[gene][pos-1]==aa for gene, pos, aa in gt])),
					key=lambda node: node.xvalue)
				if not tmp_nodes:
					clade_present[clade] = False
					print("%s %s %s %s" % ("clade", clade, gt, "not in tree"))
					continue

				clade_present[clade] = True
				base_node = tmp_nodes[0]
				clade_xval[clade] = base_node.xvalue
				clade_yval[clade] = base_node.yvalue
				if hasattr(base_node, 'freq'):
					for region in base_node.freq:
						try:
							self.frequencies["clades"][region][clade.lower()] = [round(x,3) for x in base_node.freq[region]]
							print("%s %s %s" % ("added frequencies", region, clade))
						except Exception:
							# Best effort: show the offending entry and
							# continue with the next region.
							print(base_node.freq[region])

			# append clades, coordinates and genotype to meta
			self.tree_json["clade_annotations"] = [(clade, clade_xval[clade],clade_yval[clade],
								"/".join([gene+':'+str(pos)+aa for gene, pos, aa in gt]))
							for clade, gt in self.clade_designations.iteritems()
							if clade in annotations and clade_present[clade]]

		write_json(self.tree_json, self.auspice_tree_fname, indent=None)
		# Verify that the file just written can be read back; if not, write
		# it again with indentation.
		try:
			read_json(self.auspice_tree_fname)
		except Exception:
			print("Read failed, rewriting with indents")
			write_json(self.tree_json, self.auspice_tree_fname, indent=1)

		# --- frequencies ---------------------------------------------------
		# Include genotype frequencies
		if hasattr(self, 'frequencies'):
			if not hasattr(self, 'aa_entropy') and not hasattr(self, 'nuc_entropy'):
				self.determine_variable_positions()

			if hasattr(self, 'aa_entropy'):
				self.frequencies["entropy"] = {}
				self.frequencies["location"] = {}
				for anno, alnS in self.aa_entropy.iteritems():
					cds_location = self.cds[anno].location
					# [start, end] of the coding region. The second entry
					# used to repeat the start coordinate.
					self.frequencies["location"][anno] = [int(cds_location.start),
															int(cds_location.end)]
					self.frequencies["entropy"][anno] = [ [pos, S, muts] for pos,S,muts in
						izip(xrange(alnS.shape[0]), alnS,self.variable_aa_identities[anno]) ]
			elif seq=='nuc' and hasattr(self, 'nuc_entropy'):
				self.frequencies["entropy"] = [ [pos, S, muts] for pos,S,muts in
						izip(xrange(self.nuc_entropy.shape[0]), self.nuc_entropy,self.variable_nuc_identities) ]

			write_json(self.frequencies, self.auspice_frequency_fname)

		# --- metadata ------------------------------------------------------
		# Write out metadata
		print("Writing out metadata")
		meta = {}
		# Day of month without leading zero: mark the day with 'X', drop a
		# zero directly following the mark, then drop the mark itself.
		meta["updated"] = time.strftime("X%d %b %Y").replace('X0','X').replace('X','')
		try:
			from pygit2 import Repository, discover_repository
			current_working_directory = os.getcwd()
			repository_path = discover_repository(current_working_directory)
			repo = Repository(repository_path)
			commit_id = repo[repo.head.target].id
			meta["commit"] = str(commit_id)
		except ImportError:
			meta["commit"] = "unknown"
		except Exception as err:
			# The commit id is informational only. Running outside a git
			# checkout must not abort the export after all other files
			# have already been written.
			print("could not determine git commit: %s" % (err,))
			meta["commit"] = "unknown"

		if hasattr(self,"date_region_count"):
			meta["regions"] = self.regions
			meta["virus_stats"] = [ [str(y)+'-'+str(m)] + [self.date_region_count[(y,m)][reg] for reg in self.regions]
									for y,m in sorted(self.date_region_count) ]
		write_json(meta, self.auspice_meta_fname, indent=0)