def clade_frequency_correlations_func_polarizer(params):
    """Run a polarizer-based prediction and collect, for every internal node,
    the polarizer value at each memory time scale together with the clade's
    temporal frequency trajectory.

    Returns a 2D array with one row per internal node: the leading columns
    are polarizer values (one per tau in the global ``mem_scale``), the
    trailing columns the temporal frequencies of the matched combined-tree
    node.
    """
    # pin gamma/diffusion; the polarizer run fixes them to 1.0 regardless of
    # the command line (presumably so output file names are reproducible)
    params.diffusion = 1.0
    params.gamma = 1.0
    prediction = test_flu.predict_params(['polarizer'], params)

    # evaluate the polarizer ranking on both leaves and internal nodes
    eval_methods = [('polarizer', '_ext', prediction.terminals),
                    ('polarizer', '_int', prediction.non_terminals)]
    distances, distances_epi, test_data = test_flu.evaluate(prediction, eval_methods, params)

    # frequency evaluation window: June of the prediction year to June of the next
    eval_bins = [date(year=params.year, month=6, day=1),
                 date(year=params.year + 1, month=6, day=1)]

    # build a tree over prediction + test data and score clade expansion on it
    combined_data = test_flu.make_combined_data(prediction, test_data,
                                                collapse=params.collapse)
    combined_tree = flu.flu_ranking(combined_data, time_bins=eval_bins, pseudo_count=0)
    combined_tree.expansion_score()

    # link each prediction-tree node to its counterpart in the combined tree
    tree_utils.find_internal_nodes(prediction.T, combined_tree.T)
    clade_freqs = [n.mirror_node.temporal_frequency for n in prediction.non_terminals]

    # recompute the polarizer for every memory time scale and record one
    # column of values per tau
    pol_columns = []
    for mem_tau in mem_scale:
        prediction.calculate_polarizers(mem=mem_tau)
        pol_columns.append([n.polarizer for n in prediction.non_terminals])

    # rows = internal nodes; columns = polarizers (per tau) then frequencies
    return np.hstack((np.array(pol_columns).T, np.array(clade_freqs)))
def clade_frequency_correlations_func(params):
    """Run a mean-fitness prediction and assemble, per internal node, the
    inferred mean fitness, the polarizer score, and the clade's temporal
    frequency trajectory taken from the matched combined-tree node.

    Returns a 2D array: one row per internal node of the prediction tree.
    """
    prediction = test_flu.predict_params(['mean_fitness'], params)

    # score the mean-fitness ranking on leaves as well as internal nodes
    ranking_methods = [('mean_fitness', '_ext', prediction.terminals),
                       ('mean_fitness', '_int', prediction.non_terminals)]
    distances, distances_epi, test_data = test_flu.evaluate(prediction, ranking_methods, params)

    # clade frequencies are binned from June of the prediction year to June a year later
    eval_window = [date(year=params.year, month=6, day=1),
                   date(year=params.year + 1, month=6, day=1)]
    combined_data = test_flu.make_combined_data(prediction, test_data,
                                                collapse=params.collapse)
    combined_tree = flu.flu_ranking(combined_data, time_bins=eval_window, pseudo_count=0)
    combined_tree.expansion_score()

    # attach mirror_node links from the prediction tree into the combined tree
    tree_utils.find_internal_nodes(prediction.T, combined_tree.T)

    # one row per internal node: [mean fitness, polarizer, freq(t0), freq(t1), ...]
    rows = []
    for node in prediction.non_terminals:
        row = [node.mean_fitness, node.polarizer]
        row.extend(node.mirror_node.temporal_frequency)
        rows.append(row)
    return np.array(rows)
def clade_frequency_correlations_func(params):
    """Predict mean fitness and correlate it, node by node, with observed
    clade frequency trajectories.

    The returned array has one row per internal node of the prediction tree:
    the node's mean fitness, its polarizer score, and the temporal frequency
    values of the matching node in the combined tree.
    """
    prediction = test_flu.predict_params(['mean_fitness'], params)

    # mean-fitness ranking is evaluated on external and internal nodes
    scored = [('mean_fitness', '_ext', prediction.terminals),
              ('mean_fitness', '_int', prediction.non_terminals)]
    distances, distances_epi, test_data = test_flu.evaluate(prediction, scored, params)

    # frequency bins span June of params.year through June of the next year
    bins = [date(year=params.year, month=6, day=1),
            date(year=params.year + 1, month=6, day=1)]
    combined = test_flu.make_combined_data(prediction, test_data,
                                           collapse=params.collapse)
    ctree = flu.flu_ranking(combined, time_bins=bins, pseudo_count=0)
    ctree.expansion_score()

    # pair every prediction-tree node with its combined-tree mirror node
    tree_utils.find_internal_nodes(prediction.T, ctree.T)

    # row layout: [mean_fitness, polarizer, freq(t0), freq(t1), ...]
    return np.array(
        [[nd.mean_fitness, nd.polarizer] + list(nd.mirror_node.temporal_frequency)
         for nd in prediction.non_terminals])
def clade_frequency_correlations_func_polarizer(params):
    """Correlate polarizer scores, computed over a range of memory time
    scales, with the clade frequency trajectories of internal nodes.

    Returns a 2D array whose rows are internal nodes; the leading columns
    hold the polarizer at each tau in the global ``mem_scale``, the trailing
    columns the node's temporal frequencies from the combined tree.
    """
    # the polarizer run fixes gamma/diffusion to 1.0 irrespective of the
    # command line (presumably to keep output file names deterministic)
    params.diffusion = 1.0
    params.gamma = 1.0
    prediction = test_flu.predict_params(['polarizer'], params)

    # rank leaves and internal nodes by their polarizer score
    to_evaluate = [('polarizer', '_ext', prediction.terminals),
                   ('polarizer', '_int', prediction.non_terminals)]
    distances, distances_epi, test_data = test_flu.evaluate(prediction, to_evaluate, params)

    # evaluation window runs June -> June of consecutive years
    window = [date(year=params.year, month=6, day=1),
              date(year=params.year + 1, month=6, day=1)]
    merged = test_flu.make_combined_data(prediction, test_data,
                                         collapse=params.collapse)
    ranked_tree = flu.flu_ranking(merged, time_bins=window, pseudo_count=0)
    ranked_tree.expansion_score()

    # mirror the prediction tree's internal nodes onto the combined tree
    tree_utils.find_internal_nodes(prediction.T, ranked_tree.T)
    frequencies = [nd.mirror_node.temporal_frequency
                   for nd in prediction.non_terminals]

    # one polarizer column per memory time scale
    tau_polarizers = []
    for memory in mem_scale:
        prediction.calculate_polarizers(mem=memory)
        tau_polarizers.append([nd.polarizer for nd in prediction.non_terminals])

    return np.hstack((np.array(tau_polarizers).T, np.array(frequencies)))
# normalize the strain-set names passed on the command line: '^' stands in
# for a literal space
params.pred = params.pred.replace('^', ' ')
params.test = params.test.replace('^', ' ')
params.subsample = 0.7  # fraction of strains used per replicate — TODO confirm semantics in test_flu
# get run specific file names
fname_base, name_mod = test_flu.get_fname(params)
top_strain_method = 'mean_fitness'  # ranking used to pick top strains — presumably consumed later; verify
# allocate arrays to save the predictions: 12 columns = 3 summary distances
# plus (presumably) one per evaluated method — the filling statement below is
# cut off in this view, so the remaining columns cannot be confirmed
nuc_dist_array = np.zeros((params.nreps, 12))
epi_dist_array = np.zeros((params.nreps, 12))
top_strains = []
for ii in xrange(params.nreps):
    # set up the prediction and pass all parameters to the wrapper function
    prediction = test_flu.predict_params([
        'mean_fitness', 'expansion_score', 'depth', 'polarizer',
        flu.combined_ranking_internal, flu.combined_ranking_external
    ], params)
    # define the methods for which the predictions are to be evaluated:
    # (attribute name, result-key suffix, node set to rank)
    methods = [('mean_fitness', '_ext', prediction.terminals),
               ('mean_fitness', '_int', prediction.non_terminals),
               ('expansion_score', '_int', prediction.non_terminals),
               ('expansion_fitness', '', prediction.non_terminals),
               ('time_fitness', '', prediction.terminals),
               ('ladder_rank', '', prediction.terminals),
               ('date', '', prediction.terminals),
               ('polarizer', '_ext', prediction.terminals),
               ('polarizer', '_int', prediction.non_terminals)]
    distances, distances_epi, test_data = test_flu.evaluate(
        prediction, methods, params)
    # store this replicate's summary distances (statement continues past the
    # end of this chunk via the line continuation)
    nuc_dist_array[ii, :] = [distances['average'], distances['minimal'], distances['L&L']]\
# inference parameters for this run
params.diffusion = 0.2
params.omega = 0.3
for gamma in [1.0]:  #, 2.0, 3.0,5.0]:
    params.gamma = gamma
    for sdt in [1, 100]:  # two sampling-interval variants of the simulated data — TODO confirm
        # simulation directory / file prefix encoding the run parameters
        file_base = params.base_name = data_dir + '_'.join(map(str, ['/N', N, 'L', L, 'nflip', nflip, 'mu', mu_sim, 'sdt', sdt])) + '/' + prefix + 'seqs'
        pred_label = params.sample_size, params.gamma, params.diffusion, params.omega, params.valdt * params.dt
        # make figure
        plt.figure(figsize=(12, 5))
        ### PLOT EXAMPLE PREDICTION
        print "run example prediction"
        prediction = test_flu.predict_params(['mean_fitness'], params)
        plt.subplot(131)
        plt.text(xpos, ypos, 'B', transform=plt.gca().transAxes, fontsize=36)
        # center true and inferred fitness so they can be compared directly
        true_fitness = np.array([n.fitness for n in prediction.terminals])
        true_fitness -= true_fitness.mean()
        estimated_fitness = np.array([n.mean_fitness for n in prediction.terminals])
        estimated_fitness -= estimated_fitness.mean()
        # Spearman rank correlation of true vs inferred fitness
        spcorr = stats.spearmanr(true_fitness, estimated_fitness)
        # accumulate each leaf's posterior fitness distribution into the row
        # indexed by its discretized true fitness
        im = np.zeros((prediction.nstates, prediction.nstates))
        for node, true_fit in zip(prediction.terminals, true_fitness):
            # grid bin of the leaf's true fitness; argmin of the boolean
            # comparison yields the first index where it is False — verify intent
            xi = np.argmin(prediction.fitness_grid < (true_fit - true_fitness.max()) / sigma + prediction.fitness_grid[-2])
            im[xi, :] += node.prob
        # scatter fitness
        plt.scatter(true_fitness / sigma, estimated_fitness, label=r"$\rho = " + str(np.round(spcorr[0], 2)) + "$")
tree_figure_folder = '../figures_trees/'
analysis_folder = test_flu.flu_analysis_folder
# parse the commandline arguments
parser = test_flu.make_flu_parser()
parser.add_argument('--tau', default=1.0, type=float, help='memory time scale of the tree polarizer')
params = parser.parse_args()
# get name snippets to link output files to run parameters
base_name, name_mod = test_flu.get_fname(params)
# gamma/diffusion are fixed for the polarizer run — presumably so that file
# names are deterministic; verify against test_flu.get_fname
params.gamma = 1.0
params.diffusion = 1.0
# set up the prediction and pass all parameters to the wrapper function
prediction = test_flu.predict_params(['polarizer'], params)
prediction.calculate_polarizers(params.tau)
# define the methods for which the predictions are to be evaluated
methods = [('polarizer', '_ext', prediction.terminals),
           ('polarizer', '_int', prediction.non_terminals)]
distances, distances_epi, test_data = test_flu.evaluate(
    prediction, methods, params)
# calculate the fitness differentials for each internal branch and associate with
# different types of mutations that happen on these branches
dfit = []
for node in prediction.non_terminals:
    for child in node.clades:
        # polarizer difference across the parent -> child branch
        delta_fitness = child.polarizer - node.polarizer
        muts = child.mutations
        # (loop body continues beyond this chunk)
import numpy as np
from scipy import stats
import glob, pickle, gzip, os, argparse
from datetime import date

# NOTE(review): plt and test_flu are used below but not imported in this
# chunk — presumably imported earlier in the full file
plt.rcParams.update(test_flu.mpl_params)
tree_figure_folder = '../figures_trees/'
analysis_folder = test_flu.flu_analysis_folder
# parse the commandline arguments
parser = test_flu.make_flu_parser()
params = parser.parse_args()
# get name snippets to link output files to run parameters
base_name, name_mod = test_flu.get_fname(params)
# set up the prediction and pass all parameters to the wrapper function
prediction = test_flu.predict_params(
    ['mean_fitness', 'expansion_score', 'depth', 'polarizer'], params)
# define the methods for which the predictions are to be evaluated:
# (attribute name, result-key suffix, node set to rank)
methods = [('mean_fitness', '_ext', prediction.terminals),
           ('mean_fitness', '_int', prediction.non_terminals),
           ('expansion_score', '_int', prediction.non_terminals),
           ('expansion_fitness', '', prediction.non_terminals),
           ('time_fitness', '', prediction.terminals),
           ('polarizer', '_ext', prediction.terminals),
           ('polarizer', '_int', prediction.non_terminals)]
distances, distances_epi, test_data = test_flu.evaluate(
    prediction, methods, params)
# make plots for leaves colored by inferred mean fitness
if params.plot:
    laessig_prediction = test_flu.get_LL(params.flutype)
    # (plotting branch continues beyond this chunk)
from matplotlib import pyplot as plt
import numpy as np
import glob, pickle
from scipy import stats

# override the flu-specific command-line arguments with 'toy' values so the
# generic machinery runs on simulated data
parser = test_flu.make_toy_parser()
params = parser.parse_args()
params.pred = 'toy'
params.test = 'toy'
params.flutype = 'toy'
params.boost = 0.0
params.subsample = 1.0
params.year = params.gen  # for toy data the 'year' is the simulation generation
# set up a toy data set using the classes in test_flu_prediction
prediction = test_flu.predict_params(['mean_fitness', 'ladder_rank', 'polarizer'], params)
base_name, name_mod = test_flu.get_fname(params)
# (attribute name, result-key suffix, node set to rank)
methods = [('fitness', '_ext', prediction.terminals),
           ('mean_fitness', '_ext', prediction.terminals),
           ('mean_fitness', '_int', prediction.non_terminals),
           ('ladder_rank', '', prediction.terminals),
           ('fitness', '', prediction.terminals),
           ('polarizer', '_ext', prediction.terminals),
           ('polarizer', '_int', prediction.non_terminals)]  # last prediction is true fitness
distances, dist_epi_dummy, test_data = test_flu.evaluate(prediction, methods, params)
# summary distances followed by one entry per method, keyed by name+suffix
distances_list = [distances['average'], distances['minimal']]\
    + [distances[m[0] + m[1]] for m in methods]
# rank correlation of true vs inferred fitness (call continues past this chunk)
fitness_correlation_mf = stats.spearmanr([n.fitness for n in prediction.terminals],
# scan ranking parameters; only gamma=1.0 is currently active
for gamma in [1.0]:  #, 2.0, 3.0,5.0]:
    params.gamma = gamma
    for sdt in [1, 100]:  # two sampling-interval variants of the simulated data — TODO confirm
        # simulation directory / file prefix encoding the run parameters
        file_base = params.base_name = data_dir + '_'.join(
            map(str,
                ['/N', N, 'L', L, 'nflip', nflip, 'mu', mu_sim, 'sdt', sdt
                 ])) + '/' + prefix + 'seqs'
        pred_label = params.sample_size, params.gamma, params.diffusion, params.omega, params.valdt * params.dt
        # make figure
        plt.figure(figsize=(12, 5))
        ### PLOT EXAMPLE PREDICTION
        print "run example prediction"
        prediction = test_flu.predict_params(['mean_fitness'], params)
        plt.subplot(131)
        plt.text(xpos, ypos, 'B', transform=plt.gca().transAxes, fontsize=36)
        # center true and inferred fitness before comparing them
        true_fitness = np.array([n.fitness for n in prediction.terminals])
        true_fitness -= true_fitness.mean()
        estimated_fitness = np.array(
            [n.mean_fitness for n in prediction.terminals])
        estimated_fitness -= estimated_fitness.mean()
        # Spearman rank correlation of true vs inferred fitness
        spcorr = stats.spearmanr(true_fitness, estimated_fitness)
        # accumulate each leaf's posterior fitness distribution into the row
        # indexed by its discretized true fitness
        im = np.zeros((prediction.nstates, prediction.nstates))
        for node, true_fit in zip(prediction.terminals, true_fitness):
            # grid bin of the leaf's true fitness; argmin of the boolean
            # comparison yields the first index where it is False — verify intent
            xi = np.argmin(
                prediction.fitness_grid < (true_fit - true_fitness.max()) /
                sigma + prediction.fitness_grid[-2])
            im[xi, :] += node.prob
        # (loop body continues beyond this chunk)