def clade_frequency_correlations_func_polarizer(params):
    """Run a polarizer prediction and collect, for every internal node,
    the polarizer value at several memory time scales alongside the
    clade's temporal frequency trajectory.

    NOTE(review): relies on a module-level ``mem_scale`` iterable of
    memory time scales — confirm it is defined in the enclosing script.
    Returns an array with one row per internal node: one polarizer
    column per tau, followed by the temporal-frequency columns.
    """
    # fix the inference parameters, then run the polarizer prediction
    params.diffusion = 1.0
    params.gamma = 1.0
    prediction = test_flu.predict_params(['polarizer'], params)

    # methods for which the predictions are to be evaluated
    methods = [('polarizer', '_ext', prediction.terminals),
               ('polarizer', '_int', prediction.non_terminals)]
    distances, distances_epi, test_data = test_flu.evaluate(
        prediction, methods, params)

    # evaluation window: June of the prediction year to June of the next
    eval_bins = [date(year=params.year, month=6, day=1),
                 date(year=params.year + 1, month=6, day=1)]
    combined_data = test_flu.make_combined_data(prediction, test_data,
                                                collapse=params.collapse)
    combined_tree = flu.flu_ranking(combined_data, time_bins=eval_bins,
                                    pseudo_count=0)
    combined_tree.expansion_score()

    # map internal nodes of the prediction tree onto the combined tree so
    # each node can read its clade frequency from its mirror node
    tree_utils.find_internal_nodes(prediction.T, combined_tree.T)
    clade_freqs = [node.mirror_node.temporal_frequency
                   for node in prediction.non_terminals]

    # recompute the polarizer once per memory time scale and record its
    # value on every internal node
    pol_columns = []
    for tau in mem_scale:
        prediction.calculate_polarizers(mem=tau)
        pol_columns.append([node.polarizer
                            for node in prediction.non_terminals])

    return np.hstack((np.array(pol_columns).T, np.array(clade_freqs)))
def clade_frequency_correlations_func(params):
    """Run a mean-fitness prediction and return, per internal node, its
    mean fitness and polarizer value followed by the clade's temporal
    frequency trajectory.

    NOTE(review): ``node.polarizer`` is read although only
    ``'mean_fitness'`` was requested from predict_params — confirm the
    polarizer attribute is populated regardless.
    """
    # set up the prediction and pass all parameters to the wrapper function
    prediction = test_flu.predict_params(['mean_fitness'], params)

    # methods for which the predictions are to be evaluated
    methods = [('mean_fitness', '_ext', prediction.terminals),
               ('mean_fitness', '_int', prediction.non_terminals)]
    distances, distances_epi, test_data = test_flu.evaluate(
        prediction, methods, params)

    # evaluation window: June of the prediction year to June of the next
    eval_bins = [date(year=params.year, month=6, day=1),
                 date(year=params.year + 1, month=6, day=1)]
    combined_data = test_flu.make_combined_data(prediction, test_data,
                                                collapse=params.collapse)
    combined_tree = flu.flu_ranking(combined_data, time_bins=eval_bins,
                                    pseudo_count=0)
    combined_tree.expansion_score()

    # link each internal node to its counterpart in the combined tree
    tree_utils.find_internal_nodes(prediction.T, combined_tree.T)

    # one row per internal node: [mean_fitness, polarizer, freq_0, freq_1, ...]
    rows = [[node.mean_fitness, node.polarizer]
            + list(node.mirror_node.temporal_frequency)
            for node in prediction.non_terminals]
    return np.array(rows)
def clade_frequency_correlations_func(params):
    """Predict mean fitness and return an array pairing each internal
    node's (mean_fitness, polarizer) with its clade frequency trajectory.

    NOTE(review): duplicate of an earlier definition in this file — the
    later one wins at import time; consider removing one copy.
    """
    # run the mean-fitness prediction via the wrapper
    prediction = test_flu.predict_params(['mean_fitness'], params)

    # evaluate external- and internal-node predictions
    eval_methods = [('mean_fitness', '_ext', prediction.terminals),
                    ('mean_fitness', '_int', prediction.non_terminals)]
    distances, distances_epi, test_data = test_flu.evaluate(
        prediction, eval_methods, params)

    # frequency trajectories are measured between successive Junes
    time_bins = [date(year=params.year, month=6, day=1),
                 date(year=params.year + 1, month=6, day=1)]
    merged = test_flu.make_combined_data(prediction, test_data,
                                         collapse=params.collapse)
    ranking = flu.flu_ranking(merged, time_bins=time_bins, pseudo_count=0)
    ranking.expansion_score()

    # attach mirror nodes so frequencies can be read off the combined tree
    tree_utils.find_internal_nodes(prediction.T, ranking.T)

    result = []
    for clade in prediction.non_terminals:
        row = [clade.mean_fitness, clade.polarizer]
        row.extend(clade.mirror_node.temporal_frequency)
        result.append(row)
    return np.array(result)
def clade_frequency_correlations_func_polarizer(params):
    """Polarizer-based variant: returns per internal node the polarizer
    value at each memory scale in ``mem_scale`` (one column per tau),
    concatenated with the clade frequency trajectory.

    NOTE(review): duplicate of an earlier definition in this file; the
    later one wins at import time.
    """
    # diffusion/gamma are fixed before handing params to the wrapper
    params.diffusion = 1.0
    params.gamma = 1.0
    prediction = test_flu.predict_params(['polarizer'], params)

    # evaluate polarizer predictions on leaves and internal nodes
    eval_methods = [('polarizer', '_ext', prediction.terminals),
                    ('polarizer', '_int', prediction.non_terminals)]
    distances, distances_epi, test_data = test_flu.evaluate(
        prediction, eval_methods, params)

    # frequency trajectories are measured between successive Junes
    time_bins = [date(year=params.year, month=6, day=1),
                 date(year=params.year + 1, month=6, day=1)]
    merged = test_flu.make_combined_data(prediction, test_data,
                                         collapse=params.collapse)
    ranking = flu.flu_ranking(merged, time_bins=time_bins, pseudo_count=0)
    ranking.expansion_score()

    # attach mirror nodes, then read each clade's frequency trajectory
    tree_utils.find_internal_nodes(prediction.T, ranking.T)
    freq_rows = [clade.mirror_node.temporal_frequency
                 for clade in prediction.non_terminals]

    # one polarizer column per memory time scale
    per_tau = []
    for tau in mem_scale:
        prediction.calculate_polarizers(mem=tau)
        per_tau.append([clade.polarizer
                        for clade in prediction.non_terminals])

    combined = np.hstack((np.array(per_tau).T, np.array(freq_rows)))
    return combined
# Run the full multi-method prediction for this iteration.
# NOTE(review): this fragment references ``ii``, ``nuc_dist_array``,
# ``epi_dist_array``, ``top_strains``, ``top_strain_method``,
# ``analysis_folder``, ``fname_base`` and ``name_mod`` — it appears to be
# the body of a loop defined outside this view; confirm indentation in
# the enclosing file.
prediction = test_flu.predict_params(
    ['mean_fitness', 'expansion_score', 'depth', 'polarizer',
     flu.combined_ranking_internal, flu.combined_ranking_external],
    params)

# methods for which the predictions are to be evaluated
methods = [('mean_fitness', '_ext', prediction.terminals),
           ('mean_fitness', '_int', prediction.non_terminals),
           ('expansion_score', '_int', prediction.non_terminals),
           ('expansion_fitness', '', prediction.non_terminals),
           ('time_fitness', '', prediction.terminals),
           ('ladder_rank', '', prediction.terminals),
           ('date', '', prediction.terminals),
           ('polarizer', '_ext', prediction.terminals),
           ('polarizer', '_int', prediction.non_terminals)]
distances, distances_epi, test_data = test_flu.evaluate(
    prediction, methods, params)

# row layout: three baseline columns (average / minimal / L&L) followed
# by one column per evaluated method
nuc_dist_array[ii, :] = (
    [distances['average'], distances['minimal'], distances['L&L']]
    + [distances[m[0] + m[1]] for m in methods])
epi_dist_array[ii, :] = (
    [distances_epi['average'], distances_epi['minimal'], distances_epi['L&L']]
    + [distances_epi[m[0] + m[1]] for m in methods])

# memorize the strain predicted best
top_strains.append(prediction.best_node(method=top_strain_method,
                                        nodes=prediction.terminals))

# if the output file does not exist yet, create it and write the header
fname_nuc = analysis_folder + '_'.join([fname_base, name_mod, 'nuc.dat'])
if not os.path.isfile(fname_nuc):
    with open(fname_nuc, 'w') as outfile:
        outfile.write('#average\tminimal\tL&L\t'
                      + '\t'.join([m[0] + m[1] for m in methods]) + '\n')
parser = test_flu.make_flu_parser() parser.add_argument('--tau', default = 1.0, type = float, help= 'memory time scale of the tree polarizer') params=parser.parse_args() # get name snippets to link output files to run parameters base_name, name_mod = test_flu.get_fname(params) params.gamma=1.0 params.diffusion=1.0 # set up the prediction and pass all parameters to the wrapper function prediction = test_flu.predict_params(['polarizer'], params) prediction.calculate_polarizers(params.tau) # define the methodes for which the predictions are to be evaluated methods = [ ('polarizer', '_ext', prediction.terminals), ('polarizer', '_int', prediction.non_terminals)] distances, distances_epi, test_data = test_flu.evaluate(prediction, methods, params) # calculate the fitness differentials for each internal branch and associate with # different types of mutations that happen on these branches dfit = [] for node in prediction.non_terminals: for child in node.clades: delta_fitness = child.polarizer - node.polarizer muts = child.mutations aa_muts = child.aa_mutations if child.branch_length<0.01: dfit.append((delta_fitness,child.is_terminal(), len(muts), len(aa_muts), len([m[0] for m in aa_muts if m[0] in flu.HA1_antigenic_sites]), len([m[0] for m in aa_muts if m[0] in flu.cluster_positions]))) else: print child,'excluded due to long branch length'