def clade_frequency_correlations_func_polarizer(params):
    """Run a polarizer-only prediction and pair each internal clade's
    polarizer scores (one per memory time scale in the module-level
    ``mem_scale``) with its observed frequency trajectory.

    Returns a 2D array: one row per internal node, polarizer columns
    first, then the temporal-frequency columns.
    """
    # the polarizer prediction fixes diffusion and gamma to unity
    params.diffusion = 1.0
    params.gamma = 1.0
    prediction = test_flu.predict_params(['polarizer'], params)

    # evaluate the polarizer ranking on leaves ('_ext') and internal nodes ('_int')
    methods = [('polarizer', '_ext', prediction.terminals),
               ('polarizer', '_int', prediction.non_terminals)]
    distances, distances_epi, test_data = test_flu.evaluate(prediction, methods, params)

    # frequency bins spanning one season: June 1st to June 1st of the next year
    tbins_eval = [date(year=params.year, month=6, day=1),
                  date(year=params.year + 1, month=6, day=1)]
    combined_data = test_flu.make_combined_data(prediction, test_data,
                                                collapse=params.collapse)
    combined_tree = flu.flu_ranking(combined_data, time_bins=tbins_eval,
                                    pseudo_count=0)
    combined_tree.expansion_score()

    # link each clade of the prediction tree to its counterpart ("mirror
    # node") in the combined tree, then read off the clade frequencies
    tree_utils.find_internal_nodes(prediction.T, combined_tree.T)
    freqs = [clade.mirror_node.temporal_frequency
             for clade in prediction.non_terminals]

    # recompute the polarizer for every memory time scale
    polarizers = []
    for memory in mem_scale:
        prediction.calculate_polarizers(mem=memory)
        polarizers.append([clade.polarizer for clade in prediction.non_terminals])

    # columns: one polarizer per time scale, then the frequency trajectory
    polarizers_and_freqs = np.hstack((np.array(polarizers).T, np.array(freqs)))
    return polarizers_and_freqs
def clade_frequency_correlations_func(params):
    """Predict clade fitness via the mean-fitness ranking and pair each
    internal clade's fitness/polarizer with its observed frequency
    trajectory.

    Returns a 2D array with one row per internal node:
    [mean_fitness, polarizer, temporal_frequency...].
    """
    # run the prediction wrapper with the mean-fitness ranking only
    prediction = test_flu.predict_params(['mean_fitness'], params)

    # score the ranking on leaves ('_ext') and on internal nodes ('_int')
    methods = [('mean_fitness', '_ext', prediction.terminals),
               ('mean_fitness', '_int', prediction.non_terminals)]
    distances, distances_epi, test_data = test_flu.evaluate(prediction, methods, params)

    # one-season evaluation window: June 1st to June 1st of the next year
    tbins_eval = [date(year=params.year, month=6, day=1),
                  date(year=params.year + 1, month=6, day=1)]
    combined_data = test_flu.make_combined_data(prediction, test_data,
                                                collapse=params.collapse)
    combined_tree = flu.flu_ranking(combined_data, time_bins=tbins_eval,
                                    pseudo_count=0)
    combined_tree.expansion_score()

    # attach each predicted clade to its mirror node in the combined tree
    tree_utils.find_internal_nodes(prediction.T, combined_tree.T)

    # rows: [mean_fitness, polarizer, frequency per time bin]
    rows = [[clade.mean_fitness, clade.polarizer]
            + list(clade.mirror_node.temporal_frequency)
            for clade in prediction.non_terminals]
    fitness_and_freqs = np.array(rows)
    return fitness_and_freqs
def clade_frequency_correlations_func(params):
    """Correlate predicted clade fitness with observed clade frequencies.

    For every internal node of the prediction tree, collect the inferred
    mean fitness, the polarizer score, and the temporal frequency of the
    matching clade in the combined tree; return them as one numpy array
    (one row per internal node).
    """
    # set up the prediction and pass all parameters to the wrapper function
    prediction = test_flu.predict_params(['mean_fitness'], params)

    # rankings to score on leaves ('_ext') and internal nodes ('_int')
    eval_methods = [('mean_fitness', '_ext', prediction.terminals),
                    ('mean_fitness', '_int', prediction.non_terminals)]
    distances, distances_epi, test_data = test_flu.evaluate(prediction,
                                                            eval_methods,
                                                            params)

    # frequency time bins: a single season starting June 1st
    season_start = date(year=params.year, month=6, day=1)
    season_end = date(year=params.year + 1, month=6, day=1)
    combined_data = test_flu.make_combined_data(prediction, test_data,
                                                collapse=params.collapse)
    combined_tree = flu.flu_ranking(combined_data,
                                    time_bins=[season_start, season_end],
                                    pseudo_count=0)
    combined_tree.expansion_score()

    # map internal nodes of the prediction tree onto the combined tree
    tree_utils.find_internal_nodes(prediction.T, combined_tree.T)

    records = []
    for clade in prediction.non_terminals:
        record = [clade.mean_fitness, clade.polarizer]
        record.extend(clade.mirror_node.temporal_frequency)
        records.append(record)

    return np.array(records)
def clade_frequency_correlations_func_polarizer(params):
    """Correlate polarizer scores (across memory time scales) with clade
    frequencies.

    The returned array has one row per internal node; its first columns
    hold one polarizer value per entry of the module-level ``mem_scale``
    and the remaining columns hold the clade frequency trajectory.
    """
    # the polarizer run fixes diffusion and gamma to unity
    params.diffusion = 1.0
    params.gamma = 1.0
    prediction = test_flu.predict_params(['polarizer'], params)

    # score the polarizer on external (leaf) and internal nodes
    to_evaluate = [('polarizer', '_ext', prediction.terminals),
                   ('polarizer', '_int', prediction.non_terminals)]
    distances, distances_epi, test_data = test_flu.evaluate(prediction,
                                                            to_evaluate,
                                                            params)

    # evaluation bins cover one season (June 1st to June 1st)
    tbins_eval = [date(year=params.year, month=6, day=1),
                  date(year=params.year + 1, month=6, day=1)]
    combined_data = test_flu.make_combined_data(prediction, test_data,
                                                collapse=params.collapse)
    combined_tree = flu.flu_ranking(combined_data, time_bins=tbins_eval,
                                    pseudo_count=0)
    combined_tree.expansion_score()

    # pair prediction clades with their counterparts in the combined tree
    tree_utils.find_internal_nodes(prediction.T, combined_tree.T)
    freqs = [n.mirror_node.temporal_frequency
             for n in prediction.non_terminals]

    # one polarizer column per memory time scale
    polarizer_columns = []
    for tau in mem_scale:
        prediction.calculate_polarizers(mem=tau)
        polarizer_columns.append([n.polarizer
                                  for n in prediction.non_terminals])

    polarizers_and_freqs = np.hstack((np.array(polarizer_columns).T,
                                      np.array(freqs)))
    return polarizers_and_freqs
# ---- Example #5 (score: 0) -- scraped-snippet separator, not code ----
# Fragment of a flu-prediction driver: repeat the prediction params.nreps
# times and collect distance measures for several ranking methods.
# NOTE(review): relies on names defined elsewhere (test_flu, flu, np) and
# uses xrange / a truncated statement — Python 2 source, cut off below.
params.pred = params.pred.replace('^', ' ')
params.test = params.test.replace('^', ' ')
params.subsample = 0.7

# get run specific file names
fname_base, name_mod = test_flu.get_fname(params)

top_strain_method = 'mean_fitness'
# allocate arrays to save the predictions
nuc_dist_array = np.zeros((params.nreps, 12))
epi_dist_array = np.zeros((params.nreps, 12))
top_strains = []
# one independent prediction per repetition
for ii in xrange(params.nreps):
    # set up the prediction and pass all parameters to the wrapper function
    prediction = test_flu.predict_params([
        'mean_fitness', 'expansion_score', 'depth', 'polarizer',
        flu.combined_ranking_internal, flu.combined_ranking_external
    ], params)

    # define the methodes for which the predictions are to be evaluated
    methods = [('mean_fitness', '_ext', prediction.terminals),
               ('mean_fitness', '_int', prediction.non_terminals),
               ('expansion_score', '_int', prediction.non_terminals),
               ('expansion_fitness', '', prediction.non_terminals),
               ('time_fitness', '', prediction.terminals),
               ('ladder_rank', '', prediction.terminals),
               ('date', '', prediction.terminals),
               ('polarizer', '_ext', prediction.terminals),
               ('polarizer', '_int', prediction.non_terminals)]
    distances, distances_epi, test_data = test_flu.evaluate(
        prediction, methods, params)
    # store nucleotide distances for this repetition; the statement is cut
    # off mid-line in this snippet (trailing backslash continuation)
    nuc_dist_array[ii,:] = [distances['average'],distances['minimal'],distances['L&L']]\
# Fragment of a toy-model plotting script: sweep gamma/sdt combinations,
# run an example prediction, and plot estimated vs true fitness.
# NOTE(review): Python 2 (print statement); depends on names defined
# elsewhere (data_dir, N, L, nflip, mu_sim, prefix, xpos, ypos, sigma,
# plt, np, stats, test_flu); the inner loop body is cut off below.
params.diffusion = 0.2
params.omega = 0.3

for gamma in [1.0]: #, 2.0, 3.0,5.0]:
    params.gamma=gamma
    for sdt in [1,100]:
        # per-run file prefix encoding the simulation parameters
        file_base = params.base_name = data_dir+'_'.join(map(str,['/N', N, 'L', L, 'nflip',nflip
                                            ,'mu',mu_sim,'sdt', sdt]))+'/'+prefix+'seqs'
        
        # tuple of run parameters used to label the prediction
        pred_label = params.sample_size, params.gamma, params.diffusion, params.omega, params.valdt*params.dt

        # make figure
        plt.figure(figsize= (12,5))
        ### PLOT EXAMPLE PREDICTION
        print "run example prediction"
        prediction = test_flu.predict_params(['mean_fitness'],params)
        plt.subplot(131)
        plt.text(xpos,ypos,'B', transform = plt.gca().transAxes, fontsize = 36)
        # center true and estimated fitness before comparing them
        true_fitness = np.array([n.fitness for n in prediction.terminals])
        true_fitness -= true_fitness.mean()
        estimated_fitness = np.array([n.mean_fitness for n in prediction.terminals])
        estimated_fitness -= estimated_fitness.mean()
        spcorr = stats.spearmanr(true_fitness, estimated_fitness)
        # accumulate fitness-probability mass on a 2D grid, binned by true fitness
        im = np.zeros((prediction.nstates, prediction.nstates))
        for node, true_fit in zip(prediction.terminals, true_fitness):
            xi = np.argmin(prediction.fitness_grid<(true_fit-true_fitness.max())/sigma+prediction.fitness_grid[-2])
            im[xi,:]+=node.prob
        
        #scatter fitness
        plt.scatter(true_fitness/sigma, estimated_fitness, 
                    label = r"$\rho = "+str(np.round(spcorr[0],2))+"$") 
# Fragment of a polarizer-analysis script: parse command-line options,
# run a polarizer prediction, and begin collecting per-branch fitness
# differentials. NOTE(review): cut off below — the inner loop body
# continues beyond this snippet (muts is computed but not yet used here).
tree_figure_folder = '../figures_trees/'
analysis_folder = test_flu.flu_analysis_folder
# parse the commandline arguments
parser = test_flu.make_flu_parser()
parser.add_argument('--tau',
                    default=1.0,
                    type=float,
                    help='memory time scale of the tree polarizer')
params = parser.parse_args()
# get name snippets to link output files to run parameters
base_name, name_mod = test_flu.get_fname(params)
params.gamma = 1.0
params.diffusion = 1.0

# set up the prediction and pass all parameters to the wrapper function
prediction = test_flu.predict_params(['polarizer'], params)
prediction.calculate_polarizers(params.tau)

# define the methodes for which the predictions are to be evaluated
methods = [('polarizer', '_ext', prediction.terminals),
           ('polarizer', '_int', prediction.non_terminals)]
distances, distances_epi, test_data = test_flu.evaluate(
    prediction, methods, params)

# calculate the fitness differentials for each internal branch and associate with
# different types of mutations that happen on these branches
dfit = []
for node in prediction.non_terminals:
    for child in node.clades:
        # polarizer change along the branch from node to child
        delta_fitness = child.polarizer - node.polarizer
        muts = child.mutations
# ---- Example #8 (score: 0) -- scraped-snippet separator, not code ----
# Fragment of a flu-prediction analysis script. NOTE(review): plt and
# test_flu are defined elsewhere in the original file; only the imports
# below are visible here, and the trailing `if` body is cut off.
import numpy as np
from scipy import stats
import glob, pickle, gzip, os, argparse
from datetime import date

# apply the project's shared matplotlib styling
plt.rcParams.update(test_flu.mpl_params)

tree_figure_folder = '../figures_trees/'
analysis_folder = test_flu.flu_analysis_folder
# parse the commandline arguments
parser = test_flu.make_flu_parser()
params = parser.parse_args()
# get name snippets to link output files to run parameters
base_name, name_mod = test_flu.get_fname(params)
# set up the prediction and pass all parameters to the wrapper function
prediction = test_flu.predict_params(
    ['mean_fitness', 'expansion_score', 'depth', 'polarizer'], params)

# define the methodes for which the predictions are to be evaluated
methods = [('mean_fitness', '_ext', prediction.terminals),
           ('mean_fitness', '_int', prediction.non_terminals),
           ('expansion_score', '_int', prediction.non_terminals),
           ('expansion_fitness', '', prediction.non_terminals),
           ('time_fitness', '', prediction.terminals),
           ('polarizer', '_ext', prediction.terminals),
           ('polarizer', '_int', prediction.non_terminals)]
distances, distances_epi, test_data = test_flu.evaluate(
    prediction, methods, params)

# make plots for leaves colored by inferred mean fitness
if params.plot:
    laessig_prediction = test_flu.get_LL(params.flutype)
# ---- Example #9 (score: 0) -- scraped-snippet separator, not code ----
# Fragment of a toy-model validation script: run the prediction machinery
# on synthetic ('toy') data and compare rankings against the true fitness.
# NOTE(review): test_flu is imported elsewhere; the last statement below is
# truncated mid-call in this snippet.
from matplotlib import pyplot as plt
import numpy as np
import glob, pickle
from scipy import stats

parser = test_flu.make_toy_parser()
params = parser.parse_args()
# force all data-set choices to the toy model
params.pred = 'toy'
params.test = 'toy'
params.flutype='toy'
params.boost = 0.0
params.subsample = 1.0
params.year = params.gen

# set up a toy data set using the classes in test_flu_prediction
prediction = test_flu.predict_params(['mean_fitness', 'ladder_rank', 'polarizer'], params)
base_name, name_mod = test_flu.get_fname(params)

methods = [ ('fitness', '_ext', prediction.terminals),
            ('mean_fitness', '_ext', prediction.terminals),
            ('mean_fitness', '_int', prediction.non_terminals),
            ('ladder_rank', '', prediction.terminals),
            ('fitness','',prediction.terminals),
            ('polarizer', '_ext', prediction.terminals),
            ('polarizer', '_int', prediction.non_terminals)]  # last prediction is true fitness

distances, dist_epi_dummy, test_data = test_flu.evaluate(prediction, methods, params)
# flat result list: [average, minimal, one distance per ranking method]
distances_list = [distances['average'],distances['minimal']]\
                    +[distances[m[0]+m[1]] for m in methods]

# correlation of true vs inferred fitness; the second spearmanr argument
# is missing — statement truncated in this snippet
fitness_correlation_mf = stats.spearmanr([n.fitness for n in prediction.terminals],
# Reformatted duplicate of the gamma/sdt sweep fragment above: run an
# example toy-model prediction and compare estimated to true fitness.
# NOTE(review): Python 2 (print statement); depends on names defined
# elsewhere (params, data_dir, N, L, nflip, mu_sim, prefix, xpos, ypos,
# sigma, plt, np, stats, test_flu); the loop body is cut off below.
for gamma in [1.0]:  #, 2.0, 3.0,5.0]:
    params.gamma = gamma
    for sdt in [1, 100]:
        # per-run file prefix encoding the simulation parameters
        file_base = params.base_name = data_dir + '_'.join(
            map(str,
                ['/N', N, 'L', L, 'nflip', nflip, 'mu', mu_sim, 'sdt', sdt
                 ])) + '/' + prefix + 'seqs'

        # tuple of run parameters used to label the prediction
        pred_label = params.sample_size, params.gamma, params.diffusion, params.omega, params.valdt * params.dt

        # make figure
        plt.figure(figsize=(12, 5))
        ### PLOT EXAMPLE PREDICTION
        print "run example prediction"
        prediction = test_flu.predict_params(['mean_fitness'], params)
        plt.subplot(131)
        plt.text(xpos, ypos, 'B', transform=plt.gca().transAxes, fontsize=36)
        # center true and estimated fitness before comparing them
        true_fitness = np.array([n.fitness for n in prediction.terminals])
        true_fitness -= true_fitness.mean()
        estimated_fitness = np.array(
            [n.mean_fitness for n in prediction.terminals])
        estimated_fitness -= estimated_fitness.mean()
        spcorr = stats.spearmanr(true_fitness, estimated_fitness)
        # accumulate fitness-probability mass on a 2D grid, binned by true fitness
        im = np.zeros((prediction.nstates, prediction.nstates))
        for node, true_fit in zip(prediction.terminals, true_fitness):
            xi = np.argmin(
                prediction.fitness_grid < (true_fit - true_fitness.max()) /
                sigma + prediction.fitness_grid[-2])
            im[xi, :] += node.prob