def test_import(self, tree, tmpdir):
    """Test import of frequencies JSON that was exported from a frequencies instance.

    Estimates frequencies for ``tree``, writes the exported JSON to a file
    below ``tmpdir``, reads it back, rebuilds an instance with
    ``KdeFrequencies.from_json`` and compares pivots and one frequency
    vector between the original and the rebuilt instance.
    """
    start_date = 2015.5
    end_date = 2018.5
    kde_frequencies = KdeFrequencies(start_date=start_date, end_date=end_date)
    # Only the state stored on the instance is needed below, so the return
    # value of estimate() is not kept.
    kde_frequencies.estimate(tree)
    frequencies_json = kde_frequencies.to_json()

    # Try to dump exported JSON to disk. The context manager closes the
    # handle even when json.dump raises.
    tmp_fh = tmpdir.mkdir("json").join("frequencies.json")
    with tmp_fh.open(mode="w") as fh:
        json.dump(frequencies_json, fh)

    assert tmp_fh.check()

    # Import frequencies from the JSON written above.
    with tmp_fh.open() as fh:
        new_frequencies_json = json.load(fh)

    new_kde_frequencies = KdeFrequencies.from_json(new_frequencies_json)
    assert np.array_equal(kde_frequencies.pivots, new_kde_frequencies.pivots)

    # Compare the second key in sorted order. The original author's note says
    # the root clade is number 0 and sorts first, making this the first
    # non-root key -- NOTE(review): confirm against the tree fixture.
    key = sorted(kde_frequencies.frequencies.keys())[1]
    assert np.array_equal(kde_frequencies.frequencies[key],
                          new_kde_frequencies.frequencies[key])
def test_tip_and_internal_node_estimates(self, tree):
    """Test frequency estimation for tips and internal nodes in a given tree.

    Every clade returned by ``tree.find_clades()`` must appear as a key in
    the estimated frequencies when internal nodes are included.
    """
    # Estimate unweighted frequencies with internal nodes switched on.
    estimator = KdeFrequencies(include_internal_nodes=True)
    estimates = estimator.estimate(tree)

    # Verify that all tips and internal nodes have frequency estimates.
    assert all(node.clade in estimates for node in tree.find_clades())
def test_estimate(self, tree):
    """Test frequency estimation with default parameters.

    Checks that estimation populates ``pivots`` and ``frequencies`` on the
    instance, that consecutive pivots are about one month (1/12 year)
    apart, and that a frequency vector has the same shape as the pivots.
    """
    kde_frequencies = KdeFrequencies()
    frequencies = kde_frequencies.estimate(tree)

    assert hasattr(kde_frequencies, "pivots")
    # Compare the pivot step with 1/12 after rounding both to two decimals.
    pivot_step = kde_frequencies.pivots[1] - kde_frequencies.pivots[0]
    assert np.around(pivot_step, 2) == np.around(1 / 12.0, 2)

    assert hasattr(kde_frequencies, "frequencies")
    # dict.values() is a non-subscriptable view on Python 3, so materialize
    # it before indexing; this also works unchanged on Python 2.
    first_frequencies = list(frequencies.values())[0]
    assert first_frequencies.shape == kde_frequencies.pivots.shape
def test_import_without_frequencies(self):
    """Test import of frequencies JSON that was exported from a frequencies
    instance without frequency values.

    No estimation is run, so the instance rebuilt from the exported JSON
    must carry the same ``pivot_frequency`` and no ``frequencies`` attribute.
    """
    original = KdeFrequencies()
    exported = original.to_json()

    # Rebuild an instance directly from the exported JSON.
    restored = KdeFrequencies.from_json(exported)

    assert original.pivot_frequency == restored.pivot_frequency
    assert not hasattr(restored, "frequencies")
def test_export_without_frequencies(self):
    """Test frequencies export to JSON when frequencies have *not* been
    estimated.

    The export must contain the instance parameters and no ``data`` entry.
    """
    estimator = KdeFrequencies()
    exported = estimator.to_json()

    assert "params" in exported
    exported_params = exported["params"]
    assert estimator.pivot_frequency == exported_params["pivot_frequency"]
    assert "node_filters" in exported_params

    # Nothing was estimated, so no data section should be exported.
    assert "data" not in exported
def test_estimate_with_time_interval(self, tree):
    """Test frequency estimation with a given time interval.

    Checks that the first pivot equals the requested start date and that a
    frequency vector has the same shape as the pivots.
    """
    start_date = 2015.5
    end_date = 2018.5
    kde_frequencies = KdeFrequencies(start_date=start_date, end_date=end_date)
    frequencies = kde_frequencies.estimate(tree)

    assert hasattr(kde_frequencies, "pivots")
    assert kde_frequencies.pivots[0] == start_date

    assert hasattr(kde_frequencies, "frequencies")
    # dict.values() is a non-subscriptable view on Python 3, so materialize
    # it before indexing; this also works unchanged on Python 2.
    first_frequencies = list(frequencies.values())[0]
    assert first_frequencies.shape == kde_frequencies.pivots.shape
def test_get_params(self, tree):
    """Test export of parameters used to create an instance.

    Every parameter passed to the constructor must come back unchanged
    from ``get_params()`` after estimation has run.
    """
    initial_params = {
        "max_date": 2017.0,
        "start_date": 2015.5,
        "end_date": 2018.5,
    }
    estimator = KdeFrequencies(**initial_params)
    estimator.estimate(tree)

    # Confirm that the exported parameters match the input.
    exported = estimator.get_params()
    for name, expected in initial_params.items():
        assert exported[name] == expected
def test_calculate_pivots_from_tree_only(self, tree):
    """Test pivot calculations.

    Pivots derived from a tree alone must be a NumPy array whose first two
    entries are exactly one pivot step apart.
    """
    step = 0.25
    computed = KdeFrequencies.calculate_pivots(step, tree=tree)

    assert isinstance(computed, np.ndarray)
    first, second = computed[0], computed[1]
    assert second - first == step
def test_export_with_frequencies(self, tree):
    """Test frequencies export to JSON when frequencies have been estimated.

    The export must echo the instance parameters and include a ``data``
    section holding both pivots and frequencies.
    """
    estimator = KdeFrequencies()
    estimator.estimate(tree)
    exported = estimator.to_json()

    # Parameters recorded in the export must match the instance attributes.
    assert "params" in exported
    exported_params = exported["params"]
    assert estimator.pivot_frequency == exported_params["pivot_frequency"]
    assert estimator.start_date == exported_params["start_date"]
    assert estimator.end_date == exported_params["end_date"]

    # Estimated values live under the "data" entry.
    assert "data" in exported
    exported_data = exported["data"]
    assert "pivots" in exported_data
    assert "frequencies" in exported_data
def test_censored_frequencies(self, tree):
    """Test estimation of frequencies where tips sampled beyond a given date
    are censored from the calculations.
    """
    max_date = 2017.0
    estimator = KdeFrequencies(max_date=max_date)
    estimates = estimator.estimate(tree)

    # Split the tips on the censoring date using their "num_date" attribute.
    tips = tree.get_terminals()
    censored_tips = [tip for tip in tips if tip.attr["num_date"] > max_date]
    retained_tips = [tip for tip in tips if tip.attr["num_date"] <= max_date]

    # Confirm that tips sampled after the max date have zero frequencies.
    assert all(estimates[tip.clade].sum() == 0 for tip in censored_tips)

    # Confirm that one or more tips sampled before the max date have nonzero
    # frequencies.
    assert any(estimates[tip.clade].sum() > 0 for tip in retained_tips)
def test_only_tip_estimates(self, tree):
    """Test frequency estimation for only tips in a given tree.

    Runs the same membership checks for unweighted and region-weighted
    estimates produced with internal nodes switched off.
    """
    def check_only_tips_present(estimates):
        # Verify that all tips have frequency estimates and none of the
        # internal nodes do.
        assert all(tip.clade in estimates for tip in tree.get_terminals())
        assert not any(
            node.clade in estimates for node in tree.get_nonterminals()
        )

    # Estimate unweighted frequencies.
    unweighted = KdeFrequencies(include_internal_nodes=False)
    check_only_tips_present(unweighted.estimate(tree))

    # Estimate weighted frequencies; the first two fields of each REGIONS
    # entry are used as the weight key and the weight value.
    weights = {entry[0]: entry[1] for entry in REGIONS}
    weighted = KdeFrequencies(
        weights=weights,
        weights_attribute="region",
        include_internal_nodes=False,
    )
    check_only_tips_present(weighted.estimate(tree))
def simple_fitness_model(simple_tree):
    """Build a fitness model for ``simple_tree`` that uses the "random"
    predictor and KDE frequencies covering the model's time interval.
    """
    # The interval lists the later date first.
    # NOTE(review): confirm this is the order that
    # process.get_time_interval_as_floats expects.
    time_interval = (datetime.date(2015, 1, 1), datetime.date(2012, 1, 1))
    start_date, end_date = process.get_time_interval_as_floats(time_interval)

    frequencies = KdeFrequencies(
        start_date=start_date,
        end_date=end_date,
        include_internal_nodes=True,
    )
    model_kwargs = dict(
        tree=simple_tree,
        frequencies=frequencies,
        predictor_input=["random"],
        pivot_spacing=1.0 / 12,
        time_interval=time_interval,
        epitope_masks_fname="builds/flu/metadata/ha_masks.tsv",
        epitope_mask_version="wolf",
    )
    return fitness_model(**model_kwargs)
def test_node_filter(self, tree):
    """Test frequency estimation with specific nodes omitted by setting their
    frequencies to zero at all pivots.
    """
    # Filter nodes by region.
    regions = ["china"]
    estimator = KdeFrequencies(node_filters={"region": regions})
    estimates = estimator.estimate(tree)

    tips = tree.get_terminals()

    # Verify that all tips have frequency estimates regardless of node
    # filter.
    assert all(tip.clade in estimates for tip in tips)

    # Partition the tips by whether their region passes the filter.
    selected_tips = [tip for tip in tips if tip.attr["region"] in regions]
    excluded_tips = [tip for tip in tips if tip.attr["region"] not in regions]

    # Verify that all tips from the requested region have non-zero
    # frequencies.
    assert all(estimates[tip.clade].sum() > 0 for tip in selected_tips)

    # Verify that all tips not from the requested region have zero
    # frequencies.
    assert all(estimates[tip.clade].sum() == 0 for tip in excluded_tips)
def test_calculate_pivots_from_start_and_end_date(self):
    """Test pivot calculation from a given start and end date instead of a
    given tree.
    """
    step = 0.25
    start_date, end_date = 2015.5, 2018.5
    computed = KdeFrequencies.calculate_pivots(
        step,
        start_date=start_date,
        end_date=end_date,
    )

    assert isinstance(computed, np.ndarray)
    assert computed[1] - computed[0] == step

    # The pivots must span the requested interval exactly.
    first_pivot, last_pivot = computed[0], computed[-1]
    assert first_pivot == start_date
    assert last_pivot == end_date
    assert last_pivot >= end_date - step
def precalculated_fitness_model(simple_tree):
    """Provides a simple fitness model with precalculated model parameters
    such that the model skips learning new parameters.

    The precalculated values are supplied by mapping the "random" predictor
    to ``MODEL_PARAMS``.
    """
    # The interval lists the later date first.
    # NOTE(review): confirm this is the order that
    # process.get_time_interval_as_floats expects.
    time_interval = (datetime.date(2015, 1, 1), datetime.date(2012, 1, 1))
    start_date, end_date = process.get_time_interval_as_floats(time_interval)

    frequencies = KdeFrequencies(
        start_date=start_date,
        end_date=end_date,
        include_internal_nodes=True,
    )
    model_kwargs = dict(
        tree=simple_tree,
        frequencies=frequencies,
        predictor_input={"random": MODEL_PARAMS},
        pivot_spacing=1.0 / 12,
        time_interval=time_interval,
        epitope_masks_fname="builds/flu/metadata/ha_masks.tsv",
        epitope_mask_version="wolf",
    )
    return fitness_model(**model_kwargs)
def real_fitness_model(real_tree, multiple_sequence_alignment):
    """Build a fitness model for ``real_tree`` with the "random" predictor and
    attach the given nucleotide alignment and mutation settings to it.
    """
    # The interval lists the later date first.
    # NOTE(review): confirm this is the order that
    # process.get_time_interval_as_floats expects.
    time_interval = (datetime.date(2017, 6, 1), datetime.date(2014, 6, 1))
    start_date, end_date = process.get_time_interval_as_floats(time_interval)

    frequencies = KdeFrequencies(
        start_date=start_date,
        end_date=end_date,
        include_internal_nodes=True,
    )
    model = fitness_model(
        tree=real_tree,
        frequencies=frequencies,
        predictor_input=["random"],
        pivot_spacing=1.0 / 12,
        time_interval=time_interval,
        epitope_masks_fname="builds/flu/metadata/ha_masks.tsv",
        epitope_mask_version="wolf",
    )

    # Attach alignment-related attributes after construction, in the same
    # order as before.
    extra_attributes = (
        ("nuc_aln", multiple_sequence_alignment),
        ("nuc_alphabet", 'ACGT-N'),
        ("min_mutation_frequency", 0.01),
    )
    for attribute, value in extra_attributes:
        setattr(model, attribute, value)

    return model
def test_weighted_estimate(self, tree):
    """Test frequency estimation with weighted tips.

    Checks that weighted estimation populates ``pivots`` and
    ``frequencies``, that a frequency vector has the same shape as the
    pivots, and that weighting changes the frequencies stored under key 1
    compared with an unweighted estimate.
    """
    # Estimate weighted frequencies; the first two fields of each REGIONS
    # entry are used as the weight key and the weight value.
    weights = {region[0]: region[1] for region in REGIONS}
    kde_frequencies = KdeFrequencies(weights=weights, weights_attribute="region")
    frequencies = kde_frequencies.estimate(tree)

    assert hasattr(kde_frequencies, "pivots")
    assert hasattr(kde_frequencies, "frequencies")
    # dict.values() is a non-subscriptable view on Python 3, so materialize
    # it before indexing; this also works unchanged on Python 2.
    first_frequencies = list(frequencies.values())[0]
    assert first_frequencies.shape == kde_frequencies.pivots.shape

    # Estimate unweighted frequencies to compare with weighted frequencies.
    unweighted_kde_frequencies = KdeFrequencies()
    unweighted_frequencies = unweighted_kde_frequencies.estimate(tree)

    # Any non-root node of the tree should have different frequencies with
    # or without weighting; key 1 is used as the representative node.
    assert not np.array_equal(frequencies[1], unweighted_frequencies[1])
runner.build_tree() runner.timetree_setup_filter_run() runner.run_geo_inference() # estimate tree frequencies if runner.config["estimate_tree_frequencies"]: pivots = runner.get_pivots_via_spacing() runner.estimate_tree_frequencies(pivots=pivots) for regionTuple in runner.info["regions"]: runner.estimate_tree_frequencies(region=str(regionTuple[0])) # estimate KDE tip frequencies if runner.config["estimate_kde_frequencies"]: runner.pivots = runner.get_pivots_via_spacing() runner.kde_frequencies = KdeFrequencies.estimate_region_weighted_frequencies_for_tree( runner.tree.tree, runner.pivots, [el[0] for el in runner.info["regions"]], [el[2] for el in runner.info["regions"]]) if runner.info["segment"] == 'ha': if runner.info["lineage"] == 'h3n2': clades = ['3c2.A', 'A1', 'A1b/135K', 'A2', 'A3'] virus_clades = [ 'A1', 'A1a', 'A1b/135K', 'A1b/135N', 'A2', 'A3' ] serum_clades = [ '3c2.A', 'A1', 'A1a', 'A1b', 'A1b/135K', 'A1b/135N', 'A2', 'A3' ] elif runner.info["lineage"] == 'h1n1pdm': clades = ['6b.1', '6b.2', '164T'] virus_clades = clades
if runner.config["estimate_tree_frequencies"]: pivots = runner.get_pivots_via_spacing() runner.estimate_tree_frequencies(pivots=pivots) for regionTuple in runner.info["regions"]: runner.estimate_tree_frequencies(region=str(regionTuple[0])) # estimate KDE tip frequencies if runner.config["estimate_kde_frequencies"]: start_date, end_date = runner.get_time_interval_as_floats( runner.info["time_interval"]) kde_frequencies = KdeFrequencies( pivot_frequency=runner.config["pivot_spacing"], start_date=start_date, end_date=end_date, weights={ region[0]: region[2] for region in runner.info["regions"] }, weights_attribute="region", include_internal_nodes=False) kde_frequencies.estimate(runner.tree.tree) runner.kde_frequencies = kde_frequencies if runner.info["segment"] == 'ha': if runner.info["lineage"] == 'h3n2': clades = ['3c2.A', 'A1', 'A1b/135K', 'A2', 'A3'] virus_clades = [ 'A1b/135K', 'A1b/135N', 'A2', 'A2/re', 'A3', '3c3.A' ] serum_clades = [ '3c2.A', 'A1', 'A1a', 'A1b', 'A1b/135K', 'A1b/135N', 'A2',
parser.add_argument( "results", help="tab-delimited model results for all LBI parameters") args = parser.parse_args() # Load tree with open(args.tree, "r") as fh: json_tree = json.load(fh) tree = json_to_tree(json_tree) # Load frequencies with open(args.frequencies, "r") as fh: json_frequencies = json.load(fh) kde_frequencies = KdeFrequencies.from_json(json_frequencies) start_date = kde_frequencies.start_date end_date = kde_frequencies.end_date # Setup a model to test LBI # The initial model can be configured once and executed several times with # different parameters to avoid recalculating censored frequencies, etc. each # time. predictor_kwargs = {"tau": 0.75, "time_window": 0.75} masks_path = os.path.join(augur_path, "builds", "flu", "metadata", "ha_masks.tsv") model = FitnessModel(tree, kde_frequencies, ["lbi"],