コード例 #1
0
    def test_import(self, tree, tmpdir):
        """Test import of frequencies JSON that was exported from a frequencies instance.

        Estimates frequencies for ``tree`` over a fixed time interval, writes
        the exported JSON to a file under ``tmpdir``, reads it back, and checks
        that the instance rebuilt by ``KdeFrequencies.from_json`` has the same
        pivots and the same frequencies for one sampled key.
        """
        kde_frequencies = KdeFrequencies(start_date=2015.5, end_date=2018.5)
        kde_frequencies.estimate(tree)
        frequencies_json = kde_frequencies.to_json()

        # Dump the exported JSON to disk. The context manager closes the handle
        # even when serialization raises, so a failure does not leak the file.
        tmp_fh = tmpdir.mkdir("json").join("frequencies.json")
        with tmp_fh.open(mode="w") as fh:
            json.dump(frequencies_json, fh)
        assert tmp_fh.check()

        # Rebuild a frequencies instance from the JSON that was just written.
        with tmp_fh.open() as fh:
            new_frequencies_json = json.load(fh)
        new_kde_frequencies = KdeFrequencies.from_json(new_frequencies_json)

        assert np.array_equal(kde_frequencies.pivots,
                              new_kde_frequencies.pivots)

        # Compare the second key in sorted order. The root clade is expected to
        # be number 0 and therefore to sort first, which makes index 1 the
        # first non-root key.
        key = sorted(kde_frequencies.frequencies.keys())[1]
        assert np.array_equal(kde_frequencies.frequencies[key],
                              new_kde_frequencies.frequencies[key])
コード例 #2
0
    def test_tip_and_internal_node_estimates(self, tree):
        """Check that estimation with internal nodes enabled covers every clade.

        Frequencies are estimated without weights and with
        ``include_internal_nodes=True``.
        """
        estimator = KdeFrequencies(include_internal_nodes=True)
        estimates = estimator.estimate(tree)

        # Every clade returned by ``tree.find_clades()`` must have an entry in
        # the estimated frequencies.
        has_estimate = [node.clade in estimates for node in tree.find_clades()]
        assert all(has_estimate)
コード例 #3
0
 def test_estimate(self, tree):
     """Test frequency estimation with default parameters.

     Checks that the instance exposes ``pivots`` and ``frequencies`` after
     estimation, that consecutive pivots are 1/12 apart when rounded to two
     decimal places, and that an estimated frequency array has the same
     shape as the pivots.
     """
     kde_frequencies = KdeFrequencies()
     frequencies = kde_frequencies.estimate(tree)
     assert hasattr(kde_frequencies, "pivots")
     assert np.around(kde_frequencies.pivots[1] - kde_frequencies.pivots[0],
                      2) == np.around(1 / 12.0, 2)
     assert hasattr(kde_frequencies, "frequencies")

     # ``dict.values()`` returns a non-subscriptable view on Python 3, so take
     # the first value through an iterator; this also works on Python 2.
     first_estimate = next(iter(frequencies.values()))
     assert first_estimate.shape == kde_frequencies.pivots.shape
コード例 #4
0
    def test_import_without_frequencies(self):
        """Round-trip an instance through JSON when no frequencies were estimated.

        The rebuilt instance must keep the original pivot frequency and must
        not have a ``frequencies`` attribute.
        """
        original = KdeFrequencies()
        exported = original.to_json()

        # Rebuild an instance straight from the exported JSON structure.
        restored = KdeFrequencies.from_json(exported)

        assert original.pivot_frequency == restored.pivot_frequency
        assert hasattr(restored, "frequencies") is False
コード例 #5
0
    def test_export_without_frequencies(self):
        """Check the JSON export of an instance whose frequencies were never estimated.

        The export must contain the parameters (including the pivot frequency
        and node filters) and must not contain a ``data`` section.
        """
        estimator = KdeFrequencies()
        exported = estimator.to_json()

        assert "params" in exported
        exported_params = exported["params"]
        assert estimator.pivot_frequency == exported_params["pivot_frequency"]
        assert "node_filters" in exported_params
        assert "data" not in exported
コード例 #6
0
 def test_estimate_with_time_interval(self, tree):
     """Test frequency estimation with a given time interval.

     Checks that the first pivot equals the requested start date and that an
     estimated frequency array has the same shape as the pivots.
     """
     start_date = 2015.5
     end_date = 2018.5
     kde_frequencies = KdeFrequencies(start_date=start_date,
                                      end_date=end_date)
     frequencies = kde_frequencies.estimate(tree)
     assert hasattr(kde_frequencies, "pivots")
     assert kde_frequencies.pivots[0] == start_date
     assert hasattr(kde_frequencies, "frequencies")

     # ``dict.values()`` returns a non-subscriptable view on Python 3, so take
     # the first value through an iterator; this also works on Python 2.
     first_estimate = next(iter(frequencies.values()))
     assert first_estimate.shape == kde_frequencies.pivots.shape
コード例 #7
0
    def test_get_params(self, tree):
        """Check that ``get_params`` reports the values the instance was created with."""
        expected = {
            "max_date": 2017.0,
            "start_date": 2015.5,
            "end_date": 2018.5
        }
        estimator = KdeFrequencies(**expected)
        estimator.estimate(tree)

        # Each constructor argument must come back unchanged from the export.
        exported = estimator.get_params()
        for name, value in expected.items():
            assert exported[name] == value
コード例 #8
0
 def test_calculate_pivots_from_tree_only(self, tree):
     """Check pivot calculation when only a tree is supplied.

     The result must be a NumPy array whose first two pivots are exactly one
     pivot frequency apart.
     """
     spacing = 0.25
     computed = KdeFrequencies.calculate_pivots(spacing, tree=tree)
     assert isinstance(computed, np.ndarray)

     step = computed[1] - computed[0]
     assert step == spacing
コード例 #9
0
    def test_export_with_frequencies(self, tree):
        """Check the JSON export of an instance after frequencies were estimated.

        The export must carry the pivot frequency, start date and end date
        under ``params``, plus a ``data`` section holding pivots and
        frequencies.
        """
        estimator = KdeFrequencies()
        estimator.estimate(tree)
        exported = estimator.to_json()

        assert "params" in exported
        exported_params = exported["params"]
        for attribute in ("pivot_frequency", "start_date", "end_date"):
            assert getattr(estimator, attribute) == exported_params[attribute]

        assert "data" in exported
        exported_data = exported["data"]
        assert "pivots" in exported_data
        assert "frequencies" in exported_data
コード例 #10
0
    def test_censored_frequencies(self, tree):
        """Check that tips sampled after ``max_date`` are censored from the estimates."""
        cutoff = 2017.0
        estimator = KdeFrequencies(max_date=cutoff)
        estimates = estimator.estimate(tree)

        # Tips dated after the cutoff must have frequencies that sum to zero.
        late_totals = [
            estimates[tip.clade].sum() for tip in tree.get_terminals()
            if tip.attr["num_date"] > cutoff
        ]
        assert all([total == 0 for total in late_totals])

        # At least one tip dated on or before the cutoff must have a positive
        # total frequency.
        early_totals = [
            estimates[tip.clade].sum() for tip in tree.get_terminals()
            if tip.attr["num_date"] <= cutoff
        ]
        assert any([total > 0 for total in early_totals])
コード例 #11
0
    def test_only_tip_estimates(self, tree):
        """Check that estimation without internal nodes yields estimates for tips only.

        The same expectation is verified for unweighted frequencies and for
        frequencies weighted by the ``region`` attribute.
        """

        def assert_tips_only(estimates):
            # Every tip must be present and no internal node may be present.
            tip_hits = [
                tip.clade in estimates for tip in tree.get_terminals()
            ]
            assert all(tip_hits)

            internal_hits = [
                node.clade in estimates for node in tree.get_nonterminals()
            ]
            assert not any(internal_hits)

        # Unweighted frequencies.
        unweighted = KdeFrequencies(include_internal_nodes=False)
        assert_tips_only(unweighted.estimate(tree))

        # Weighted frequencies, with weights taken from the first two fields
        # of each REGIONS entry.
        region_weights = {region[0]: region[1] for region in REGIONS}
        weighted = KdeFrequencies(weights=region_weights,
                                  weights_attribute="region",
                                  include_internal_nodes=False)
        assert_tips_only(weighted.estimate(tree))
コード例 #12
0
ファイル: test_fitness_model.py プロジェクト: sclipman/augur
def simple_fitness_model(simple_tree):
    """Build a fitness model for ``simple_tree`` that uses the "random" predictor.

    Frequencies are supplied as a ``KdeFrequencies`` instance covering the
    model's time interval, with internal nodes included.
    """
    time_interval = (datetime.date(2015, 1, 1), datetime.date(2012, 1, 1))
    start_date, end_date = process.get_time_interval_as_floats(time_interval)

    # Build the frequency estimator up front so the model call stays compact.
    interval_frequencies = KdeFrequencies(start_date=start_date,
                                          end_date=end_date,
                                          include_internal_nodes=True)
    masks_fname = "builds/flu/metadata/ha_masks.tsv"

    return fitness_model(tree=simple_tree,
                         frequencies=interval_frequencies,
                         predictor_input=["random"],
                         pivot_spacing=1.0 / 12,
                         time_interval=time_interval,
                         epitope_masks_fname=masks_fname,
                         epitope_mask_version="wolf")
コード例 #13
0
    def test_node_filter(self, tree):
        """Check estimation when a node filter restricts which tips contribute.

        Tips outside the requested regions are expected to have zero
        frequencies at all pivots, while still being present in the result.
        """
        # Keep only nodes from the listed regions.
        kept_regions = ["china"]
        estimator = KdeFrequencies(node_filters={"region": kept_regions})
        estimates = estimator.estimate(tree)

        # Every tip has an estimate regardless of the node filter.
        tip_hits = [tip.clade in estimates for tip in tree.get_terminals()]
        assert all(tip_hits)

        # Tips from the requested regions have non-zero frequencies.
        kept_totals = [
            estimates[tip.clade].sum() for tip in tree.get_terminals()
            if tip.attr["region"] in kept_regions
        ]
        assert all([total > 0 for total in kept_totals])

        # Tips from any other region have zero frequencies.
        dropped_totals = [
            estimates[tip.clade].sum() for tip in tree.get_terminals()
            if tip.attr["region"] not in kept_regions
        ]
        assert all([total == 0 for total in dropped_totals])
コード例 #14
0
 def test_calculate_pivots_from_start_and_end_date(self):
     """Check pivot calculation from explicit start and end dates, without a tree.

     The pivots must be a NumPy array spaced by the pivot frequency that
     begins at the start date and ends at the end date.
     """
     spacing = 0.25
     first_date = 2015.5
     last_date = 2018.5
     computed = KdeFrequencies.calculate_pivots(spacing,
                                                start_date=first_date,
                                                end_date=last_date)
     assert isinstance(computed, np.ndarray)

     step = computed[1] - computed[0]
     assert step == spacing

     # Boundary checks on the first and last pivots.
     assert computed[0] == first_date
     final_pivot = computed[-1]
     assert final_pivot == last_date
     assert final_pivot >= last_date - spacing
コード例 #15
0
ファイル: test_fitness_model.py プロジェクト: sclipman/augur
def precalculated_fitness_model(simple_tree):
    """Build a simple fitness model whose "random" predictor gets precalculated parameters.

    Passing ``MODEL_PARAMS`` for the predictor is intended to make the model
    skip learning new parameters.
    """
    time_interval = (datetime.date(2015, 1, 1), datetime.date(2012, 1, 1))
    start_date, end_date = process.get_time_interval_as_floats(time_interval)

    # Build the frequency estimator up front so the model call stays compact.
    interval_frequencies = KdeFrequencies(start_date=start_date,
                                          end_date=end_date,
                                          include_internal_nodes=True)
    masks_fname = "builds/flu/metadata/ha_masks.tsv"

    return fitness_model(tree=simple_tree,
                         frequencies=interval_frequencies,
                         predictor_input={"random": MODEL_PARAMS},
                         pivot_spacing=1.0 / 12,
                         time_interval=time_interval,
                         epitope_masks_fname=masks_fname,
                         epitope_mask_version="wolf")
コード例 #16
0
ファイル: test_fitness_model.py プロジェクト: sclipman/augur
def real_fitness_model(real_tree, multiple_sequence_alignment):
    """Build a fitness model for ``real_tree`` and attach nucleotide alignment settings.

    After construction the model is given the supplied alignment, the
    nucleotide alphabet ``'ACGT-N'`` and a minimum mutation frequency of 0.01.
    """
    time_interval = (datetime.date(2017, 6, 1), datetime.date(2014, 6, 1))
    start_date, end_date = process.get_time_interval_as_floats(time_interval)

    # Build the frequency estimator up front so the model call stays compact.
    interval_frequencies = KdeFrequencies(start_date=start_date,
                                          end_date=end_date,
                                          include_internal_nodes=True)
    masks_fname = "builds/flu/metadata/ha_masks.tsv"

    fitted = fitness_model(tree=real_tree,
                           frequencies=interval_frequencies,
                           predictor_input=["random"],
                           pivot_spacing=1.0 / 12,
                           time_interval=time_interval,
                           epitope_masks_fname=masks_fname,
                           epitope_mask_version="wolf")

    # Attach the alignment-related attributes in the same order as before.
    fitted.nuc_aln = multiple_sequence_alignment
    fitted.nuc_alphabet = 'ACGT-N'
    fitted.min_mutation_frequency = 0.01
    return fitted
コード例 #17
0
    def test_weighted_estimate(self, tree):
        """Test frequency estimation with weighted tips.

        Checks that weighted estimation exposes ``pivots`` and
        ``frequencies``, that an estimated frequency array has the same shape
        as the pivots, and that weighting changes the frequencies of the node
        stored under key 1.
        """
        # Estimate weighted frequencies.
        weights = {region[0]: region[1] for region in REGIONS}
        kde_frequencies = KdeFrequencies(weights=weights,
                                         weights_attribute="region")
        frequencies = kde_frequencies.estimate(tree)
        assert hasattr(kde_frequencies, "pivots")
        assert hasattr(kde_frequencies, "frequencies")

        # ``dict.values()`` returns a non-subscriptable view on Python 3, so
        # take the first value through an iterator; this also works on
        # Python 2.
        first_estimate = next(iter(frequencies.values()))
        assert first_estimate.shape == kde_frequencies.pivots.shape

        # Estimate unweighted frequencies to compare with weighted frequencies.
        unweighted_kde_frequencies = KdeFrequencies()
        unweighted_frequencies = unweighted_kde_frequencies.estimate(tree)

        # Any non-root node of the tree should have different frequencies with
        # and without weighting; key 1 is used as the sample node.
        assert not np.array_equal(frequencies[1], unweighted_frequencies[1])
コード例 #18
0
        runner.build_tree()
        runner.timetree_setup_filter_run()
        runner.run_geo_inference()

        # estimate tree frequencies
        if runner.config["estimate_tree_frequencies"]:
            pivots = runner.get_pivots_via_spacing()
            runner.estimate_tree_frequencies(pivots=pivots)
            for regionTuple in runner.info["regions"]:
                runner.estimate_tree_frequencies(region=str(regionTuple[0]))

        # estimate KDE tip frequencies
        if runner.config["estimate_kde_frequencies"]:
            runner.pivots = runner.get_pivots_via_spacing()
            runner.kde_frequencies = KdeFrequencies.estimate_region_weighted_frequencies_for_tree(
                runner.tree.tree, runner.pivots,
                [el[0] for el in runner.info["regions"]],
                [el[2] for el in runner.info["regions"]])

        if runner.info["segment"] == 'ha':
            if runner.info["lineage"] == 'h3n2':
                clades = ['3c2.A', 'A1', 'A1b/135K', 'A2', 'A3']
                virus_clades = [
                    'A1', 'A1a', 'A1b/135K', 'A1b/135N', 'A2', 'A3'
                ]
                serum_clades = [
                    '3c2.A', 'A1', 'A1a', 'A1b', 'A1b/135K', 'A1b/135N', 'A2',
                    'A3'
                ]
            elif runner.info["lineage"] == 'h1n1pdm':
                clades = ['6b.1', '6b.2', '164T']
                virus_clades = clades
コード例 #19
0
ファイル: flu.process.py プロジェクト: sclipman/augur
        if runner.config["estimate_tree_frequencies"]:
            pivots = runner.get_pivots_via_spacing()
            runner.estimate_tree_frequencies(pivots=pivots)
            for regionTuple in runner.info["regions"]:
                runner.estimate_tree_frequencies(region=str(regionTuple[0]))

        # estimate KDE tip frequencies
        if runner.config["estimate_kde_frequencies"]:
            start_date, end_date = runner.get_time_interval_as_floats(
                runner.info["time_interval"])

            kde_frequencies = KdeFrequencies(
                pivot_frequency=runner.config["pivot_spacing"],
                start_date=start_date,
                end_date=end_date,
                weights={
                    region[0]: region[2]
                    for region in runner.info["regions"]
                },
                weights_attribute="region",
                include_internal_nodes=False)
            kde_frequencies.estimate(runner.tree.tree)
            runner.kde_frequencies = kde_frequencies

        if runner.info["segment"] == 'ha':
            if runner.info["lineage"] == 'h3n2':
                clades = ['3c2.A', 'A1', 'A1b/135K', 'A2', 'A3']
                virus_clades = [
                    'A1b/135K', 'A1b/135N', 'A2', 'A2/re', 'A3', '3c3.A'
                ]
                serum_clades = [
                    '3c2.A', 'A1', 'A1a', 'A1b', 'A1b/135K', 'A1b/135N', 'A2',
コード例 #20
0
    parser.add_argument(
        "results", help="tab-delimited model results for all LBI parameters")

    args = parser.parse_args()

    # Load tree
    with open(args.tree, "r") as fh:
        json_tree = json.load(fh)

    tree = json_to_tree(json_tree)

    # Load frequencies
    with open(args.frequencies, "r") as fh:
        json_frequencies = json.load(fh)

    kde_frequencies = KdeFrequencies.from_json(json_frequencies)
    start_date = kde_frequencies.start_date
    end_date = kde_frequencies.end_date

    # Setup a model to test LBI

    # The initial model can be configured once and executed several times with
    # different parameters to avoid recalculating censored frequencies, etc. each
    # time.

    predictor_kwargs = {"tau": 0.75, "time_window": 0.75}
    masks_path = os.path.join(augur_path, "builds", "flu", "metadata",
                              "ha_masks.tsv")

    model = FitnessModel(tree,
                         kde_frequencies, ["lbi"],