def test_only_tip_estimates(self, tree):
        """Check that tip-only estimation yields frequencies for tips alone.

        The estimator is run twice with ``include_internal_nodes=False``:
        first unweighted, then weighted by region. The same pair of membership
        checks is applied to both results.
        """
        # Unweighted estimation restricted to tips.
        tip_only_estimator = TreeKdeFrequencies(include_internal_nodes=False)
        frequencies = tip_only_estimator.estimate(tree)

        # Every tip name must be a key of the estimated frequencies.
        tips_found = [
            tip.name in frequencies for tip in tree.get_terminals()
        ]
        assert all(tips_found)

        # No internal node may be present in the estimated frequencies.
        # NOTE(review): tips are looked up by ``name`` but internal nodes by
        # ``clade``; confirm ``clade`` is the key the estimator would use.
        internal_found = [
            node.clade in frequencies for node in tree.get_nonterminals()
        ]
        assert not any(internal_found)

        # Region-weighted estimation restricted to tips.
        region_weights = {entry[0]: entry[1] for entry in REGIONS}
        weighted_options = {
            "weights": region_weights,
            "weights_attribute": "region",
            "include_internal_nodes": False,
        }
        tip_only_estimator = TreeKdeFrequencies(**weighted_options)
        frequencies = tip_only_estimator.estimate(tree)

        # The same two checks apply to the weighted result.
        tips_found = [
            tip.name in frequencies for tip in tree.get_terminals()
        ]
        assert all(tips_found)

        internal_found = [
            node.clade in frequencies for node in tree.get_nonterminals()
        ]
        assert not any(internal_found)
Example #2
0
    def test_import(self, tree, tmpdir):
        """Test import of frequencies JSON that was exported from a frequencies instance.

        The exported JSON is written to a temporary file, read back, and used
        to build a second instance. The pivots and one per-node frequency
        array of the second instance must equal those of the first.
        """
        start_date = 2015.5
        end_date = 2018.5
        kde_frequencies = TreeKdeFrequencies(start_date=start_date,
                                             end_date=end_date)
        # The return value is not needed; the estimates are read back from the
        # instance's ``frequencies`` attribute below.
        kde_frequencies.estimate(tree)
        frequencies_json = kde_frequencies.to_json()

        # Try to dump exported JSON to disk. The context manager closes the
        # handle even when serialization raises.
        tmp_fh = tmpdir.mkdir("json").join("frequencies.json")
        with tmp_fh.open(mode="w") as fh:
            json.dump(frequencies_json, fh)
        assert tmp_fh.check()

        # Build a new instance from the JSON that was written to disk.
        with tmp_fh.open() as fh:
            new_frequencies_json = json.load(fh)
        new_kde_frequencies = TreeKdeFrequencies.from_json(
            new_frequencies_json)

        assert np.array_equal(kde_frequencies.pivots,
                              new_kde_frequencies.pivots)

        # Compare the frequencies stored under a single key. Index 1 skips the
        # first key in sorted order, which the original author expected to be
        # the root (clade number 0).
        # NOTE(review): confirm that assumption still matches the key type.
        key = sorted(kde_frequencies.frequencies.keys())[1]
        assert np.array_equal(kde_frequencies.frequencies[key],
                              new_kde_frequencies.frequencies[key])
    def test_node_filter(self, tree):
        """Check estimation when a node filter restricts tips by region.

        Tips whose ``region`` attribute is in the filter must have frequencies
        with a positive sum; all other tips must still be present but with
        frequencies that sum to zero.
        """
        # Keep only tips from a single region.
        regions = ["china"]
        filtered_estimator = TreeKdeFrequencies(node_filters={"region": regions})
        frequencies = filtered_estimator.estimate(tree)

        # The filter must not remove any tip from the result.
        tips_found = [
            tip.name in frequencies for tip in tree.get_terminals()
        ]
        assert all(tips_found)

        # Tips inside the requested region: frequencies sum to more than zero.
        kept_are_positive = [
            frequencies[tip.name].sum() > 0
            for tip in tree.get_terminals()
            if tip.attr["region"] in regions
        ]
        assert all(kept_are_positive)

        # Tips outside the requested region: frequencies sum to exactly zero.
        dropped_are_zero = [
            frequencies[tip.name].sum() == 0
            for tip in tree.get_terminals()
            if tip.attr["region"] not in regions
        ]
        assert all(dropped_are_zero)
    def test_weighted_estimate(self, tree):
        """Check region-weighted estimation and compare it to unweighted output.
        """
        # Weighted run: weights come from the first two fields of each REGIONS
        # entry and are matched on the "region" attribute.
        region_weights = {entry[0]: entry[1] for entry in REGIONS}
        weighted_estimator = TreeKdeFrequencies(weights=region_weights,
                                                weights_attribute="region")
        weighted = weighted_estimator.estimate(tree)

        for attribute in ("pivots", "frequencies"):
            assert hasattr(weighted_estimator, attribute)

        # The first frequency array must have the same shape as the pivots.
        first_estimate = list(weighted.values())[0]
        assert first_estimate.shape == weighted_estimator.pivots.shape

        # Unweighted run used as the baseline for comparison.
        baseline_estimator = TreeKdeFrequencies()
        baseline = baseline_estimator.estimate(tree)

        # A non-root node (first child of the root) is expected to differ
        # between the weighted and unweighted results.
        probe = tree.root.clades[0]
        assert not np.array_equal(weighted[probe.name], baseline[probe.name])
 def test_estimate(self, tree):
     """Check estimation when the estimator is built without arguments.
     """
     estimator = TreeKdeFrequencies()
     estimates = estimator.estimate(tree)

     assert hasattr(estimator, "pivots")

     # The gap between the first two pivots must equal 1/12 after rounding
     # both sides to two decimals.
     pivot_step = estimator.pivots[1] - estimator.pivots[0]
     expected_step = np.around(1 / 12.0, 2)
     assert np.around(pivot_step, 2) == expected_step

     assert hasattr(estimator, "frequencies")

     # The first frequency array must have the same shape as the pivots.
     first_estimate = list(estimates.values())[0]
     assert first_estimate.shape == estimator.pivots.shape
Example #6
0
    def test_tip_and_internal_node_estimates(self, tree):
        """Check that every clade in the tree gets a frequency estimate when
        internal nodes are included.
        """
        # Unweighted estimation over tips and internal nodes.
        estimator = TreeKdeFrequencies(include_internal_nodes=True)
        estimates = estimator.estimate(tree)

        # Every clade returned by ``find_clades`` must be keyed by name.
        clades_found = [
            clade.name in estimates for clade in tree.find_clades()
        ]
        assert all(clades_found)
    def test_export_without_frequencies(self):
        """Check the JSON export of an instance on which ``estimate`` was never
        called.
        """
        estimator = TreeKdeFrequencies()
        exported = estimator.to_json()

        # Parameters are exported ...
        assert "params" in exported
        exported_params = exported["params"]
        assert estimator.pivot_frequency == exported_params["pivot_frequency"]
        assert "node_filters" in exported_params

        # ... but there is no data section without estimated frequencies.
        assert "data" not in exported
Example #8
0
    def test_export_without_frequencies(self):
        """Check the JSON export of an instance on which ``estimate`` was never
        called.
        """
        unestimated = TreeKdeFrequencies()
        exported = unestimated.to_json()

        # The parameter section must exist and carry the expected entries.
        assert "params" in exported
        exported_params = exported["params"]
        assert unestimated.pivot_frequency == exported_params["pivot_frequency"]
        assert "node_filters" in exported_params

        # No data section is expected before estimation.
        assert "data" not in exported
Example #9
0
 def test_estimate(self, tree):
     """Check estimation when the estimator is built without arguments.
     """
     default_estimator = TreeKdeFrequencies()
     estimates = default_estimator.estimate(tree)

     assert hasattr(default_estimator, "pivots")

     # The gap between the first two pivots must equal 1/12 after rounding
     # both sides to two decimals.
     pivots = default_estimator.pivots
     assert np.around(pivots[1] - pivots[0], 2) == np.around(1 / 12.0, 2)

     assert hasattr(default_estimator, "frequencies")

     # The first frequency array must have the same shape as the pivots.
     first_estimate = list(estimates.values())[0]
     assert first_estimate.shape == default_estimator.pivots.shape
Example #10
0
    def test_import_without_frequencies(self):
        """Check that JSON exported before estimation can be imported again.
        """
        original = TreeKdeFrequencies()
        exported = original.to_json()

        # Rebuild an instance from the exported JSON alone.
        restored = TreeKdeFrequencies.from_json(exported)

        assert original.pivot_frequency == restored.pivot_frequency
        # Nothing was estimated, so no frequencies attribute may appear.
        assert not hasattr(restored, "frequencies")
Example #11
0
    def test_import_without_frequencies(self):
        """Check that JSON exported before estimation can be imported again.
        """
        source_instance = TreeKdeFrequencies()
        exported_json = source_instance.to_json()

        # Rebuild an instance from the exported JSON alone.
        imported_instance = TreeKdeFrequencies.from_json(exported_json)

        assert source_instance.pivot_frequency == imported_instance.pivot_frequency
        # Nothing was estimated, so no frequencies attribute may appear.
        assert not hasattr(imported_instance, "frequencies")
Example #12
0
    def test_tip_and_internal_node_estimates(self, tree):
        """Check that every clade in the tree gets a frequency estimate when
        internal nodes are included.
        """
        # Unweighted estimation over tips and internal nodes.
        options = {"include_internal_nodes": True}
        estimator = TreeKdeFrequencies(**options)
        estimates = estimator.estimate(tree)

        # Every clade returned by ``find_clades`` must be keyed by name.
        clades_found = [
            clade.name in estimates for clade in tree.find_clades()
        ]
        assert all(clades_found)
Example #13
0
 def test_estimate_with_time_interval(self, tree):
     """Check estimation when explicit start and end dates are supplied.
     """
     interval = {"start_date": 2015.5, "end_date": 2018.5}
     estimator = TreeKdeFrequencies(**interval)
     estimates = estimator.estimate(tree)

     assert hasattr(estimator, "pivots")
     # The first pivot must sit exactly on the requested start date.
     assert estimator.pivots[0] == interval["start_date"]

     assert hasattr(estimator, "frequencies")
     # The first frequency array must have the same shape as the pivots.
     first_estimate = list(estimates.values())[0]
     assert first_estimate.shape == estimator.pivots.shape
Example #14
0
 def test_estimate_with_time_interval(self, tree):
     """Check estimation when explicit start and end dates are supplied.
     """
     first_date, last_date = 2015.5, 2018.5
     bounded_estimator = TreeKdeFrequencies(start_date=first_date,
                                            end_date=last_date)
     estimates = bounded_estimator.estimate(tree)

     assert hasattr(bounded_estimator, "pivots")
     # The first pivot must sit exactly on the requested start date.
     assert bounded_estimator.pivots[0] == first_date

     assert hasattr(bounded_estimator, "frequencies")
     # The first frequency array must have the same shape as the pivots.
     first_estimate = list(estimates.values())[0]
     assert first_estimate.shape == bounded_estimator.pivots.shape
Example #15
0
    def test_export_with_frequencies(self, tree):
        """Check the JSON export of an instance after ``estimate`` has run.
        """
        estimator = TreeKdeFrequencies()
        # Only the side effect on the instance matters for the export.
        estimator.estimate(tree)
        exported = estimator.to_json()

        # The parameter section mirrors the instance attributes.
        assert "params" in exported
        assert estimator.pivot_frequency == exported["params"]["pivot_frequency"]
        assert estimator.start_date == exported["params"]["start_date"]
        assert estimator.end_date == exported["params"]["end_date"]

        # The data section carries both pivots and frequencies.
        assert "data" in exported
        for section in ("pivots", "frequencies"):
            assert section in exported["data"]
Example #16
0
    def test_get_params(self, tree):
        """Check that ``get_params`` returns the constructor arguments.
        """
        initial_params = dict(max_date=2017.0, start_date=2015.5, end_date=2018.5)
        estimator = TreeKdeFrequencies(**initial_params)
        # Estimation runs before the parameters are read back.
        estimator.estimate(tree)

        # Each supplied parameter must come back with the same value.
        exported_params = estimator.get_params()
        for name, expected in initial_params.items():
            assert exported_params[name] == expected
Example #17
0
    def test_get_params(self, tree):
        """Check that ``get_params`` returns the constructor arguments.
        """
        requested = {
            "max_date": 2017.0,
            "start_date": 2015.5,
            "end_date": 2018.5,
        }
        configured_estimator = TreeKdeFrequencies(**requested)
        # Estimation runs before the parameters are read back.
        configured_estimator.estimate(tree)

        # Each supplied parameter must come back with the same value.
        reported = configured_estimator.get_params()
        for name, expected in requested.items():
            assert reported[name] == expected
Example #18
0
    def test_estimate(self, tree):
        """Check estimation when the estimator is built without arguments.
        """
        estimator = TreeKdeFrequencies()
        estimates = estimator.estimate(tree)

        assert hasattr(estimator, "pivots")

        # The gap between the first two pivots must equal 1/12 after rounding
        # both sides to two decimals.
        pivot_step = estimator.pivots[1] - estimator.pivots[0]
        assert np.around(pivot_step, 2) == np.around(1 / 12.0, 2)

        assert hasattr(estimator, "frequencies")

        # The first frequency array must have the same shape as the pivots.
        first_estimate = list(estimates.values())[0]
        assert first_estimate.shape == estimator.pivots.shape

        # Summed over all entries, the frequencies must be close to 1 at
        # every pivot.
        stacked = np.array(list(estimates.values()))
        totals = stacked.sum(axis=0)
        expected_totals = np.ones_like(estimator.pivots)
        assert np.allclose(totals, expected_totals)
Example #19
0
    def test_export_with_frequencies(self, tree):
        """Check the JSON export of an instance after ``estimate`` has run.
        """
        estimated = TreeKdeFrequencies()
        # Only the side effect on the instance matters for the export.
        estimated.estimate(tree)
        exported = estimated.to_json()

        # The parameter section mirrors the instance attributes.
        assert "params" in exported
        assert estimated.pivot_frequency == exported["params"]["pivot_frequency"]
        assert estimated.start_date == exported["params"]["start_date"]
        assert estimated.end_date == exported["params"]["end_date"]

        # The data section carries both pivots and frequencies.
        assert "data" in exported
        for section in ("pivots", "frequencies"):
            assert section in exported["data"]
Example #20
0
    def test_censored_frequencies(self, tree):
        """Check estimation when a maximum date is given to the estimator.

        Tips whose ``num_date`` attribute lies after the maximum date must have
        frequencies that sum to zero; at least one tip at or before it must not.
        """
        max_date = 2017.0
        censoring_estimator = TreeKdeFrequencies(max_date=max_date)
        frequencies = censoring_estimator.estimate(tree)

        # Tips dated after the cutoff: frequencies sum to exactly zero.
        late_are_zero = [
            frequencies[tip.name].sum() == 0
            for tip in tree.get_terminals()
            if tip.attr["num_date"] > max_date
        ]
        assert all(late_are_zero)

        # Tips dated at or before the cutoff: at least one positive sum.
        early_are_positive = [
            frequencies[tip.name].sum() > 0
            for tip in tree.get_terminals()
            if tip.attr["num_date"] <= max_date
        ]
        assert any(early_are_positive)
Example #21
0
    def test_censored_frequencies(self, tree):
        """Check estimation when a maximum date is given to the estimator.

        Tips whose ``num_date`` attribute lies after the maximum date must have
        frequencies that sum to zero; at least one tip at or before it must not.
        """
        max_date = 2017.0
        options = {"max_date": max_date}
        estimator = TreeKdeFrequencies(**options)
        frequencies = estimator.estimate(tree)

        # Tips dated after the cutoff: frequencies sum to exactly zero.
        censored_checks = [
            frequencies[tip.name].sum() == 0
            for tip in tree.get_terminals()
            if tip.attr["num_date"] > max_date
        ]
        assert all(censored_checks)

        # Tips dated at or before the cutoff: at least one positive sum.
        retained_checks = [
            frequencies[tip.name].sum() > 0
            for tip in tree.get_terminals()
            if tip.attr["num_date"] <= max_date
        ]
        assert any(retained_checks)
Example #22
0
    def test_weighted_estimate(self, tree):
        """Check region-weighted estimation and compare it to unweighted output.
        """
        # Weighted run: weights come from the first two fields of each REGIONS
        # entry and are matched on the "region" attribute.
        region_weights = {entry[0]: entry[1] for entry in REGIONS}
        weighted_options = {
            "weights": region_weights,
            "weights_attribute": "region",
        }
        weighted_estimator = TreeKdeFrequencies(**weighted_options)
        weighted = weighted_estimator.estimate(tree)

        for attribute in ("pivots", "frequencies"):
            assert hasattr(weighted_estimator, attribute)

        # The first frequency array must have the same shape as the pivots.
        first_estimate = list(weighted.values())[0]
        assert first_estimate.shape == weighted_estimator.pivots.shape

        # Summed over all entries, the frequencies must be close to 1 at
        # every pivot.
        totals = np.array(list(weighted.values())).sum(axis=0)
        expected_totals = np.ones_like(weighted_estimator.pivots)
        assert np.allclose(totals, expected_totals)

        # Unweighted run used as the baseline for comparison.
        baseline_estimator = TreeKdeFrequencies()
        baseline = baseline_estimator.estimate(tree)

        # A non-root node (first child of the root) is expected to differ
        # between the weighted and unweighted results.
        probe = tree.root.clades[0]
        assert not np.array_equal(weighted[probe.name], baseline[probe.name])
Example #23
0
    def test_only_tip_estimates(self, tree):
        """Check that tip-only estimation yields frequencies for tips alone.

        The estimator is run twice with ``include_internal_nodes=False``:
        first unweighted, then weighted by region. The same pair of membership
        checks is applied to both results.
        """
        # Unweighted estimation restricted to tips.
        unweighted_options = {"include_internal_nodes": False}
        estimator = TreeKdeFrequencies(**unweighted_options)
        frequencies = estimator.estimate(tree)

        # Every tip name must be a key of the estimated frequencies.
        tips_found = [
            tip.name in frequencies
            for tip in tree.get_terminals()
        ]
        assert all(tips_found)

        # No internal node may be present in the estimated frequencies.
        # NOTE(review): tips are looked up by ``name`` but internal nodes by
        # ``clade``; confirm ``clade`` is the key the estimator would use.
        internal_found = [
            node.clade in frequencies
            for node in tree.get_nonterminals()
        ]
        assert not any(internal_found)

        # Region-weighted estimation restricted to tips.
        region_weights = {entry[0]: entry[1] for entry in REGIONS}
        weighted_options = {
            "weights": region_weights,
            "weights_attribute": "region",
            "include_internal_nodes": False,
        }
        estimator = TreeKdeFrequencies(**weighted_options)
        frequencies = estimator.estimate(tree)

        # The same two checks apply to the weighted result.
        tips_found = [
            tip.name in frequencies
            for tip in tree.get_terminals()
        ]
        assert all(tips_found)

        internal_found = [
            node.clade in frequencies
            for node in tree.get_nonterminals()
        ]
        assert not any(internal_found)
Example #24
0
    def test_import(self, tree, tmpdir):
        """Test import of frequencies JSON that was exported from a frequencies instance.

        The exported JSON is written to a temporary file, read back, and used
        to build a second instance. The pivots and one per-node frequency
        array of the second instance must equal those of the first.
        """
        start_date = 2015.5
        end_date = 2018.5
        kde_frequencies = TreeKdeFrequencies(
            start_date=start_date,
            end_date=end_date
        )
        # The return value is not needed; the estimates are read back from the
        # instance's ``frequencies`` attribute below.
        kde_frequencies.estimate(tree)
        frequencies_json = kde_frequencies.to_json()

        # Try to dump exported JSON to disk. The context manager closes the
        # handle even when serialization raises.
        tmp_fh = tmpdir.mkdir("json").join("frequencies.json")
        with tmp_fh.open(mode="w") as fh:
            json.dump(frequencies_json, fh)
        assert tmp_fh.check()

        # Build a new instance from the JSON that was written to disk.
        with tmp_fh.open() as fh:
            new_frequencies_json = json.load(fh)
        new_kde_frequencies = TreeKdeFrequencies.from_json(new_frequencies_json)

        assert np.array_equal(
            kde_frequencies.pivots,
            new_kde_frequencies.pivots
        )

        # Compare the frequencies stored under a single key. Index 1 skips the
        # first key in sorted order, which the original author expected to be
        # the root (clade number 0).
        # NOTE(review): confirm that assumption still matches the key type.
        key = sorted(kde_frequencies.frequencies.keys())[1]
        assert np.array_equal(
            kde_frequencies.frequencies[key],
            new_kde_frequencies.frequencies[key]
        )
Example #25
0
    def test_node_filter(self, tree):
        """Check estimation when a node filter restricts tips by region.

        Tips whose ``region`` attribute is in the filter must have frequencies
        with a positive sum; all other tips must still be present but with
        frequencies that sum to zero.
        """
        # Keep only tips from a single region.
        regions = ["china"]
        node_filters = {"region": regions}
        estimator = TreeKdeFrequencies(node_filters=node_filters)
        frequencies = estimator.estimate(tree)

        # The filter must not remove any tip from the result.
        tips_found = [
            tip.name in frequencies
            for tip in tree.get_terminals()
        ]
        assert all(tips_found)

        # Tips inside the requested region: frequencies sum to more than zero.
        matching_checks = [
            frequencies[tip.name].sum() > 0
            for tip in tree.get_terminals()
            if tip.attr["region"] in regions
        ]
        assert all(matching_checks)

        # Tips outside the requested region: frequencies sum to exactly zero.
        other_checks = [
            frequencies[tip.name].sum() == 0
            for tip in tree.get_terminals()
            if tip.attr["region"] not in regions
        ]
        assert all(other_checks)
Example #26
0
    def test_weighted_estimate_with_unrepresented_weights(self, tree):
        """Check weighted estimation when some or all weighted values are
        absent from the tree.

        With one region removed, the frequencies must still sum to 1 at every
        pivot. With weights that match no tip at all, estimation must raise
        ``TreeKdeFrequenciesError``.
        """
        # Collect the African tips first, then prune them, so the tree is not
        # modified while it is being traversed.
        africa_tips = []
        for candidate in tree.find_clades(terminal=True):
            if candidate.attr["region"] == "africa":
                africa_tips.append(candidate)

        for africa_tip in africa_tips:
            tree.prune(africa_tip)

        # Weighted run on the pruned tree.
        region_weights = {entry[0]: entry[1] for entry in REGIONS}
        estimator = TreeKdeFrequencies(weights=region_weights,
                                       weights_attribute="region")
        estimates = estimator.estimate(tree)

        # Summed over all entries, the frequencies must be close to 1 at
        # every pivot.
        totals = np.array(list(estimates.values())).sum(axis=0)
        expected_totals = np.ones_like(estimator.pivots)
        assert np.allclose(totals, expected_totals)

        # Weights keyed by regions that the test assumes no tip belongs to:
        # weighting is then impossible and estimation is expected to fail.
        unmatched_weights = {"fake_region_1": 1.0, "fake_region_2": 2.0}
        estimator = TreeKdeFrequencies(weights=unmatched_weights,
                                       weights_attribute="region")

        with pytest.raises(TreeKdeFrequenciesError):
            estimator.estimate(tree)
    parser.add_argument("--frequency-method", required=True, choices=["kde", "diffusion"], help="method used to estimate frequencies")
    parser.add_argument("--clades", help="JSON of clade annotations for nodes in the given tree")
    parser.add_argument("--delta-pivots", type=int, default=1, help="number of frequency pivots to look back in time for change in frequency calculation")
    parser.add_argument("--output", required=True, help="JSON of delta frequency annotations for nodes in the given tree")

    args = parser.parse_args()

    # Load the tree.
    tree = Bio.Phylo.read(args.tree, "newick")

    # Load frequencies.
    with open(args.frequencies, "r") as fh:
        frequencies_json = json.load(fh)

    if args.frequency_method == "kde":
        kde_frequencies = TreeKdeFrequencies.from_json(frequencies_json)
        frequencies = kde_frequencies.frequencies

        # Load clades.
        with open(args.clades, "r") as fh:
            clades_json = json.load(fh)

        clades_by_node = {
            key: value["clade_membership"]
            for key, value in clades_json["nodes"].items()
        }

        # Calculate the total frequency per clade at the most recent timepoint and
        # requested timepoint in the past using non-zero tip frequencies.
        current_clade_frequencies = defaultdict(float)
        previous_clade_frequencies = defaultdict(float)