Beispiel #1
0
def test_updating_q(mcmc_initial_state):
    '''
    Checks that the mean of state.qq over repeated _update_q calls falls in
    a narrow expected band.

    The state is updated 1000 times with a fixed-seed RandomState, the value
    of qq is accumulated after every update, and the resulting average must
    lie within [0.375, 0.380].
    '''
    num_draws = 1000
    state = mcmc_initial_state.state
    rng = np.random.RandomState(2020)

    running_total = 0.0
    for _ in range(num_draws):
        AlgorithmSiteInstance._update_q(state, rng)
        running_total += state.qq

    mean_q = running_total / float(num_draws)
    # TODO: Find a more robust way of testing this (since there's still a
    # tiny chance it randomly falls outside this range?)
    assert 0.375 <= mean_q <= 0.380
Beispiel #2
0
def test_updating_dvect(mcmc_initial_state):
    '''
    Checks the averages of state.dposterior and state.dvect over repeated
    _update_dvect calls.

    After 1000 updates with a fixed-seed RandomState:
    - the mean dposterior must lie within [0.31, 0.32]
    - the first entry of the mean dvect must approximately equal the mean
      dposterior
    - the second entry of the mean dvect must lie within [0.202, 0.206]
    '''
    num_draws = 1000
    state = mcmc_initial_state.state
    rng = np.random.RandomState(2020)

    dposterior_total = 0
    # The accumulator length has to be compatible with state.dvect for the
    # in-place addition below to work.
    dvect_total = np.zeros(133)
    for _ in range(num_draws):
        AlgorithmSiteInstance._update_dvect(state, rng)
        dposterior_total += state.dposterior
        dvect_total += state.dvect

    mean_dposterior = dposterior_total / float(num_draws)
    mean_dvect = dvect_total / float(num_draws)

    assert 0.31 <= mean_dposterior <= 0.32
    np.testing.assert_approx_equal(mean_dvect[0], mean_dposterior)
    assert 0.202 <= mean_dvect[1] <= 0.206
    def _initialize_site_instances(self, genotypedata: pd.DataFrame,
                                   additional: pd.DataFrame,
                                   locirepeats: List[int]):
        '''
        Creates one AlgorithmSiteInstance per distinct value found in the
        'Site' column of the genotype data.

        Sets self.site_names to the unique site values and
        self.algorithm_instances to a list of (site_name, site_instance)
        tuples, in the same order as self.site_names.

        :param genotypedata: data frame with a 'Site' column; the samples for
        each site are selected with self._get_samples_from_site
        :param additional: data frame whose samples are selected per site the
        same way and then handed to self._replace_sample_names together with
        the text 'Additional_'
        :param locirepeats: passed through unchanged to every
        AlgorithmSiteInstance
        '''
        self.site_names = pd.unique(genotypedata['Site'])
        self.algorithm_instances = []
        for site in self.site_names:
            # NOTE: "RR" stands for "recrudescence and/or reinfection"; it
            # marks datasets that deal specifically with day 0/day of failure
            # info, as opposed to background data
            rr_samples = self._get_samples_from_site(genotypedata, site)
            neutral_samples = self._get_samples_from_site(additional, site)

            # NOTE(review): the return value is discarded, so this relies on
            # _replace_sample_names modifying its argument in place - confirm
            self._replace_sample_names(neutral_samples, 'Additional_')

            entry = (site,
                     AlgorithmSiteInstance(rr_samples, neutral_samples,
                                           locirepeats))
            self.algorithm_instances.append(entry)
Beispiel #4
0
def test_likelihood_ratio_inner_loop_initial(mcmc_initial_state):
    '''
    Checks the first entry returned by _likelihood_inner_loop when it is
    called with index 0 on the fixture state.

    That entry must match four copies of 8.265306 to 5 decimal places.
    '''
    result = AlgorithmSiteInstance._likelihood_inner_loop(
        mcmc_initial_state.state, 0)

    expected_row = np.array([8.265306] * 4)
    np.testing.assert_array_almost_equal(result[0], expected_row, decimal=5)
Beispiel #5
0
def test_likelihood_ratio(mcmc_initial_state):
    '''
    Checks the output of _likelihood_ratios on the untouched fixture state.

    The result must match [56.024203, 1.438889, 0.0] to 5 decimal places.
    '''
    fixture = mcmc_initial_state
    actual_ratios = AlgorithmSiteInstance._likelihood_ratios(
        fixture.state, fixture.num_ids, fixture.num_loci)

    wanted_ratios = np.array([56.024203, 1.438889, 0.0])
    np.testing.assert_array_almost_equal(actual_ratios, wanted_ratios,
                                         decimal=5)
Beispiel #6
0
def test_likelihood_ratio_inner_loop_middle(mcmc_initial_state):
    '''
    Checks the second entry returned by _likelihood_inner_loop when it is
    called with index 2 on the fixture state.

    That entry must match four copies of 0.7777778 to 5 decimal places.
    '''
    result = AlgorithmSiteInstance._likelihood_inner_loop(
        mcmc_initial_state.state, 2)

    expected_row = np.array([0.7777778] * 4)
    np.testing.assert_array_almost_equal(result[1], expected_row, decimal=5)
Beispiel #7
0
def test_likelihood_ratio_with_nans(mcmc_initial_state):
    '''
    Checks the output of _likelihood_ratios after part of the fixture state
    has been overwritten with NaN.

    The slice [0, 1, 2:] of both state.alldistance and state.allrecrf is set
    to NaN before the call; the result must then match
    [88.459268, 1.438889, 0.0] to 5 decimal places.
    '''
    fixture = mcmc_initial_state
    state = fixture.state

    # Blank out the same slice in both arrays before computing the ratios.
    state.alldistance[0, 1, 2:] = np.nan
    state.allrecrf[0, 1, 2:] = np.nan

    actual_ratios = AlgorithmSiteInstance._likelihood_ratios(
        state, fixture.num_ids, fixture.num_loci)

    wanted_ratios = np.array([88.459268, 1.438889, 0.0])
    np.testing.assert_array_almost_equal(actual_ratios, wanted_ratios,
                                         decimal=5)
Beispiel #8
0
def test_updating_classifications(mcmc_initial_state):
    '''
    Checks the average classification produced by _update_classifications
    when it is repeatedly applied to the same starting classification.

    The update is run 1000 times with a fixed-seed RandomState and fixed
    likelihood ratios. After every run state.classification is restored to
    its starting value, so each run begins from the same point. The mean of
    the returned classifications must match [1.0, 0.31, 0.0] to 2 decimal
    places.
    '''
    num_draws = 1000
    expected_avg_classifications = np.array([1.0, 0.31, 0.0])
    likelihood_ratios = np.array([56.024203, 1.438889, 0.0])

    state = mcmc_initial_state.state
    original_classifications = np.copy(state.classification)
    classifications_sum = np.zeros(original_classifications.size)
    rand = np.random.RandomState(2020)
    for _ in range(num_draws):
        classifications = AlgorithmSiteInstance._update_classifications(
            state, likelihood_ratios, mcmc_initial_state.num_ids, rand)
        classifications_sum += classifications
        # Restore a fresh copy rather than the saved array itself: otherwise
        # the state would alias the saved baseline, and any in-place write
        # by the update would silently corrupt it for all later iterations.
        state.classification = np.copy(original_classifications)

    avg_classes = classifications_sum / float(num_draws)
    # TODO: Find a more robust way of testing this (since there's still a
    # tiny chance it randomly falls outside this range?)
    np.testing.assert_array_almost_equal(avg_classes,
                                         expected_avg_classifications,
                                         decimal=2)
def test_max_MOI():
    '''
    Checks that _get_max_MOI applied to genotypedata_RR gives
    expected_maxMOI.
    '''
    observed = AlgorithmSiteInstance._get_max_MOI(genotypedata_RR)
    assert observed == expected_maxMOI
# Module-level test data, built once at import time and shared by the tests
# in this file.

# Path of the example workbook, resolved relative to this file's directory.
example_file = os.path.join(os.path.dirname(__file__),
                            '../Angola2017_example.xlsx')
# parse_file returns two objects, unpacked here as the main genotype data and
# the additional data.
genotypedata, additional = RecrudescenceFileParser.parse_file(example_file)
# Samples selected from the genotype data for the site 'Benguela'.
genotypedata_RR = AlgorithmInstance._get_samples_from_site(
    genotypedata, 'Benguela')
# Samples selected from the additional data for the same site, then passed
# through _replace_sample_names with the text 'Additional_'.
# NOTE(review): the exact renaming rule is not visible here - confirm against
# AlgorithmInstance._replace_sample_names.
additional_neutral = AlgorithmInstance._replace_sample_names(
    AlgorithmInstance._get_samples_from_site(additional, 'Benguela'),
    'Additional_')

# Expected values that the tests below compare against.
expected_maxMOI = 5
# NOTE(review): presumably one repeat length per locus, in the order of
# expected_locinames (both have 7 entries) - confirm.
locirepeats = np.array([2, 2, 3, 3, 3, 3, 3])
# NOTE(review): recent pandas releases may warn when pd.unique is given a
# plain list instead of an array/Series - confirm for the pinned version.
expected_ids = pd.unique(
    ["BQ17-269", "BD17-040", "BD17-083", "BD17-085", "BD17-087", "BD17-090"])
expected_locinames = pd.unique(
    ["313", "383", "TA1", "POLYA", "PFPK2", "2490", "TA109"])
# Allele definitions computed from both sample sets, the number of expected
# loci and the repeat lengths.
alleles_definitions_RR = AlgorithmSiteInstance._get_allele_definitions(
    genotypedata_RR, additional_neutral, expected_locinames.size, locirepeats)


def test_max_MOI():
    '''
    Checks that _get_max_MOI applied to genotypedata_RR gives
    expected_maxMOI.
    '''
    observed = AlgorithmSiteInstance._get_max_MOI(genotypedata_RR)
    assert observed == expected_maxMOI


def test_getting_ids():
    '''
    Checks that get_sample_ids, called on genotypedata_RR with 'Day 0',
    returns exactly expected_ids.
    '''
    day0_ids = recrudescence_utils.get_sample_ids(genotypedata_RR, 'Day 0')
    np.testing.assert_array_equal(day0_ids, expected_ids)


def test_getting_locinames():
    '''
    Checks that the locus names derived from the column headers of
    genotypedata_RR equal expected_locinames.

    Every column after the first is split on "_" and the leading part is
    kept; the unique values of those leading parts are the locus names.
    '''
    locinames = pd.unique(genotypedata_RR.columns[1:].str.split("_").str[0])

    # Without this comparison the test could only fail by raising, so a
    # wrong set of names would pass unnoticed.
    np.testing.assert_array_equal(locinames, expected_locinames)
Beispiel #11
0
def test_updating_frequencies(mcmc_initial_state):
    '''
    Runs _update_frequencies on the fixture state using a fixed-seed
    RandomState.

    NOTE(review): no assertion is visible in this part of the file; confirm
    whether checks follow this call.
    '''
    # Fixed seed so the random draws are the same on every run.
    rand = np.random.RandomState(2020)

    AlgorithmSiteInstance._update_frequencies(mcmc_initial_state.state,
                                              mcmc_initial_state.num_loci,
                                              mcmc_initial_state.max_MOI, rand)