def test_single_censored_transfers_to_masked_on_setting():
    """Check that assigning one censored cause also places it in ``masked``."""
    record = Record()
    # A record built with no arguments starts with an empty/falsy mask.
    assert not record.masked

    single_cause = 1
    record.censored = single_cause
    assert single_cause in record.masked
    # The scalar assigned above reads back as a one-element set.
    assert record.censored == {single_cause}
def test_set_censored_multiple_times():
    """Check that ``censored`` can be reassigned on the same record.

    After each assignment ``censored`` must equal a set holding only the
    newly assigned value, and that value must be present in ``masked``.
    """
    record = Record()
    for cause in (2, 3):
        record.censored = cause
        assert record.censored == {cause}
        assert cause in record.masked
Esempio n. 3
0
def test_mask_ranks(prep, censored, scores, ranks, expected):
    """Run ``prep.mask_ranks`` on one record and compare its ranks to ``expected``.

    The record is built from ``scores`` and ``censored`` and then has
    ``ranks`` assigned; the remaining arguments to ``mask_ranks`` are fixed
    placeholder values defined below.
    """
    prep.cause_list = scores.keys()

    record = Record(scores=scores, censored=censored)
    record.ranks = ranks

    # Per the original note, only the length of this sequence matters.
    uniform = range(1000)
    # Every cause gets the same large cutoff value.
    cutoffs = dict.fromkeys(scores.keys(), 99999)
    demog_restrictions = {}
    # Value set if restricted.
    lowest_rank = 9999
    uniform_list_pos = 999
    # Missing causes default to a minimum score of zero.
    min_cause_score = defaultdict(int)

    prep.mask_ranks(
        [record],
        uniform,
        cutoffs,
        demog_restrictions,
        lowest_rank,
        uniform_list_pos,
        min_cause_score,
    )

    assert record.ranks == expected
Esempio n. 4
0
def test_csmf_summed_to_one(prep):
    """Check that CSMF values sum to one overall and within each sex.

    Builds a random number (10-99) of records for each of three causes,
    alternating ``sex`` between 1 and 2, and passes them to
    ``prep.calculate_csmf`` with an empty undetermined-weights argument.
    """
    causes = ['a', 'b', 'c']
    counts = np.random.randint(10, 100, 3)

    # Pair each cause with its own count and use a separate name for the
    # per-cause record index, so the inner loop no longer shadows the outer
    # index. The records produced are the same as before: the sid and sex
    # are derived from the position within each cause.
    user_data = [
        Record('sid{}'.format(n), age=35, sex=n % 2 + 1, cause=cause)
        for cause, count in zip(causes, counts)
        for n in range(count)
    ]

    csmf, csmf_by_sex = prep.calculate_csmf(user_data, [])

    assert np.allclose(sum(csmf.values()), 1)
    for csmf_data in csmf_by_sex.values():
        assert np.allclose(sum(csmf_data.values()), 1)
Esempio n. 5
0
def test_generate_cause_rankings(prep):
    """Check ranks assigned to test scores against sorted training scores.

    A single cause (key ``1``) is used. ``prep.generate_cause_rankings``
    mutates the test records in place; the resulting ``ranks[1]`` values are
    compared with the expected ranks listed in ``cases``.
    """
    cause = 1
    prep.cause_list = [cause]

    train_scores = [
        100,   # rank 1
        50,   # rank 2
        15,   # rank 3 (triplicate)
        15,   # rank 4 (triplicate)
        15,   # rank 5 (triplicate)
        10,   # rank 6 (duplicate positive)
        10,   # rank 7 (duplicate positive)
        -2,   # rank 8 (duplicate negative)
        -2,   # rank 9 (duplicate negative)
        -3,   # rank 10
    ]
    train_data = [Record(scores={cause: value}) for value in train_scores]
    # Read the scores back from the records before sorting them.
    stored_scores = [record.scores[cause] for record in train_data]
    uniform_scores = {cause: np.sort(stored_scores)}

    # (score, expected rank within the training data)
    cases = [
        (110, 0.5),  # above highest score in train data
        (100, 1),  # at highest score in train data
        (90, 1.5),
        (50, 2),   # at a value which exists in the train data
        (15, 4),   # at triplicate value in the train data
        (11, 5.5),
        (10, 6.5),  # at a duplicate value in the train data
        (0, 7.5),  # zero (just in case)
        (-1, 7.5),  # negative value in range of train scores max to min
        (-3, 10),   # at lowest score in train data
        (-5, 10.5),   # below lowest score in train data
    ]
    case_scores, case_ranks = zip(*cases)
    test_data = [Record(scores={cause: value}) for value in case_scores]

    # Modifies the list of Records in place and returns nothing.
    prep.generate_cause_rankings(test_data, uniform_scores)

    expected_ranks = [float(rank) for rank in case_ranks]
    observed_ranks = [record.ranks[cause] for record in test_data]
    assert observed_ranks == expected_ranks
Esempio n. 6
0
def test_csmf_sex_undetermined_unknown_age(prep):
    """
    Redistributed CSMFs for undetermined causes of death should not include
    biologically impossible causes for males and females.

    Records with sex 1, 2 and 0 (seven each), age zero and an
    ``'Undetermined'`` cause are passed to ``prep.calculate_csmf``; the
    per-sex results for sex 1 and sex 2 must not contain any of the causes
    listed as impossible for that sex.
    """
    data_module = prep.data_module

    # Causes that must not appear for each sex, keyed by the sex code.
    impossible_causes = {
        1: list(data_module.FEMALE_CAUSES) + list(data_module.MATERNAL_CAUSES),
        2: list(data_module.MALE_CAUSES),
    }

    # Male, female, then unknown-sex records, all with an undetermined cause
    # and age zero (unknown age).
    user_data = [
        Record('sid{}'.format(i), age=0, sex=sex_code,
               cause34_name='Undetermined')
        for sex_code in (1, 2, 0)
        for i in range(7)
    ]

    undetermined_weights = prep._get_undetermined_matrix()
    csmf, csmf_by_sex = prep.calculate_csmf(user_data, undetermined_weights)

    for sex, csmf_data in csmf_by_sex.items():
        # Only sexes 1 and 2 have a list of impossible causes to check.
        if sex in impossible_causes:
            assert set(csmf_data.keys()).isdisjoint(impossible_causes[sex])
Esempio n. 7
0
def test_csmf_summed_to_one(prep, malaria, hiv):
    """Check that CSMF values sum to one for the given region settings.

    ``malaria`` and ``hiv`` are stored on ``prep`` as ``malaria_region`` and
    ``hiv_region``. Seven records are then created for every cause in
    ``prep.data_module.CAUSES`` and passed to ``prep.calculate_csmf`` together
    with the undetermined-weights matrix.
    """
    prep.malaria_region = malaria
    prep.hiv_region = hiv
    all_causes = prep.data_module.CAUSES.values()

    user_data = []
    for i in range(7):
        # Even-numbered rounds use sex 1, odd-numbered rounds use sex 2.
        sex_code = i % 2 + 1
        sid = 'sid{}'.format(i)
        for cause in all_causes:
            user_data.append(Record(sid, age=.1, sex=sex_code, cause=cause))

    undetermined_weights = prep._get_undetermined_matrix()
    csmf, csmf_by_sex = prep.calculate_csmf(user_data, undetermined_weights)

    assert np.allclose(sum(csmf.values()), 1)
    for fractions in csmf_by_sex.values():
        assert np.allclose(sum(fractions.values()), 1)
def test_csmf_sex_undetermined_plot(prep):
    """
    Redistributed CSMFs for undetermined causes of death should not include
    biologically impossible causes for males and females. Check that these
    causes are not included in the CSMF figures.

    For each of sex 1 and sex 2 a bar chart of the per-sex CSMF is drawn and
    the x-axis tick labels are read back from the rendered figure; the
    assertions are made on those labels.
    """
    # Male, female and unknown-sex user data with undetermined cause and age
    # zero (unknown age).
    user_data = [
        Record('sid{}'.format(i), age=0, sex=sex_code,
               cause34_name='Undetermined')
        for sex_code in (1, 2, 0)
        for i in range(7)
    ]

    undetermined_weights = prep._get_undetermined_matrix()
    csmf, csmf_by_sex = prep.calculate_csmf(user_data, undetermined_weights)

    bar_width = .75  # the width of the bars

    # Interactive mode off.
    plt.ioff()

    # The loop previously used range(1, 2), which yields only 1, so the
    # sex == 2 assertions below never ran. Iterate over both sexes explicitly.
    for sex in (1, 2):
        # input graph data
        graph_data = csmf_by_sex[sex]

        # Materialise as lists so they can be measured and plotted regardless
        # of whether the dict returns lists or views.
        cause_keys = list(graph_data.keys())
        cause_fractions = list(graph_data.values())

        max_value = max(cause_fractions)
        xlocations = np.arange(len(cause_keys))  # the x locations for the groups

        # One figure per sex; it is always closed below so figures do not
        # accumulate across iterations or after a failure.
        fig, ax = plt.subplots()
        try:
            ax.set_ylabel('Mortality fractions')
            ax.yaxis.grid()

            # Fix the tick positions before attaching the labels to them.
            ax.set_xticks(xlocations)
            ax.set_xticklabels(cause_keys, rotation=90)

            ax.bar(xlocations, cause_fractions, bar_width, color='#C44440',
                   align='center')

            # Add whitespace at top of bar.
            ax.set_ylim(top=max_value + max_value * 0.1)

            # Add whitespace before first bar and after last.
            ax.set_xlim([min(xlocations) - .5, max(xlocations) + 1.0])

            # Add some spacing for rotated xlabels.
            fig.subplots_adjust(bottom=0.60)

            # The figure must actually be drawn before the x-axis labels can
            # be read back.
            fig.canvas.draw()

            # list of x axis labels
            labels = [item.get_text() for item in ax.get_xticklabels()]
        finally:
            plt.close(fig)

        # check biologically impossible causes
        if sex == 1:
            assert "Stroke" in labels
            assert "Prostate Cancer" in labels
            assert "Maternal" not in labels

        if sex == 2:
            assert "Stroke" in labels
            assert "Maternal" in labels
            assert "Cervical Cancer" in labels
            assert "Prostate Cancer" not in labels
def test_reporter():
    """Smoke test: printing a default-constructed ``Record`` must not raise."""
    va = Record()
    # The parenthesised form prints the same text on Python 2 and is also
    # valid syntax on Python 3, unlike the bare ``print va`` statement.
    print(va)
def test_multiple_censored_transfers_to_masked_on_setting():
    """Check that assigning a list of censored causes masks every one of them.

    The list assigned to ``censored`` must also read back as the equivalent
    set.
    """
    causes = (0, 1, 2, 3)
    record = Record()
    record.censored = list(causes)

    for cause in causes:
        assert cause in record.masked
    assert record.censored == set(causes)
def test_censored_transfers_to_masked_on_init():
    """Check that causes passed as ``censored`` at construction are masked."""
    record = Record(censored=(2, 3))
    for cause in (2, 3):
        assert cause in record.masked
Esempio n. 12
0
    uniform = range(1000)  # just needs length
    cutoffs = dict(zip(scores.keys(), [99999] * len(scores)))
    demog_restrictions = {}
    lowest_rank = 9999   # value set if restricted
    uniform_list_pos = 999
    min_cause_score = defaultdict(lambda: 0)

    prep.mask_ranks([va], uniform, cutoffs, demog_restrictions, lowest_rank,
                    uniform_list_pos, min_cause_score)

    assert va.ranks == expected


# Each case is (record, expected ``cause34`` after prediction, cause name).
# The expected values 11 and 12 are the ``cause_reduction`` images of causes
# 1 and 2 defined in the test body; the last case, whose ranks are all 999,
# expects ``None``.
@pytest.mark.parametrize('va, cause, cause_name', [
    (Record(sid='rules', cause=1), 11, 'c11'),
    (Record(sid='rules2', cause=2), 12, 'c12'),
    (Record(sid='ranks', ranks={1: 1, 2: 2}), 11, 'c11'),
    (Record(sid='rules_bad', cause='x', ranks={1: 1, 2: 2}), 11, 'c11'),
    (Record(sid='tie', ranks={1: 1, 2: 1, 3: 2}), 11, 'c11'),
    (Record(sid='lowest', ranks={1: 999, 2: 999, 3: 999}), None,
     'Undetermined'),
])
def test_predict_with_rule(prep, va, cause, cause_name):
    """Run ``prep.predict`` on a single record and check its ``cause34``.

    The record is passed as a one-element list together with 999, a
    cause-reduction mapping and two name mappings.

    NOTE(review): ``cause_name`` is not referenced in the lines visible here;
    the function may continue past this excerpt -- confirm against the full
    file.
    """
    user_data = [va]
    # Maps the keys used in ``ranks``/``cause`` (1-4) to the reduced codes 11-14.
    cause_reduction = {1: 11, 2: 12, 3: 13, 4: 14}
    names34 = {11: 'c11', 12: 'c12', 13: 'c13', 14: 'c14'}
    names46 = {1: 'c1', 2: 'c2', 3: 'c3', 4: 'c4'}
    # NOTE(review): 999 matches the rank values of the 'lowest' case and is
    # presumably the lowest-rank sentinel -- confirm against ``predict``.
    prep.predict(user_data, 999, cause_reduction, names34, names46)

    assert va.cause34 == cause