def test_single_censored_transfers_to_masked_on_setting():
    """Assigning one censored cause exposes it as a set and adds it to ``masked``."""
    record = Record()
    # A freshly built record has nothing masked yet.
    assert not record.masked

    record.censored = 1

    assert 1 in record.masked
    assert record.censored == {1}
def test_set_censored_multiple_times():
    """Each reassignment of ``censored`` replaces the old value and masks the new cause."""
    record = Record()
    for cause in (2, 3):
        record.censored = cause
        # Only the most recently assigned cause remains censored.
        assert record.censored == {cause}
        assert cause in record.masked
def test_mask_ranks(prep, censored, scores, ranks, expected):
    """Run ``prep.mask_ranks`` on a single record and compare its ranks to ``expected``.

    The record is built from ``scores`` and ``censored`` and seeded with
    ``ranks``; the remaining ``mask_ranks`` arguments are fixed values.
    """
    prep.cause_list = scores.keys()

    record = Record(scores=scores, censored=censored)
    record.ranks = ranks

    uniform_stub = range(1000)  # just needs length
    high_cutoffs = {cause: 99999 for cause in scores.keys()}
    no_demog_restrictions = {}
    restricted_rank = 9999  # value set if restricted
    list_position = 999
    zero_min_scores = defaultdict(int)  # every missing cause reads as 0

    prep.mask_ranks(
        [record],
        uniform_stub,
        high_cutoffs,
        no_demog_restrictions,
        restricted_rank,
        list_position,
        zero_min_scores,
    )

    assert record.ranks == expected
def test_csmf_summed_to_one(prep):
    """CSMFs computed without undetermined weights sum to one, overall and per sex.

    Builds a random number (10-99) of records for each of three causes and
    passes an empty undetermined-weights list to ``prep.calculate_csmf``.

    NOTE(review): another test with this exact name (taking ``malaria`` and
    ``hiv`` arguments) appears later in this source. If both live in the same
    module, the later definition replaces this one and this test never runs --
    confirm and rename one of them.
    """
    causes = ['a', 'b', 'c']
    counts = np.random.randint(10, 100, 3)

    # Pair each cause with its count explicitly; the original reused ``i`` for
    # both the cause index and the per-cause record index, which only worked
    # because the inner iterable is evaluated before ``i`` is rebound.
    user_data = [
        Record('sid{}'.format(i), age=35, sex=i % 2 + 1, cause=cause)
        for cause, count in zip(causes, counts)
        for i in range(count)
    ]

    csmf, csmf_by_sex = prep.calculate_csmf(user_data, [])

    assert np.allclose(sum(csmf.values()), 1)
    for sex, csmf_data in csmf_by_sex.items():
        assert np.allclose(sum(csmf_data.values()), 1)
def test_generate_cause_rankings(prep):
    """Check the ranks ``generate_cause_rankings`` assigns against training scores.

    A single cause (``1``) is used. Test scores are ranked relative to a
    sorted array of training scores that includes duplicated values.
    """
    prep.cause_list = [1]

    train_scores = [
        100,  # rank 1
        50,   # rank 2
        15,   # rank 3 (triplicate)
        15,   # rank 4 (triplicate)
        15,   # rank 5 (triplicate)
        10,   # rank 6 (duplicate positive)
        10,   # rank 7 (duplicate positive)
        -2,   # rank 8 (duplicate negative)
        -2,   # rank 9 (duplicate negative)
        -3,   # rank 10
    ]
    train_records = [Record(scores={1: value}) for value in train_scores]
    sorted_train = np.sort([record.scores[1] for record in train_records])
    uniform_scores = {1: sorted_train}

    # (score, expected rank within training data)
    cases = [
        (110, 0.5),   # above highest score in train data
        (100, 1),     # at highest score in train data
        (90, 1.5),
        (50, 2),      # at a value which exists in the train data
        (15, 4),      # at triplicate value in the train data
        (11, 5.5),
        (10, 6.5),    # at a duplicate value in the train data
        (0, 7.5),     # zero (just in case)
        (-1, 7.5),    # negative value in range of train scores max to min
        (-3, 10),     # at lowest score in train data
        (-5, 10.5),   # below lowest score in train data
    ]
    test_records = [Record(scores={1: score}) for score, _ in cases]

    # The records are updated in place; nothing is returned.
    prep.generate_cause_rankings(test_records, uniform_scores)

    observed = [record.ranks[1] for record in test_records]
    wanted = [float(rank) for _, rank in cases]
    assert observed == wanted
def test_csmf_sex_undetermined_unknown_age(prep):
    """
    Redistributed CSMFs for undetermined causes of death should not include
    biologically impossible causes for males and females.
    """
    data_module = prep.data_module

    # Causes that must never show up for each sex.
    male_drops = list(data_module.FEMALE_CAUSES) + list(data_module.MATERNAL_CAUSES)
    female_drops = list(data_module.MALE_CAUSES)

    # Seven undetermined records with age zero (unknown age) for each of
    # male (1), female (2) and unknown (0) sex, in that order.
    user_data = [
        Record('sid{}'.format(i), age=0, sex=sex_code, cause34_name='Undetermined')
        for sex_code in (1, 2, 0)
        for i in range(7)
    ]

    undetermined_weights = prep._get_undetermined_matrix()
    _csmf, csmf_by_sex = prep.calculate_csmf(user_data, undetermined_weights)

    for sex, csmf_data in csmf_by_sex.items():
        reported = set(csmf_data.keys())
        # Female causes must not be in male CSMFs and vice versa.
        if sex == 1:
            assert not reported.intersection(male_drops)
        if sex == 2:
            assert not reported.intersection(female_drops)
def test_csmf_summed_to_one(prep, malaria, hiv):
    """CSMFs sum to one, overall and per sex, for the given malaria/HIV regions.

    Seven records are created for every cause in ``prep.data_module.CAUSES``
    and the undetermined weights come from ``prep._get_undetermined_matrix``.
    """
    prep.malaria_region = malaria
    prep.hiv_region = hiv

    causes = prep.data_module.CAUSES.values()

    user_data = []
    for i in range(7):
        sid = 'sid{}'.format(i)
        sex_code = i % 2 + 1
        for cause in causes:
            user_data.append(Record(sid, age=.1, sex=sex_code, cause=cause))

    weights = prep._get_undetermined_matrix()
    csmf, csmf_by_sex = prep.calculate_csmf(user_data, weights)

    overall_total = sum(csmf.values())
    assert np.allclose(overall_total, 1)

    for per_sex in csmf_by_sex.values():
        assert np.allclose(sum(per_sex.values()), 1)
def test_csmf_sex_undetermined_plot(prep):
    """
    Redistributed CSMFs for undetermined causes of death should not include
    biologically impossible causes for males and females. Check that these
    causes are not included in the CSMF figures.

    A bar chart is drawn for each sex from ``csmf_by_sex`` and the rendered
    x-axis tick labels are inspected.
    """
    # Male, female and unknown-sex user data with undetermined cause and age
    # zero (unknown age).
    user_data = [
        Record('sid{}'.format(i), age=0, sex=sex_code, cause34_name='Undetermined')
        for sex_code in (1, 2, 0)
        for i in range(7)
    ]

    undetermined_weights = prep._get_undetermined_matrix()
    csmf, csmf_by_sex = prep.calculate_csmf(user_data, undetermined_weights)

    # Interactive mode off.
    plt.ioff()

    # Both sexes are checked. The previous ``range(1, 2)`` only produced 1, so
    # the female assertions below were never executed.
    for sex in (1, 2):
        # Input graph data. Materialize as lists so the plotting calls get
        # real sequences on Python 3 as well, where keys()/values() are views.
        graph_data = csmf_by_sex[sex]
        cause_keys = list(graph_data.keys())
        cause_fractions = list(graph_data.values())

        max_value = max(cause_fractions)
        xlocations = np.arange(len(cause_keys))  # the x locations for the groups
        bar_width = .75  # the width of the bars

        fig, ax = plt.subplots()
        try:
            ax.set_ylabel('Mortality fractions')
            ax.yaxis.grid()

            # Fix the tick positions first so each label maps to one tick.
            ax.set_xticks(xlocations)
            ax.set_xticklabels(cause_keys, rotation=90)

            ax.bar(xlocations, cause_fractions, bar_width, color='#C44440', align='center')

            # Add whitespace at top of bar.
            ax.set_ylim(top=max_value + max_value * 0.1)

            # Add whitespace before first bar and after last.
            plt.xlim([min(xlocations) - .5, max(xlocations) + 1.0])

            # Add some spacing for rotated xlabels.
            plt.subplots_adjust(bottom=0.60)

            # Necessary to actually draw the plot to get the x-axis labels.
            fig.canvas.draw()

            # List of x-axis labels.
            labels = [item.get_text() for item in ax.get_xticklabels()]
        finally:
            # Release the figure so repeated runs do not accumulate open figures.
            plt.close(fig)

        # Check biologically impossible causes.
        if sex == 1:
            assert "Stroke" in labels
            assert "Prostate Cancer" in labels
            assert "Maternal" not in labels
        if sex == 2:
            assert "Stroke" in labels
            assert "Maternal" in labels
            assert "Cervical Cancer" in labels
            assert "Prostate Cancer" not in labels
def test_reporter():
    """Smoke test: printing a default ``Record`` must not raise."""
    va = Record()
    # Parenthesized form prints the same on Python 2 and is valid on Python 3,
    # where the bare ``print va`` statement is a syntax error.
    print(va)
def test_multiple_censored_transfers_to_masked_on_setting():
    """Assigning several censored causes at once masks every one of them."""
    record = Record()
    record.censored = [0, 1, 2, 3]

    for cause in range(4):
        assert cause in record.masked

    # The assigned list is exposed as a set.
    assert record.censored == set([0, 1, 2, 3])
def test_censored_transfers_to_masked_on_init():
    """Causes passed as ``censored`` to the constructor are masked immediately."""
    record = Record(censored=(2, 3))
    for cause in (2, 3):
        assert cause in record.masked
# NOTE(review): the statements up to ``assert va.ranks == expected`` are the
# tail of a definition whose header lies outside this view; they are kept
# verbatim and only split back onto separate lines.
uniform = range(1000)  # just needs length
cutoffs = dict(zip(scores.keys(), [99999] * len(scores)))
demog_restrictions = {}
lowest_rank = 9999  # value set if restricted
uniform_list_pos = 999
min_cause_score = defaultdict(lambda: 0)
prep.mask_ranks([va], uniform, cutoffs, demog_restrictions, lowest_rank,
                uniform_list_pos, min_cause_score)
assert va.ranks == expected


@pytest.mark.parametrize('va, cause, cause_name', [
    (Record(sid='rules', cause=1), 11, 'c11'),
    (Record(sid='rules2', cause=2), 12, 'c12'),
    (Record(sid='ranks', ranks={1: 1, 2: 2}), 11, 'c11'),
    (Record(sid='rules_bad', cause='x', ranks={1: 1, 2: 2}), 11, 'c11'),
    (Record(sid='tie', ranks={1: 1, 2: 1, 3: 2}), 11, 'c11'),
    (Record(sid='lowest', ranks={1: 999, 2: 999, 3: 999}), None, 'Undetermined'),
])
def test_predict_with_rule(prep, va, cause, cause_name):
    """Check the reduced cause and cause name that ``prep.predict`` assigns.

    Args:
        prep: fixture providing the object under test.
        va: record to predict for, carrying a preset cause and/or ranks.
        cause: expected value of ``va.cause34`` after prediction.
        cause_name: expected value of ``va.cause34_name`` after prediction.
    """
    user_data = [va]
    cause_reduction = {1: 11, 2: 12, 3: 13, 4: 14}
    names34 = {11: 'c11', 12: 'c12', 13: 'c13', 14: 'c14'}
    names46 = {1: 'c1', 2: 'c2', 3: 'c3', 4: 'c4'}

    # 999 is passed as the lowest rank, matching the 'lowest' case's ranks.
    prep.predict(user_data, 999, cause_reduction, names34, names46)

    assert va.cause34 == cause
    # The parametrized name was previously supplied but never checked.
    # NOTE(review): assumes predict() stores the name on ``cause34_name``
    # (the attribute Record accepts as a keyword elsewhere) -- confirm.
    assert va.cause34_name == cause_name