def rebin_matrix_by_age(matrix,
                        datadir,
                        location="seattle_metro",
                        state_location="Washington",
                        country_location="usa"):
    """
    Average a single-year contact matrix over the census age brackets.

    The matrix is aggregated with sp.get_aggregate_matrix, then every
    aggregated cell (i, j) is divided by the product of the number of single
    ages mapped to bracket i and the number mapped to bracket j.

    @TODO: should we merge the functionalities with sp.get_aggregate_matrix
    or remove as this operation may not be scientifically meaningful (?)

    Args:
        matrix           : raw matrix with single age bracket
        datadir          : data directory
        location         : location name, forwarded to sp.get_census_age_brackets
        state_location   : state location
        country_location : country location

    Returns:
        The object returned by sp.get_aggregate_matrix (documented upstream as
        a numpy.ndarray), with each cell divided in place as described above.
    """
    age_brackets = sp.get_census_age_brackets(datadir, location, state_location, country_location)
    bracket_of_age = sp.get_age_by_brackets_dic(age_brackets)
    rebinned = sp.get_aggregate_matrix(matrix, bracket_of_age)

    # How many single ages fall into each bracket index.
    ages_per_bracket = Counter(bracket_of_age.values())
    n_brackets = len(ages_per_bracket)

    for row in range(n_brackets):
        row_size = ages_per_bracket[row]
        for col in range(n_brackets):
            rebinned[row, col] /= (row_size * ages_per_bracket[col])
    return rebinned
def test_generate_all_households(location='seattle_metro',
                                 state_location='Washington',
                                 country_location='usa'):
    """
    Generate all households for a fixed population of 1000 and check both
    values returned by sp.generate_all_households.

    Args:
        location         : location name used to look up the household size distribution
        state_location   : state name used for the data lookups
        country_location : country name used for the data lookups
    """
    N = 1000
    household_size_distr = sp.get_household_size_distr(datadir, location, state_location, country_location)
    hh_sizes = sp.generate_household_sizes_from_fixed_pop_size(N, household_size_distr)

    hha_brackets = sp.get_head_age_brackets(datadir, country_location=country_location)
    hha_by_size_counts = sp.get_head_age_by_size_distr(datadir, country_location=country_location)

    age_brackets_filepath = sp.get_census_age_brackets_path(datadir, state_location, country_location)
    age_brackets = sp.get_age_brackets_from_df(age_brackets_filepath)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    contact_matrix_dic = sp.get_contact_matrix_dic(datadir, sheet_name='United States of America')

    # Uniform single-year age distribution over ages 0..100.
    single_year_age_distr = {age: 1.0 / 101.0 for age in range(101)}

    homes_dic, homes = sp.generate_all_households(N, hh_sizes, hha_by_size_counts, hha_brackets,
                                                  age_brackets, age_by_brackets_dic,
                                                  contact_matrix_dic, single_year_age_distr)

    # The original `assert homes_dic, homes is not None` used the second
    # expression as the assertion *message*, so `homes` was never checked.
    # The truthiness check on homes_dic is kept exactly as it was.
    assert homes_dic, "generate_all_households returned an empty or missing homes_dic"
    assert homes is not None, "generate_all_households returned None for homes"
def get_average_contact_by_age(pop,
                               datadir,
                               state_location="Washington",
                               country_location="usa",
                               setting_code="H",
                               decimal=3):
    """
    Compute the mean number of contacts per person for every census age bracket.

    Args:
        pop              : population dictionary; each value is read through
                           its "age" entry and its "contacts"[setting_code] entry
        datadir          : data directory to look up reference data
        state_location   : state location
        country_location : country location
        setting_code     : contact layer code, can be "H", "W", "S"
        decimal          : digits for rounding, default to 3

    Returns:
        numpy.ndarray: one rounded average per age bracket (contacts summed
        over the bracket divided by the number of people in the bracket).
    """
    age_brackets = sp.get_census_age_brackets(datadir, state_location, country_location)
    bracket_of_age = sp.get_age_by_brackets_dic(age_brackets)

    n_brackets = len(age_brackets)
    people_per_bracket = np.zeros(n_brackets)
    contacts_per_bracket = np.zeros(n_brackets)

    for person in pop.values():
        idx = bracket_of_age[person["age"]]
        people_per_bracket[idx] += 1
        contacts_per_bracket[idx] += len(person["contacts"][setting_code])

    # NOTE(review): a bracket with no people divides 0 by 0 here.
    return np.round(np.divide(contacts_per_bracket, people_per_bracket), decimals=decimal)
def test_custom_age_brackets(self):
    """
    Build a hand-written set of 11 age brackets covering ages 0-99 and verify
    the age-to-bracket mapping produced by sp.get_age_by_brackets_dic.
    """
    self.is_debugging = False

    my_age_brackets = {
        0: [0, 1],
        1: [2, 3, 4],
        2: [5, 6, 7, 8, 9, 10, 11],
        3: [12, 13, 14],
        4: [15, 16, 17, 18],
        5: list(range(19, 23)),    # college years
        6: list(range(23, 30)),    # early career
        7: list(range(30, 50)),    # mid career
        8: list(range(50, 65)),    # late career
        9: list(range(65, 80)),    # retirement
        10: list(range(80, 100)),  # managed care
    }

    mapping = sp.get_age_by_brackets_dic(age_brackets=my_age_brackets)
    self.verify_age_bracket_dictionary_correct(age_by_brackets_dic=mapping)
def test_plot_generated_trimmed_contact_matrix(setting_code='H',
                                               n=5000,
                                               aggregate_flag=True,
                                               logcolors_flag=True,
                                               density_or_frequency='density'):
    """
    Build a trimmed contact network for seattle_metro / Washington and plot
    its contact matrix.

    Args:
        setting_code (str)         : contact setting code, forwarded to the plotting call
        n (int)                    : population size, passed to sp.make_contacts as 'Npop'
        aggregate_flag (bool)      : forwarded to sp.plot_contact_frequency
        logcolors_flag (bool)      : forwarded to sp.plot_contact_frequency
        density_or_frequency (str) : forwarded to the matrix calculation and to the plot

    Returns:
        The figure returned by sp.plot_contact_frequency.
    """
    datadir = sp.datadir
    state_location = 'Washington'
    location = 'seattle_metro'
    country_location = 'usa'

    network = sp.make_contacts({},
                               state_location=state_location,
                               location=location,
                               options_args={'use_microstructure': True},
                               network_distr_args={'Npop': int(n)})
    network = sp.trim_contacts(network, trimmed_size_dic=None, use_clusters=False)

    age_brackets = sp.get_census_age_brackets(datadir,
                                              state_location=state_location,
                                              country_location=country_location)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    # Single-year age counts of the generated people, then the per-bracket counts.
    age_count = Counter(network[uid]['age'] for uid in network)
    aggregate_age_count = sp.get_aggregate_ages(age_count, age_by_brackets_dic)

    freq_matrix_dic = sp.calculate_contact_matrix(network, density_or_frequency)

    return sp.plot_contact_frequency(freq_matrix_dic, age_count, aggregate_age_count,
                                     age_brackets, age_by_brackets_dic, setting_code,
                                     density_or_frequency, logcolors_flag, aggregate_flag)
def test_generate_larger_households(location='seattle_metro',
                                    state_location='Washington',
                                    country_location='usa'):
    """
    Generate every household of size 3 from a sample of 1000 household sizes
    and check that sp.generate_larger_households returns something.

    Args:
        location         : location name used to look up the household size distribution
        state_location   : state name used for the data lookups
        country_location : country name used for the data lookups
    """
    Nhomes_to_sample_smooth = 1000
    size_distr = sp.get_household_size_distr(datadir, location, state_location, country_location)
    hh_sizes = sp.generate_household_sizes(Nhomes_to_sample_smooth, size_distr)

    head_age_brackets = sp.get_head_age_brackets(datadir, country_location=country_location)
    head_age_by_size = sp.get_head_age_by_size_distr(datadir, country_location=country_location)

    brackets_path = sp.get_census_age_brackets_path(datadir, state_location, country_location)
    age_brackets = sp.get_age_brackets_from_df(brackets_path)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    contact_matrix_dic = sp.get_contact_matrix_dic(datadir, sheet_name='United States of America')

    # Uniform single-year age distribution over ages 0..100.
    uniform_age_distr = {}
    for age in range(101):
        uniform_age_distr[age] = float(1.0 / 101.0)

    # The first argument is the household size to create. hh_sizes says how
    # many households of that size to build, stored at index size - 1 because
    # hh_sizes is an array rather than a dictionary.
    size = 3
    larger_households = sp.generate_larger_households(size, hh_sizes, head_age_by_size,
                                                      head_age_brackets, age_brackets,
                                                      age_by_brackets_dic, contact_matrix_dic,
                                                      uniform_age_distr)
    assert larger_households is not None
    print(larger_households)
def test_send_students_to_school(n=10000,
                                 location='seattle_metro',
                                 state_location='Washington',
                                 country_location='usa',
                                 folder_name='contact_networks'):
    """
    Read saved households, pick the students, and check that
    sp.send_students_to_school produces schools and school uid lists.

    Args:
        n                : population size of the saved household file to read
        location         : location name used for the data lookups
        state_location   : state name used for the data lookups
        country_location : country name used for the data lookups
        folder_name      : folder holding the saved contact network files

    Returns:
        tuple: (syn_schools, syn_school_uids) as returned by sp.send_students_to_school.
    """
    homes = sprw.read_setting_groups(datadir, location, state_location, country_location,
                                     folder_name, 'households', n, with_ages=True)
    homes_by_uids, age_by_uid_dic = sp.assign_uids_by_homes(homes)

    uids_in_school, uids_in_school_by_age, ages_in_school_count = sp.get_uids_in_school(
        datadir, n, location, state_location, country_location,
        age_by_uid_dic, homes_by_uids, use_default=False)

    school_size_distr_by_bracket = sp.get_school_size_distr_by_brackets(
        datadir, location, state_location, country_location)
    school_size_brackets = sp.get_school_size_brackets(datadir, location, state_location, country_location)
    school_sizes = sp.generate_school_sizes(school_size_distr_by_bracket, school_size_brackets, uids_in_school)

    age_brackets_filepath = sp.get_census_age_brackets_path(datadir, state_location, country_location)
    age_brackets = sp.get_age_brackets_from_df(age_brackets_filepath)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    contact_matrix_dic = sp.get_contact_matrix_dic(datadir, sheet_name='United States of America')

    syn_schools, syn_school_uids, syn_school_types = sp.send_students_to_school(
        school_sizes, uids_in_school, uids_in_school_by_age, ages_in_school_count,
        age_brackets, age_by_brackets_dic, contact_matrix_dic, verbose=False)

    # The original `assert syn_schools, syn_school_uids is not None` used the
    # second expression as the assertion *message*, so syn_school_uids was
    # never checked. The truthiness check on syn_schools is kept as it was.
    assert syn_schools, "send_students_to_school returned no schools"
    assert syn_school_uids is not None, "send_students_to_school returned None for syn_school_uids"

    return syn_schools, syn_school_uids
def get_pop_details(self, pop, dir, title_prefix, location, state_location, country_location, decimal=3):
    """
    Write average-contact and contact-matrix artifacts for a population.

    For each setting ('H', 'W', 'S') this saves the average contacts by age
    bracket (plot + JSON). For each of 'density' and 'frequency' it also saves
    the bracket-aggregated contact matrix (CSV) and its plot (PNG).

    Args:
        pop              : population dictionary
        dir              : output directory for the JSON/CSV files (created if missing)
        title_prefix     : not used by this method
        location         : not used by this method
        state_location   : state used for the age bracket and average-contact lookups
        country_location : country used for the age bracket and average-contact lookups
        decimal          : number of digits used for rounding and for the CSV format

    Returns:
        None. Files are written under `dir` and `self.figDir`.
    """
    os.makedirs(dir, exist_ok=True)

    # Neither the number format nor the bracket mapping depends on the
    # setting or matrix kind, so they are computed once instead of per iteration.
    fmt = f'%.{str(decimal)}f'
    brackets = sp.get_census_age_brackets(self.datadir, state_location, country_location)
    ageindex = sp.get_age_by_brackets_dic(brackets)

    run_label = 'Expected' if self.generateBaseline else 'Test'
    file_stem = f"{self.n}_seed_{self.seed}"

    for setting_code in ['H', 'W', 'S']:
        # Pass the caller's location on; previously the helper silently fell
        # back to its own Washington/usa defaults.
        average_contacts = utilities.get_average_contact_by_age(
            pop,
            self.datadir,
            state_location=state_location,
            country_location=country_location,
            setting_code=setting_code,
            decimal=decimal)

        utilities.plot_array(average_contacts,
                             datadir=self.figDir,
                             testprefix=f"{file_stem}_{setting_code}_average_contacts",
                             expect_label=run_label)
        sc.savejson(os.path.join(dir, f"{file_stem}_{setting_code}_average_contact.json"),
                    dict(enumerate(average_contacts.tolist())),
                    indent=2)

        # `matrix_kind` replaces a loop variable that shadowed the builtin `type`.
        for matrix_kind in ['density', 'frequency']:
            matrix = sp.calculate_contact_matrix(pop, matrix_kind, setting_code)
            agg_matrix = sp.get_aggregate_matrix(matrix, ageindex)
            np.savetxt(os.path.join(dir, f"{file_stem}_{setting_code}_{matrix_kind}_contact_matrix.csv"),
                       agg_matrix,
                       delimiter=",",
                       fmt=fmt)

            fig = plot_age_mixing_matrices.test_plot_generated_contact_matrix(
                setting_code=setting_code,
                population=pop,
                title_prefix=f" {run_label} ",
                density_or_frequency=matrix_kind)
            fig.savefig(os.path.join(self.figDir,
                                     f"{file_stem}_{setting_code}_{matrix_kind}_contact_matrix.png"))
def plot_generated_trimmed_contact_matrix(datadir,
                                          n,
                                          location='seattle_metro',
                                          state_location='Washington',
                                          country_location='usa',
                                          setting_code='H',
                                          aggregate_flag=True,
                                          logcolors_flag=True,
                                          density_or_frequency='density',
                                          trimmed_size_dic=None):
    """
    Generate a contact network, trim it, and plot the resulting contact matrix.

    Args:
        datadir (str)              : data directory used for the age bracket lookup
        n (int)                    : population size, passed to sp.make_contacts as 'Npop'
        location (str)             : location name
        state_location (str)       : state name
        country_location (str)     : country name
        setting_code (str)         : contact setting code
        aggregate_flag (bool)      : forwarded to plot_contact_matrix
        logcolors_flag (bool)      : forwarded to plot_contact_matrix
        density_or_frequency (str) : forwarded to the matrix calculation and the plot
        trimmed_size_dic (dict)    : forwarded to sp.trim_contacts

    Returns:
        The figure returned by plot_contact_matrix.
    """
    network = sp.make_contacts({},
                               country_location=country_location,
                               state_location=state_location,
                               location=location,
                               options_args={'use_microstructure': True},
                               network_distr_args={'Npop': int(n)})
    network = sp.trim_contacts(network, trimmed_size_dic=trimmed_size_dic, use_clusters=False)

    age_brackets = sp.get_census_age_brackets(datadir,
                                              state_location=state_location,
                                              country_location=country_location)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    # Single-year age counts of the generated people, then the per-bracket counts.
    age_count = Counter(network[uid]['age'] for uid in network)
    aggregate_age_count = sp.get_aggregate_ages(age_count, age_by_brackets_dic)

    symmetric_matrix = calculate_contact_matrix(network, density_or_frequency, setting_code)

    return plot_contact_matrix(symmetric_matrix,
                               age_count,
                               aggregate_age_count,
                               age_brackets,
                               age_by_brackets_dic,
                               setting_code=setting_code,
                               density_or_frequency=density_or_frequency,
                               logcolors_flag=logcolors_flag,
                               aggregate_flag=aggregate_flag)
def plot_age_dist(datadir, pop, pars, do_show, testprefix):
    """
    Plot the expected single-year age distribution against the generated one.

    Args:
        datadir    : data directory used for the reference lookups
        pop        : population mapping; each value is read through its 'age' entry
        pars       : parameter mapping; reads 'location', 'state_location',
                     'country_location', 'smooth_ages', 'household_method' and,
                     when smoothing is on, 'window_length'
        do_show    : accepted but not used here; the plot call is always made
                     with do_show=False
        testprefix : plot prefix; underscores are replaced by spaces

    Returns:
        tuple: (fig, ax) as returned by sppl.plot_array, after axis labels,
        limits, title and figure size have been set.
    """
    sp.logger.info("Plot the expected age distribution and the generated age distribution.")

    age_brackets = sp.get_census_age_brackets(datadir,
                                              country_location=pars['country_location'],
                                              state_location=pars['state_location'],
                                              location=pars['location'])
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    # A window of 1 is used when smoothing is switched off.
    expected_age_distr = sp.get_smoothed_single_year_age_distr(
        datadir,
        location=pars['location'],
        state_location=pars['state_location'],
        country_location=pars['country_location'],
        window_length=pars['window_length'] if pars['smooth_ages'] else 1)

    # Count generated people per single age, starting every expected age at zero.
    gen_age_count = {age: 0 for age in expected_age_distr.keys()}
    for _, person in pop.items():
        gen_age_count[person['age']] += 1
    gen_age_distr = sp.norm_dic(gen_age_count)

    expected_pct = [share * 100 for share in expected_age_distr.values()]
    generated_pct = [share * 100 for share in gen_age_distr.values()]
    fig, ax = sppl.plot_array(expected_pct,
                              generated=generated_pct,
                              do_show=False,
                              binned=True,
                              testprefix=testprefix.replace('_', ' '))

    ax.set_xlabel('Ages')
    ax.set_ylabel('Distribution (%)')
    ax.set_ylim(bottom=0)
    ax.set_xlim(-1.5, max(age_by_brackets_dic.keys()) + 1.5)
    ax.set_title(
        f"Age Distribution of {pars['location'].replace('_', ' ')}: {pars['household_method'].replace('_', ' ')} method"
    )

    # reset the figure size
    fig.set_figheight(4)
    fig.set_figwidth(7)
    return fig, ax
def test_plot_generated_trimmed_contact_matrix(setting_code='H',
                                               n=5000,
                                               aggregate_flag=True,
                                               logcolors_flag=True,
                                               density_or_frequency='density',
                                               with_facilities=False,
                                               cmap='cmr.freeze_r',
                                               fontsize=16,
                                               rotation=50):
    """
    Plot the age mixing matrix of a generated population for a specific setting.

    NOTE(review): despite the name, no trimming call is made in this function;
    the population comes straight from sp.make_population. Confirm whether
    trimming is still intended.

    Args:
        setting_code (str)               : name of the physical contact setting: H for households, S for schools, W for workplaces, C for community or other
        n (int)                          : number of people in the population
        aggregate_flag (bool)            : If True, plot the contact matrix for aggregate age brackets, else single year age contact matrix.
        logcolors_flag (bool)            : If True, plot heatmap in logscale
        density_or_frequency (str)       : If 'density', then each contact counts for 1/(group size -1) of a person's contact in a group, elif 'frequency' then count each contact. This means that more people in a group leads to higher rates of contact/exposure.
        with_facilities (bool)           : If True, create long term care facilities
        cmap (str or matplotlib colormap): colormap
        fontsize (int)                   : base font size
        rotation (int)                   : rotation for x axis labels

    Returns:
        A fig object.
    """
    datadir = sp.datadir
    state_location = 'Washington'
    country_location = 'usa'

    population = sp.make_population(n, generate=True, with_facilities=with_facilities)

    age_brackets = sp.get_census_age_brackets(datadir,
                                              state_location=state_location,
                                              country_location=country_location)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    # Single-year age counts of the generated people, then the per-bracket counts.
    age_count = Counter(population[uid]['age'] for uid in population)
    aggregate_age_count = sp.get_aggregate_ages(age_count, age_by_brackets_dic)

    matrix = sp.calculate_contact_matrix(population, density_or_frequency, setting_code)

    fig = sp.plot_contact_matrix(matrix, age_count, aggregate_age_count, age_brackets,
                                 age_by_brackets_dic, setting_code, density_or_frequency,
                                 logcolors_flag, aggregate_flag, cmap, fontsize, rotation)
    return fig
def plot_contact_matrix_after_intervention(n,
                                           n_days,
                                           interventions,
                                           intervention_name,
                                           location='seattle_metro',
                                           state_location='Washington',
                                           country_location='usa',
                                           aggregate_flag=True,
                                           logcolors_flag=True,
                                           density_or_frequency='density',
                                           setting_code='H',
                                           cmap='cmr.freeze_r',
                                           fontsize=16,
                                           rotation=50):
    """
    Run a simulation with the given interventions and plot the contact matrix
    computed from the finished simulation.

    Args:
        n (int)                    : population size of the simulation
        n_days (int)               : number of simulated days
        interventions              : passed to cv.Sim as `interventions`
        intervention_name (str)    : not used by this function
        location (str)             : not used by this function
        state_location (str)       : state used for the age bracket lookup
        country_location (str)     : country used for the age bracket lookup
        aggregate_flag (bool)      : forwarded to sp.plot_contact_matrix
        logcolors_flag (bool)      : forwarded to sp.plot_contact_matrix
        density_or_frequency (str) : forwarded to the matrix calculation and the plot
        setting_code (str)         : contact setting code
        cmap                       : forwarded to sp.plot_contact_matrix
        fontsize (int)             : forwarded to sp.plot_contact_matrix
        rotation (int)             : forwarded to sp.plot_contact_matrix

    Returns:
        The figure returned by sp.plot_contact_matrix.
    """
    sim_pars = sc.objdict(pop_size=n, n_days=n_days, pop_type='synthpops')
    sim = cv.Sim(pars=sim_pars, interventions=interventions)
    sim.run()

    age_brackets = sp.get_census_age_brackets(sp.datadir,
                                              state_location=state_location,
                                              country_location=country_location)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    # Ages are rounded to one decimal and then truncated to integers.
    ages = np.round(sim.people.age, 1).astype(int)
    max_age = max(ages)

    # Every age from 0 to the oldest person gets an entry, even with no people.
    age_count = dict(Counter(ages))
    for age in range(max_age + 1):
        age_count.setdefault(age, 0)

    aggregate_age_count = sp.get_aggregate_ages(age_count, age_by_brackets_dic)

    matrix = calculate_contact_matrix(sim, density_or_frequency, setting_code)

    return sp.plot_contact_matrix(matrix, age_count, aggregate_age_count, age_brackets,
                                  age_by_brackets_dic, setting_code, density_or_frequency,
                                  logcolors_flag, aggregate_flag, cmap, fontsize, rotation)
def test_all(location='seattle_metro',
             state_location='Washington',
             country_location='usa',
             sheet_name='United States of America'):
    """
    Smoke-test the data lookups and the age/contact sampling helpers.

    Validates the data files, samples an age and sex, samples contact ages
    with flu-like setting weights, then samples again with a reduced number
    of contacts and a lowered school weight.

    Args:
        location         : location name used for the data lookups
        state_location   : state name used for the data lookups
        country_location : country name used for the data lookups
        sheet_name       : sheet name passed to sp.get_contact_matrix_dic
    """
    sc.heading('Running all tests')

    sp.validate()  # Validate that data files can be found
    data_path = sp.settings.datadir

    age_bracket_distr = spdd.read_age_bracket_distr(data_path, location, state_location, country_location)
    gender_fraction_by_age = sp.read_gender_fraction_by_age_bracket(data_path, location, state_location,
                                                                    country_location)
    _, brackets_path = sp.get_census_age_brackets_path(data_path, state_location, country_location)
    print(brackets_path)
    age_brackets = sp.get_age_brackets_from_df(brackets_path)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    # --- selecting an age and sex for an individual ---
    first_age, first_sex = sp.get_age_sex(gender_fraction_by_age, age_bracket_distr, age_brackets)
    print(first_age, first_sex)

    # --- age mixing matrix ---
    # flu-like weights. calibrated to empirical diary survey data.
    weights_dic = {'H': 4.11, 'S': 11.41, 'W': 8.07, 'C': 2.79}
    age_mixing_matrix_dic = sp.get_contact_matrix_dic(data_path, sheet_name)

    # --- sampling contacts based on age ---
    # sample an age (and sex) from the seattle metro distribution
    age, sex = sp.get_age_sex(gender_fraction_by_age, age_bracket_distr, age_brackets)

    n_contacts = 30
    print(sp.sample_n_contact_ages(n_contacts, age, age_brackets, age_by_brackets_dic,
                                   age_mixing_matrix_dic, weights_dic))

    # --- shut down schools ---
    # research shows that even with school closure, kids still have some
    # contact with their friends from school.
    no_schools_weights = sc.dcp(weights_dic)
    no_schools_weights['S'] = 0.1

    # People under 20 lose half of their contacts, everyone else a fifth.
    reduction = 0.5 if age < 20 else 0.2
    n_reduced_contacts = int(n_contacts * (1 - reduction))

    print(sp.sample_n_contact_ages(n_reduced_contacts, age, age_brackets, age_by_brackets_dic,
                                   age_mixing_matrix_dic, no_schools_weights))
def test_seattle_age_brackets(self):
    """
    Load the Washington/usa census age brackets (use_default=False) and verify
    the age-to-bracket mapping built from them.

    When self.is_debugging is set, the brackets are also dumped to a
    DEBUG_<test name>_age_brackets.json file.
    """
    self.is_debugging = False
    age_brackets = sp.get_census_age_brackets(datadir=sp.datadir,
                                              state_location="Washington",
                                              country_location="usa",
                                              use_default=False)

    # .tolist() turns each bracket into a plain list so json can serialize it.
    age_brackets_json = {key: age_brackets[key].tolist() for key in age_brackets}

    if self.is_debugging:
        debug_path = f"DEBUG_{self._testMethodName}_age_brackets.json"
        with open(debug_path, "w") as outfile:
            json.dump(age_brackets_json, outfile, indent=4)

    mapping = sp.get_age_by_brackets_dic(age_brackets=age_brackets)
    self.verify_age_bracket_dictionary_correct(mapping)
def test_generate_workplace_sizes(location='seattle_metro',
                                  state_location='Washington',
                                  country_location='usa',
                                  folder_name='contact_networks'):
    """
    Build schools for 10000 people, work out how many workers remain to be
    assigned per age, and generate workplace sizes for them.

    Args:
        location         : location name used for the data lookups
        state_location   : state name used for the data lookups
        country_location : country name used for the data lookups
        folder_name      : folder holding the saved contact network files

    Returns:
        tuple: (workers_by_age_to_assign_count, workplace_size_brackets,
        workplace_size_distr_by_brackets, workplace_sizes)
    """
    Npeople = 10000

    # --- schools ---
    uids_in_school, uids_in_school_by_age, ages_in_school_count = sp.get_uids_in_school(
        datadir, Npeople, location, state_location, country_location,
        folder_name=folder_name, use_default=True)

    school_size_brackets = sp.get_school_size_brackets(datadir, location, state_location, country_location)
    school_sizes = sp.generate_school_sizes(
        sp.get_school_size_distr_by_brackets(datadir, location, state_location, country_location),
        school_size_brackets,
        uids_in_school)

    brackets_path = sp.get_census_age_brackets_path(datadir, state_location, country_location)
    age_brackets = sp.get_age_brackets_from_df(brackets_path)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    contact_matrix_dic = sp.get_contact_matrix_dic(datadir, sheet_name='United States of America')

    # Need to instead get syn_schools now
    syn_schools, syn_school_uids = sp.send_students_to_school(
        school_sizes, uids_in_school, uids_in_school_by_age, ages_in_school_count,
        age_brackets, age_by_brackets_dic, contact_matrix_dic)

    # --- workers ---
    employment_rates = sp.get_employment_rates(datadir,
                                               location=location,
                                               state_location=state_location,
                                               country_location=country_location,
                                               use_default=True)
    age_by_uid_dic = sp.read_in_age_by_uid(datadir, location, state_location, country_location,
                                           folder_name, Npeople)

    potential_worker_uids, potential_worker_uids_by_age, potential_worker_ages_left_count = \
        sp.get_uids_potential_workers(syn_school_uids, employment_rates, age_by_uid_dic)
    workers_by_age_to_assign_count = sp.get_workers_by_age_to_assign(
        employment_rates, potential_worker_ages_left_count, age_by_uid_dic)

    # --- workplaces ---
    workplace_size_brackets = sp.get_workplace_size_brackets(
        datadir, location, state_location, country_location, use_default=True)
    workplace_size_distr_by_brackets = sp.get_workplace_size_distr_by_brackets(
        datadir, state_location=state_location, country_location=country_location, use_default=True)
    workplace_sizes = sp.generate_workplace_sizes(
        workplace_size_distr_by_brackets, workplace_size_brackets, workers_by_age_to_assign_count)

    return (workers_by_age_to_assign_count,
            workplace_size_brackets,
            workplace_size_distr_by_brackets,
            workplace_sizes)
def test_send_students_to_school(n=10000,
                                 location='seattle_metro',
                                 state_location='Washington',
                                 country_location='usa',
                                 folder_name='contact_networks'):
    """
    Read saved households, pick the students, and check that
    sp.send_students_to_school produces schools and school uid lists.

    Args:
        n                : population size of the saved household file to read
        location         : location name used for the data lookups
        state_location   : state name used for the data lookups
        country_location : country name used for the data lookups
        folder_name      : folder holding the saved contact network files

    Returns:
        tuple: (syn_schools, syn_school_uids) as returned by sp.send_students_to_school.
    """
    homes = sp.read_setting_groups(datadir, location, state_location, country_location,
                                   'households', folder_name, n, with_ages=True)
    homes_by_uids, age_by_uid_dic = sp.assign_uids_by_homes(homes)

    uids_in_school, uids_in_school_by_age, ages_in_school_count = sp.get_uids_in_school(
        datadir, n, location, state_location, country_location,
        age_by_uid_dic, homes_by_uids, use_default=False)

    school_size_distr_by_bracket = sp.get_school_size_distr_by_brackets(
        datadir, location, state_location, country_location)
    school_size_brackets = sp.get_school_size_brackets(datadir, location, state_location, country_location)
    school_sizes = sp.generate_school_sizes(school_size_distr_by_bracket, school_size_brackets, uids_in_school)

    age_brackets_filepath = sp.get_census_age_brackets_path(datadir, state_location, country_location)
    age_brackets = sp.get_age_brackets_from_df(age_brackets_filepath)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    contact_matrix_dic = sp.get_contact_matrix_dic(datadir, sheet_name='United States of America')

    syn_schools, syn_school_uids = sp.send_students_to_school(
        school_sizes, uids_in_school, uids_in_school_by_age, ages_in_school_count,
        age_brackets, age_by_brackets_dic, contact_matrix_dic, verbose=False)

    # The original `assert syn_schools, syn_school_uids is not None` used the
    # second expression as the assertion *message*, so syn_school_uids was
    # never checked. The truthiness check on syn_schools is kept as it was.
    assert syn_schools, "send_students_to_school returned no schools"
    assert syn_school_uids is not None, "send_students_to_school returned None for syn_school_uids"

    return syn_schools, syn_school_uids
def test_older_ages_have_household_contacts():
    """
    Check that people aged 85+ have household contacts with each other
    exactly when the reference household contact matrix says they should.

    If sp.Pop.generate() used the wrong number of age brackets with the
    contact matrices, older people would never be generated as household
    contacts of each other: the 85+ block of the generated household matrix
    would be 0 even for fairly large populations, although the reference
    matrix expects otherwise.
    """
    test_pars = sc.dcp(pars)
    test_pars['n'] = 20e3

    pop = sp.Pop(**test_pars)
    pop_dict = pop.to_dict()

    contact_matrix_dic = sp.get_contact_matrix_dic(sp.datadir, sheet_name=pop.sheet_name)

    # The number of brackets is read from the first matrix in the dictionary.
    first_setting = list(contact_matrix_dic.keys())[0]
    contact_matrix_nbrackets = contact_matrix_dic[first_setting].shape[0]

    cm_age_brackets = sp.get_census_age_brackets(sp.datadir,
                                                 country_location=pop.country_location,
                                                 state_location=pop.state_location,
                                                 location=pop.location,
                                                 nbrackets=contact_matrix_nbrackets)
    cm_age_by_brackets_dic = sp.get_age_by_brackets_dic(cm_age_brackets)

    age_threshold = 85
    age_threshold_bracket = cm_age_by_brackets_dic[age_threshold]

    # Reference block is indexed by bracket; the generated block by single age.
    older_block_expected = contact_matrix_dic['H'][age_threshold_bracket:, age_threshold_bracket:]
    expected_older_contact = np.sum(older_block_expected)

    matrix = sp.calculate_contact_matrix(pop_dict, setting_code='H')
    gen_older_age_contacts = np.sum(matrix[age_threshold:, age_threshold:])

    if expected_older_contact == 0:
        assert gen_older_age_contacts == 0, f'Check failed, individuals over {age_threshold} years old have {gen_older_age_contacts} household contacts with each other even though the household contact matrix expects them to have none.'
    else:
        assert gen_older_age_contacts != 0, f'Check failed, individuals over {age_threshold} years old have no contacts with each other in households even though the household contact matrix expects them to.'

    print('Check passed.')
def test_assign_rest_of_workers(state_location='Washington', country_location='usa'):
    """
    Run sp.assign_rest_of_workers on the outputs of the workplace-size and
    potential-worker tests and sanity-check what it hands back.

    Args:
        state_location   : state used for the census age bracket lookup
        country_location : country used for the census age bracket lookup
    """
    (workers_by_age_to_assign_count,
     workplace_size_brackets,
     workplace_size_distr_by_brackets,
     workplace_sizes) = test_generate_workplace_sizes()

    (potential_worker_uids,
     potential_worker_uids_by_age,
     employment_rates,
     age_by_uid_dic) = test_get_uids_potential_workers()

    contact_matrix_dic = sp.get_contact_matrix_dic(datadir, sheet_name='United States of America')

    age_brackets_16 = sp.get_census_age_brackets(datadir, state_location, country_location)
    age_by_brackets_dic_16 = sp.get_age_by_brackets_dic(age_brackets_16)

    (syn_workplaces,
     syn_workplace_uids,
     potential_worker_uids,
     potential_worker_uids_by_age,
     workers_by_age_to_assign_count) = sp.assign_rest_of_workers(
        workplace_sizes, potential_worker_uids, potential_worker_uids_by_age,
        workers_by_age_to_assign_count, dict(age_by_uid_dic),
        age_brackets_16, age_by_brackets_dic_16, contact_matrix_dic)

    # TODO: Issue #116 assign_rest_of_workers returns empty syn_workplaces and
    # syn_workplace_uids. Once fixed, also check that syn_workplaces is a
    # non-empty list of workplaces (each a list of worker ages) and that
    # syn_workplace_uids is a non-empty list of worker ids.

    # potential_worker_uids should hold potential worker ids
    assert all(worker_id is not None for worker_id in potential_worker_uids)

    # potential_worker_uids_by_age should be keyed by age
    # NOTE(review): int(...) is falsy for 0, so an entry of 0 would fail here.
    assert all(int(worker_by_age) for worker_by_age in potential_worker_uids_by_age)

    # workers_by_age_to_assign_count maps age to the count of workers left to assign
    assert all(tuple(worker) for worker in workers_by_age_to_assign_count.items())
def get_age_distribution_from_pop(pop, brackets, normalized=True):
    """
    Count the people of a population per age bracket.

    Args:
        pop        : population dictionary; each value is read through its 'age' entry
        brackets   : age brackets
        normalized : whether the counts are passed through sp.norm_dic
                     before being returned, default to True

    Returns:
        numpy.ndarray: one value per bracket, in bracket index order.
    """
    bracket_of_age = sp.get_age_by_brackets_dic(brackets)

    # Start every bracket index at zero so empty brackets still appear.
    counts = dict.fromkeys(range(len(brackets)), 0)
    for person in pop.values():
        counts[bracket_of_age[person['age']]] += 1

    distribution = sp.norm_dic(counts) if normalized else counts
    return np.array(list(distribution.values()))
def test_send_students_to_school(location='seattle_metro',
                                 state_location='Washington',
                                 country_location='usa'):
    """
    Pick the students among 10000 people using default data and check that
    sp.send_students_to_school produces schools and school uid lists.

    Args:
        location         : location name used for the data lookups
        state_location   : state name used for the data lookups
        country_location : country name used for the data lookups
    """
    NPeople = 10000

    uids_in_school, uids_in_school_by_age, ages_in_school_count = sp.get_uids_in_school(
        datadir, NPeople, location, state_location, country_location, use_default=True)

    school_size_distr_by_bracket = sp.get_school_size_distr_by_brackets(
        datadir, location, state_location, country_location)
    school_size_brackets = sp.get_school_size_brackets(datadir, location, state_location, country_location)
    school_sizes = sp.generate_school_sizes(school_size_distr_by_bracket, school_size_brackets, uids_in_school)

    age_brackets_filepath = sp.get_census_age_brackets_path(datadir, state_location, country_location)
    age_brackets = sp.get_age_brackets_from_df(age_brackets_filepath)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    contact_matrix_dic = sp.get_contact_matrix_dic(datadir, sheet_name='United States of America')

    syn_schools, syn_school_uids = sp.send_students_to_school(
        school_sizes, uids_in_school, uids_in_school_by_age, ages_in_school_count,
        age_brackets, age_by_brackets_dic, contact_matrix_dic, verbose=False)

    # The original `assert syn_schools, syn_school_uids is not None` used the
    # second expression as the assertion *message*, so syn_school_uids was
    # never checked. The truthiness check on syn_schools is kept as it was.
    assert syn_schools, "send_students_to_school returned no schools"
    assert syn_school_uids is not None, "send_students_to_school returned None for syn_school_uids"
def check_employment_age_distribution(pop,
                                      n,
                                      datadir,
                                      figdir,
                                      location=None,
                                      state_location=None,
                                      country_location=None,
                                      file_path=None,
                                      use_default=False,
                                      test_prefix="",
                                      skip_stat_check=False,
                                      do_close=True):
    """
    Check the population employment by age distribution against the reference data.

    Three things are produced, in this order:
      1. plots of the employed and unemployed head counts by age;
      2. a comparison (optional statistic test + plot) of the expected and
         actual employment rate by age;
      3. a comparison (plot + error percentage check) of the expected and
         actual total number of employed people, aggregated by age bracket.

    Args:
        pop              : population dictionary
        n                : population size
        datadir          : root data directory which holds the reference data
        figdir           : directory where the result files are saved; an
                           "employment" sub-directory is appended to it
        location         : name of the location
        state_location   : name of the state the location is in
        country_location : name of the country the location is in
        file_path        : user specified data file path; forwarded to both
                           sp.get_employment_rates and sp.read_age_bracket_distr
        use_default      : forwarded to both sp.get_employment_rates and
                           sp.read_age_bracket_distr
        test_prefix      : appended to the testprefix string of every plot
        skip_stat_check  : skip the statistics check for distribution
        do_close         : forwarded to the rate and total plots

    Returns:
        None.
        Plots are written through utilities.plot_array with datadir=figdir.
    """
    figdir = os.path.join(figdir, "employment")
    er = sp.get_employment_rates(datadir=datadir,
                                 location=location,
                                 state_location=state_location,
                                 country_location=country_location,
                                 file_path=file_path,
                                 use_default=use_default)
    brackets = sp.get_census_age_brackets(datadir=datadir,
                                          state_location=state_location,
                                          country_location=country_location)
    ageindex = sp.get_age_by_brackets_dic(brackets)
    age_dist = sp.read_age_bracket_distr(datadir=datadir,
                                         location=location,
                                         state_location=state_location,
                                         country_location=country_location,
                                         file_path=file_path,
                                         use_default=use_default)

    # Count the actual population by age, split on employment. Having any of
    # 'wpid', 'sc_teacher' or 'sc_staff' is the condition passed to the helper,
    # so teachers and school staff are counted together with workplace members.
    actual_employed_age_dist, actual_unemployed_age_dist = \
        utilities.get_ids_count_by_param(pop,
                                         condition_name=['wpid', 'sc_teacher', 'sc_staff'],
                                         param='age')

    # Head-count plots, with ages in ascending order.
    utilities.plot_array([
        actual_employed_age_dist[k] for k in sorted(actual_employed_age_dist)
    ],
                         datadir=figdir,
                         names=[k for k in sorted(actual_employed_age_dist)],
                         expect_label='employed by age count',
                         xlabel_rotation=90,
                         testprefix="employeed count by age " + test_prefix)
    utilities.plot_array([
        actual_unemployed_age_dist[k]
        for k in sorted(actual_unemployed_age_dist)
    ],
                         datadir=figdir,
                         names=[k for k in sorted(actual_unemployed_age_dist)],
                         expect_label='unemployed by age count',
                         xlabel_rotation=90,
                         testprefix="unemployed count by age " + test_prefix)

    # Re-key the actual rates in the order of the reference rates so both
    # value arrays line up; ages missing from the population get a rate of 0.
    sorted_actual_employed_rate = {}
    actual_employed_rate = utilities.calc_rate(actual_employed_age_dist,
                                               actual_unemployed_age_dist)
    for i in er.keys():
        if i in actual_employed_rate:
            sorted_actual_employed_rate[i] = actual_employed_rate[i]
        else:
            sorted_actual_employed_rate[i] = 0
    actual_values = np.array(list(sorted_actual_employed_rate.values()))
    expected_values = np.array(list(er.values()))
    if not skip_stat_check:
        utilities.statistic_test(expected_values,
                                 actual_values,
                                 test="x",
                                 comments="employment rate distribution check")

    # For a better display, pad both arrays with zeros in front, one per age
    # below the youngest age that has a reference rate, so that the array
    # index equals the age.
    filled_count = min(er.keys())
    expected_values = np.insert(expected_values, 0, np.zeros(filled_count))
    actual_values = np.insert(actual_values, 0, np.zeros(filled_count))
    names = [i for i in range(0, max(er.keys()) + 1)]
    # `names` is not passed to the plot (names=None): the original note says
    # it "somehow double stacks for age 100". It is still used below as the
    # list of ages to aggregate.
    utilities.plot_array(
        expected_values,
        actual_values,
        names=None,
        datadir=figdir,
        testprefix="employment rate distribution " + test_prefix,
        do_close=do_close,
    )

    # Check whether the total employment matches, aggregated per age bracket.
    expected_employed_brackets = {k: 0 for k in brackets}
    actual_employed_brackets = {k: 0 for k in brackets}
    for i in names:
        # expected: sum of the single-age rates in the bracket (turned into a
        # head count below); actual: sum of the employed head counts.
        expected_employed_brackets[ageindex[i]] += expected_values[i]
        if i in actual_employed_age_dist:
            actual_employed_brackets[
                ageindex[i]] += actual_employed_age_dist[i]
    for i in expected_employed_brackets:
        # mean rate of the bracket * share of the population in the bracket * n
        expected_employed_brackets[i] = expected_employed_brackets[i] / len(
            brackets[i]) * age_dist[i] * n

    expected_total = np.array(list(expected_employed_brackets.values()))
    actual_total = np.array(list(actual_employed_brackets.values()))
    utilities.plot_array(expected_total,
                         actual_total,
                         names=brackets.keys(),
                         datadir=figdir,
                         testprefix="employment total " + test_prefix,
                         do_close=do_close)
    expected_etotal = np.round(np.sum(expected_total))
    actual_etotal = np.round(np.sum(actual_total))
    utilities.check_error_percentage(n,
                                     expected_etotal,
                                     actual_etotal,
                                     name="employee")
# Script: collect long term care facility (LTCF) use rates by age for a state
# through the synthpops helpers, then set up the inputs for a local estimate.
import synthpops as sp

datadir = sp.datadir
state_location = 'Washington'
country_location = 'usa'
acs_period = 1

# Toggle by hand: when True, the processed rates are written out below.
# save = True
save = False

# Census age brackets (nbrackets=18 is passed through to the loader) and the
# lookup built from them by sp.get_age_by_brackets_dic.
age_brackets = sp.get_census_age_brackets(datadir, state_location=state_location, country_location=country_location, nbrackets=18)
age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

# Process the rates and, only if requested, write them out.
ltcf_rates_by_age = sp.process_long_term_care_facility_rates_by_age(datadir, state_location, country_location)
if save:
    sp.write_long_term_care_facility_use_rates(datadir, state_location, country_location, ltcf_rates_by_age)

# Rebinds ltcf_rates_by_age, replacing the freshly processed values above.
# NOTE(review): presumably this reads back what write_* stored - confirm.
ltcf_rates_by_age = sp.get_long_term_care_facility_use_rates(datadir, state_location=state_location, country_location=country_location)

# use the data to estimate the number of long term care facility users for a local region and a given population size
local_population_size = 225e3
# NOTE(review): the first assignment is overwritten immediately, so location
# ends up as 'Washington'; it looks like a manual toggle between two regions.
location = 'Seattle-Tacoma-Bellevue-WA-Metro-Area'
location = 'Washington'
def plot_data_contact_matrix(datadir, location='seattle_metro', state_location='Washington',
                             country_location='usa', sheet_name='United States of America',
                             setting_code='H', logcolors_flag=True,
                             density_or_frequency=None, aggregate_flag=None):
    """
    Plot the contact matrix loaded from data for one setting as a heatmap.

    The matrix returned by sp.get_contact_matrix is drawn transposed with a
    colorbar on its right, optionally on a logarithmic color scale and with
    age bracket tick labels.

    Args:
        datadir              : data directory handed to the synthpops loaders
        location             : not used by this function; kept so existing calls keep working
        state_location       : state location used to look up the census age brackets
        country_location     : country location used to look up the census age brackets
        sheet_name           : sheet name handed to sp.get_contact_matrix
        setting_code         : 'H', 'S' or 'W'; selects the matrix, the title and the log color bounds
        logcolors_flag       : if True, color the matrix through a LogNorm with fixed bounds
        density_or_frequency : 'density' or 'frequency'; selects the log color bounds and the
                               colorbar label. If None, a module-level name 'density_or_frequency'
                               is used when one exists, otherwise 'density'.
        aggregate_flag       : if True, label both axes with the census age brackets. If None, a
                               module-level name 'aggregate_flag' is used when one exists,
                               otherwise True.

    Returns:
        The figure created with plt.figure that holds the heatmap.

    Raises:
        KeyError: if setting_code has no title, or if logcolors_flag is set and no color bounds
                  exist for the given density_or_frequency / setting_code pair.
    """
    # These two options used to be read as free names, which only worked when
    # the module defined globals with the same spelling. Explicit arguments
    # win; otherwise fall back to such a global so existing scripts behave as
    # before, and to a fixed default instead of a NameError when none exists.
    if density_or_frequency is None:
        density_or_frequency = globals().get('density_or_frequency', 'density')
    if aggregate_flag is None:
        aggregate_flag = globals().get('aggregate_flag', True)

    titles = {'H': 'Household', 'S': 'School', 'W': 'Work'}

    # Fixed color limits for the logarithmic scale, per mode and setting.
    log_color_bounds = {
        'density': {
            'H': {'vmin': 1e-2, 'vmax': 1e1},
            'S': {'vmin': 1e-3, 'vmax': 1e0},
            'W': {'vmin': 1e-3, 'vmax': 1e0},
        },
        'frequency': {
            'H': {'vmin': 1e-2, 'vmax': 1e0},
            'S': {'vmin': 1e-2, 'vmax': 1e1},
            'W': {'vmin': 1e-2, 'vmax': 1e0},
        },
    }

    # Resolve every lookup that can fail before a figure is created, so that a
    # bad setting_code or mode does not leave an orphan figure behind.
    title = titles[setting_code] + ' Contact Patterns'
    imshow_kwargs = {
        'origin': 'lower',
        'interpolation': 'nearest',
        # The colormap object is used directly; wrapping it in get_cmap only
        # returned the same instance.
        'cmap': cmocean.cm.matter_r,
    }
    if logcolors_flag:
        bounds = log_color_bounds[density_or_frequency][setting_code]
        imshow_kwargs['norm'] = LogNorm(vmin=bounds['vmin'], vmax=bounds['vmax'])

    asymmetric_M = sp.get_contact_matrix(datadir, setting_code, sheet_name=sheet_name)

    # The age brackets are only needed for the tick labels.
    age_brackets = None
    if aggregate_flag:
        age_brackets = sp.get_census_age_brackets(
            datadir, state_location=state_location, country_location=country_location)

    fig = plt.figure(figsize=(9, 9))
    ax = fig.add_subplot(111)
    im = ax.imshow(asymmetric_M.T, **imshow_kwargs)

    # Colorbar in its own narrow axes to the right of the heatmap.
    divider = make_axes_locatable(ax)
    cax = divider.new_horizontal(size="4%", pad=0.15)
    fig.add_axes(cax)
    cbar = fig.colorbar(im, cax=cax)
    cbar.ax.tick_params(axis='y', labelsize=20)
    if density_or_frequency == 'frequency':
        cbar.ax.set_ylabel('Frequency of Contacts', fontsize=20)
    else:
        cbar.ax.set_ylabel('Density of Contacts', fontsize=20)

    ax.tick_params(labelsize=20)
    ax.set_xlabel('Age', fontsize=24)
    ax.set_ylabel('Age of Contacts', fontsize=24)
    ax.set_title(title, fontsize=28)

    if aggregate_flag:
        # One "first-last" label per bracket, in the brackets' iteration order.
        tick_labels = [
            str(age_brackets[b][0]) + '-' + str(age_brackets[b][-1])
            for b in age_brackets
        ]
        tick_positions = np.arange(len(tick_labels))
        ax.set_xticks(tick_positions)
        ax.set_xticklabels(tick_labels, fontsize=18, rotation=50)
        ax.set_yticks(tick_positions)
        ax.set_yticklabels(tick_labels, fontsize=18)

    return fig