def test_webapp_make_contacts_and_show_some_layers(n=default_n, n_contacts_dic=None, state_location='Washington', location='seattle_metro', country_location='usa', sheet_name='United States of America'):
    """
    Make a popdict of n people, create contacts with age mixing and social
    layers enabled, and print the contact layers for the first few people.

    Args:
        n (int)               : Number of people to create.
        n_contacts_dic (dict) : Average number of contacts per layer, or None for defaults.
        state_location (str)  : State of the population.
        location (str)        : Location of the population.
        country_location (str): Country of the population.
        sheet_name (str)      : Sheet name for the age mixing data.

    Returns:
        dict: The population dictionary (note: returns the popdict, not the contacts).
    """
    sc.heading(f'Making popdict for {n} people')
    n = int(n)

    popdict = sp.make_popdict(n=n, state_location=state_location, location=location, country_location=country_location)

    options_args = dict.fromkeys(['use_age', 'use_social_layers', 'use_age_mixing'], True)
    contacts = sp.make_contacts(popdict, n_contacts_dic=n_contacts_dic, state_location=state_location, location=location, country_location=country_location, sheet_name=sheet_name, options_args=options_args)

    uids = list(contacts.keys())
    # BUG FIX: the loop index previously reused (shadowed) the population-size
    # variable 'n'; use a separate index so 'n' keeps its meaning.
    for i, uid in enumerate(uids):
        if i > 20:  # only show the first ~21 people
            break
        layers = contacts[uid]['contacts']
        print(i, 'uid', uid, 'age', contacts[uid]['age'], 'total contacts', np.sum([len(contacts[uid]['contacts'][k]) for k in layers]))
        for k in layers:
            contact_ages = [contacts[c]['age'] for c in contacts[uid]['contacts'][k]]
            print(k, len(contact_ages), 'contact ages', contact_ages)
        print()

    return popdict
def test_plot_generated_trimmed_contact_matrix(setting_code='H', n=5000, aggregate_flag=True, logcolors_flag=True, density_or_frequency='density'):
    """
    Generate a microstructured Seattle Metro population, trim its contacts,
    and plot the contact matrix for the given setting.

    Args:
        setting_code (str)         : Layer to plot ('H', 'S', 'W', ...).
        n (int)                    : Population size.
        aggregate_flag (bool)      : If True, aggregate ages into brackets.
        logcolors_flag (bool)      : If True, use a logarithmic color scale.
        density_or_frequency (str) : Matrix normalization mode.

    Returns:
        Matplotlib figure of the contact matrix.
    """
    datadir = sp.datadir
    state_location = 'Washington'
    location = 'seattle_metro'
    country_location = 'usa'

    # Build contacts from the generated microstructure, then trim to defaults
    popdict = {}
    contacts = sp.make_contacts(popdict,
                                state_location=state_location,
                                location=location,
                                options_args={'use_microstructure': True},
                                network_distr_args={'Npop': int(n)})
    contacts = sp.trim_contacts(contacts, trimmed_size_dic=None, use_clusters=False)

    # Census age brackets and the age -> bracket mapping
    age_brackets = sp.get_census_age_brackets(datadir, state_location=state_location, country_location=country_location)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    # Count people by age and by aggregated bracket
    age_count = Counter(contacts[uid]['age'] for uid in contacts)
    aggregate_age_count = sp.get_aggregate_ages(age_count, age_by_brackets_dic)

    freq_matrix_dic = sp.calculate_contact_matrix(contacts, density_or_frequency)

    fig = sp.plot_contact_frequency(freq_matrix_dic, age_count, aggregate_age_count,
                                    age_brackets, age_by_brackets_dic, setting_code,
                                    density_or_frequency, logcolors_flag, aggregate_flag)
    return fig
def make_random_contacts(n=2e3, location='seattle_metro', state_location='Washington', country_location='usa', average_degree=30, verbose=False):
    """
    Make a popdict of n people, age and sex sampled from Seattle Metro
    demographics but random contacts. The resulting network is an
    Erdos-Renyi network with the given average degree (default 30).

    Args:
        n (float)            : Number of people to create.
        location (str)       : Location of the population.
        state_location (str) : State of the population.
        country_location (str): Country of the population.
        average_degree (int) : Average number of contacts per person.
        verbose (bool)       : If True, print each uid and its contacts.

    Returns:
        dict: The contacts dictionary.
    """
    popdict = sp.make_popdict(n=n, location=location, state_location=state_location,
                              country_location=country_location, use_demography=True)

    contacts = sp.make_contacts(popdict, network_distr_args={'average_degree': average_degree})

    if verbose:
        # print uid and uids of contacts in the random ('M') layer
        for person_id in list(contacts.keys()):
            print(person_id, contacts[person_id]['contacts']['M'])

    return contacts
def test_make_contacts_from_microstructure(location='seattle_metro', state_location='Washington', Npop=50000):
    """
    Load a microstructured population of Npop people and print the contact
    layers for the first few people.

    Args:
        location (str)       : Location of the population.
        state_location (str) : State of the population.
        Npop (int)           : Population size to load.

    Returns:
        dict: The contacts dictionary.
    """
    contacts = sp.make_contacts(state_location=state_location,
                                location=location,
                                options_args=dict.fromkeys(['use_microstructure'], True),
                                network_distr_args={'Npop': Npop})

    for n, uid in enumerate(list(contacts.keys())):
        if n > 20:  # only show the first ~21 people
            break
        layers = contacts[uid]['contacts']
        total = np.sum([len(contacts[uid]['contacts'][k]) for k in layers])
        print('uid', uid, 'age', contacts[uid]['age'], 'total contacts', total)
        for k in layers:
            ages_of_contacts = [contacts[c]['age'] for c in contacts[uid]['contacts'][k]]
            print(k, len(ages_of_contacts), 'contact ages', ages_of_contacts)
        print()

    return contacts
def test_make_contacts(n=default_n):
    """
    Make contacts for n people with age, sex, location, and social layers enabled.

    Args:
        n (int): Number of people to create.

    Returns:
        dict: The contacts dictionary.
    """
    sc.heading(f'Making contacts for {n} people')

    # BUG FIX: was 'popdict = popdict = sp.make_popdict(n=n)' — a duplicated
    # assignment with no effect.
    popdict = sp.make_popdict(n=n)

    options_args = dict.fromkeys(['use_age', 'use_sex', 'use_loc', 'use_social_layers'], True)
    contacts = sp.make_contacts(popdict, options_args=options_args)

    return contacts
def plot_generated_trimmed_contact_matrix(datadir, n, location='seattle_metro', state_location='Washington', country_location='usa', setting_code='H', aggregate_flag=True, logcolors_flag=True, density_or_frequency='density', trimmed_size_dic=None):
    """
    Generate a microstructured population, trim its contacts, and plot the
    (symmetric) contact matrix for the given setting.

    Args:
        datadir (str)              : Path to the data directory.
        n (int)                    : Population size.
        location (str)             : Location of the population.
        state_location (str)       : State of the population.
        country_location (str)     : Country of the population.
        setting_code (str)         : Layer to plot ('H', 'S', 'W', ...).
        aggregate_flag (bool)      : If True, aggregate ages into brackets.
        logcolors_flag (bool)      : If True, use a logarithmic color scale.
        density_or_frequency (str) : Matrix normalization mode.
        trimmed_size_dic (dict)    : Maximum contacts per layer, or None for defaults.

    Returns:
        Matplotlib figure of the contact matrix.
    """
    popdict = {}
    options_args = {'use_microstructure': True}
    network_distr_args = {'Npop': int(n)}

    contacts = sp.make_contacts(popdict, country_location=country_location, state_location=state_location, location=location, options_args=options_args, network_distr_args=network_distr_args)
    contacts = sp.trim_contacts(contacts, trimmed_size_dic=trimmed_size_dic, use_clusters=False)

    # Census age brackets and the age -> bracket mapping
    age_brackets = sp.get_census_age_brackets(datadir, state_location=state_location, country_location=country_location)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    ages = []
    for uid in contacts:
        ages.append(contacts[uid]['age'])
    # FIX: removed unused local 'num_agebrackets = len(age_brackets)'
    age_count = Counter(ages)
    aggregate_age_count = sp.get_aggregate_ages(age_count, age_by_brackets_dic)

    # NOTE: calculate_contact_matrix / plot_contact_matrix are module-level
    # helpers defined elsewhere in this file.
    symmetric_matrix = calculate_contact_matrix(contacts, density_or_frequency, setting_code)
    fig = plot_contact_matrix(symmetric_matrix, age_count, aggregate_age_count, age_brackets, age_by_brackets_dic,
                              setting_code=setting_code, density_or_frequency=density_or_frequency,
                              logcolors_flag=logcolors_flag, aggregate_flag=aggregate_flag)
    return fig
def make_contacts():  # Copied from test_contacts.py
    """
    Build a 10,000-person population with weighted social layers.

    Returns:
        dict: The contacts dictionary.
    """
    # Calibrated layer weights; 'R' is then bumped from 2.79 to 7 because the
    # calibrate weight 2.79 doesn't include contacts from the general community
    # that you don't know but are near!
    weights_dic = {'H': 4.11, 'S': 11.41, 'W': 8.07, 'R': 2.79}
    weights_dic['R'] = 7

    kwargs = dict(weights_dic=weights_dic,
                  use_social_layers=True,
                  directed=False,
                  use_student_weights=True)  # Crashes if False

    popdict = sp.make_popdict(n=10000)
    return sp.make_contacts(popdict, **kwargs)
def test_trimmed_contacts_are_bidirectional():
    """
    Verify that trimmed contact networks stay bidirectional: if A lists B as
    a contact in a layer, B must list A in that same layer. Checks a handful
    of people sampled from the trimmed population.

    NOTE: location, state_location, country_location, options_args and
    network_distr_args are module-level test fixtures defined elsewhere
    in this file, as are find_fresh_uid, make_person_json and
    check_bidirectionality_of_contacts.
    """
    num_people = 5
    checked_people = []
    last_index_checked = 0
    close_contacts_numbers = {'S': 10, 'W': 10}
    is_debugging = False

    my_contacts = sp.make_contacts(location=location,
                                   state_location=state_location,
                                   country_location=country_location,
                                   options_args=options_args,
                                   network_distr_args=network_distr_args)
    # FIX: dropped the redundant 'my_trim_contacts' intermediate and the
    # unused 'first_person' local; removed no-op 'pass' statements.
    popdict = sp.trim_contacts(my_contacts, trimmed_size_dic=close_contacts_numbers)

    uid_list = list(popdict.keys())
    while len(checked_people) < num_people:
        my_uid, last_index_checked, checked_people = \
            find_fresh_uid(uid_list=uid_list,
                           last_index_checked=last_index_checked,
                           checked_people=checked_people)
        person_json = make_person_json(popdict=popdict, target_uid=my_uid)
        if is_debugging:
            # Dump this person's record to a JSON file for inspection
            person_filename = f"DEBUG_popdict_person_{my_uid}.json"
            print(f"TEST: {my_uid}")
            if os.path.isfile(person_filename):
                os.unlink(person_filename)
            with open(person_filename, "w") as outfile:
                json.dump(person_json, outfile, indent=4, sort_keys=True)
        # Now check that each person in each network has me in their network
        check_bidirectionality_of_contacts(person_json=person_json, popdict=popdict)
def test_make_contacts_generic(n=default_n):
    """
    Make contacts for a generic population (no demography data) and print
    layer summaries for the first few people.

    Args:
        n (int): Number of people to create.

    Returns:
        dict: The contacts dictionary.
    """
    sc.heading(f'Making popdict for {n} people')
    n = int(n)

    popdict = sp.make_popdict(n=n, use_demography=False)
    contacts = sp.make_contacts(popdict)

    for n, uid in enumerate(list(contacts.keys())):
        if n > 20:  # only show the first ~21 people
            break
        layers = contacts[uid]['contacts']
        total = np.sum([len(contacts[uid]['contacts'][k]) for k in layers])
        print('uid', uid, 'age', contacts[uid]['age'], 'total contacts', total)
        for k in layers:
            ages_of_contacts = [contacts[c]['age'] for c in contacts[uid]['contacts'][k]]
            print(k, len(ages_of_contacts), 'contact ages', ages_of_contacts)
        print()

    return contacts
def test_make_contacts_and_show_some_layers(n=default_n, n_contacts_dic=None, state_location='Washington', location='seattle_metro', country_location='usa'):
    """
    Make contacts for n people with age, sex, location, age mixing and social
    layers enabled, then print the contact layers for the first few people.

    Args:
        n (int)               : Number of people to create.
        n_contacts_dic (dict) : Average number of contacts per layer, or None for defaults.
        state_location (str)  : State of the population.
        location (str)        : Location of the population.
        country_location (str): Country of the population.

    Returns:
        dict: The contacts dictionary.
    """
    sc.heading(f'Make contacts for {int(n)} people and showing some layers')

    # BUG FIX: previously hard-coded n=1e3 here, silently ignoring the 'n'
    # argument even though the heading reported int(n) people.
    popdict = sp.make_popdict(n=n, state_location=state_location, location=location)

    options_args = dict.fromkeys(['use_age', 'use_sex', 'use_loc', 'use_age_mixing', 'use_social_layers'], True)
    contacts = sp.make_contacts(popdict, n_contacts_dic=n_contacts_dic, state_location=state_location, location=location, country_location=country_location, options_args=options_args)

    uids = list(contacts.keys())
    # BUG FIX: the loop index previously shadowed the parameter 'n'
    for i, uid in enumerate(uids):
        if i > 20:  # only show the first ~21 people
            break
        layers = contacts[uid]['contacts']
        print('uid', uid, 'age', contacts[uid]['age'], 'total contacts', np.sum([len(contacts[uid]['contacts'][k]) for k in layers]))
        for k in layers:
            contact_ages = [contacts[c]['age'] for c in contacts[uid]['contacts'][k]]
            print(k, len(contact_ages), 'contact ages', contact_ages)
        print()

    return contacts
'''
Little script for saving contacts to disk for ingestion by covasim.
'''

import pylab as pl
import sciris as sc
import synthpops as sp

# Build contacts from the pregenerated microstructure data
contacts = sp.make_contacts(options_args={'use_microstructure': True})

# fn = 'contacts_48797.obj'
# sc.saveobj(filename=fn,obj=contacts)

# Wrap in an ordered dict so entries can be accessed by integer index
cdict = sc.odict(contacts)
keys = cdict[0]['contacts'].keys()  # layer keys, taken from the first person

# Count the number of contacts per layer for every person
counts = {}
for key in keys:
    counts[key] = []
for c in cdict.values():
    for key in keys:
        count = len(c['contacts'][key])
        counts[key].append(count)

# Histogram of per-person contact counts, one subplot per layer
for k, key in enumerate(keys):
    pl.subplot(2, 2, k + 1)
    pl.hist(counts[key], bins=50)
    pl.title(key)
# Script section: generate a microstructured population and save it to disk.
# Location parameters for the population to generate
state_location = 'Washington'
location = 'seattle_metro'
country_location = 'usa'
popdict = {}
n = 20000

# Build a microstructured population of n people
options_args = {'use_microstructure': True}
network_distr_args = {'Npop': int(n)}

sc.tic()  # start timing
contacts = sp.make_contacts(popdict, state_location=state_location, location=location, options_args=options_args, network_distr_args=network_distr_args)

# uids = contacts.keys()
# uids = [uid for uid in uids]
# print(contacts[uids[3]]['contacts'])
# contacts = sp.trim_contacts(contacts,trimmed_size_dic=None,use_clusters=False)
# print(contacts[uids[3]]['contacts'])

# Save the generated population under the demographics data directory.
# NOTE(review): 'datadir' and 'os' are assumed to be defined/imported earlier
# in this file — confirm against the full source.
sp.save_synthpop(
    os.path.join(datadir, 'demographics', country_location, state_location),
    contacts)
sc.toc()  # report elapsed time
def make_population(n=None, max_contacts=None, as_objdict=False, generate=False):
    '''
    Make a full population network including both people (ages, sexes) and
    contacts using Seattle, Washington cached data.

    Args:
        n (int)             : The number of people to create.
        max_contacts (dict) : A dictionary for maximum number of contacts per layer: keys must be "S" (school) and/or "W" (work).
        as_objdict (bool)   : If True, change popdict type to ``sc.objdict``.
        generate (bool)     : If True, first look for cached population files and if those are not available, generate new population

    Returns:
        network (dict): A dictionary of the full population with ages and connections.

    Raises:
        ValueError: If n is not a cached size and generate is False, or if
                    loading fails and generate is False.
    '''
    default_n = 10000
    default_max_contacts = {'S': 20, 'W': 10}  # this can be anything but should be based on relevant average number of contacts for the population under study

    if n is None:
        n = default_n
    n = int(n)

    if n not in popsize_choices and not generate:
        # Without generation we can only serve the cached population sizes
        choicestr = ', '.join([str(choice) for choice in popsize_choices])
        errormsg = f'Number of people must be one of {choicestr}, not {n}'
        raise ValueError(errormsg)

    max_contacts = sc.mergedicts(default_max_contacts, max_contacts)

    country_location = 'usa'
    state_location = 'Washington'
    location = 'seattle_metro'
    sheet_name = 'United States of America'

    options_args = {'use_microstructure': True}
    network_distr_args = {'Npop': int(n)}

    # Heavy lift 1: make the contacts and their connections
    try:
        # try to read in from file
        population = sp.make_contacts(location=location, state_location=state_location, country_location=country_location, options_args=options_args, network_distr_args=network_distr_args)
    except Exception:  # BUG FIX: was a bare 'except:', which also swallowed SystemExit/KeyboardInterrupt
        if generate:
            # make a new network on the fly
            population = sp.generate_synthetic_population(n, sp.datadir, location=location, state_location=state_location, country_location=country_location, sheet_name=sheet_name, plot=False, return_popdict=True)
        else:
            # BUG FIX: previously raised ValueError(errormsg) where 'errormsg'
            # could be undefined, producing a NameError instead of the intended error.
            errormsg = f'Could not load a cached population for n={n}; call with generate=True to create one'
            raise ValueError(errormsg)

    # Semi-heavy-lift 2: trim them to the desired numbers
    population = sp.trim_contacts(population, trimmed_size_dic=max_contacts, use_clusters=False)

    # Change types: optionally wrap in objdicts, and always convert each
    # layer's contact container to a plain list
    if as_objdict:
        population = sc.objdict(population)
    for key, person in population.items():
        if as_objdict:
            population[key] = sc.objdict(population[key])
            population[key]['contacts'] = sc.objdict(population[key]['contacts'])
        for layerkey in population[key]['contacts'].keys():
            population[key]['contacts'][layerkey] = list(population[key]['contacts'][layerkey])

    return population
#region pre-test setup datadir = sp.datadir # point datadir where your data folder lives # location information - currently we only support the Seattle Metro area in full, however other locations can be supported with this framework at a later date location = 'seattle_metro' state_location = 'Washington' country_location = 'usa' n = 5000 # load population into a dictionary of individuals who know who their contacts are options_args = {'use_microstructure': True} network_distr_args = {'Npop': n} contacts = sp.make_contacts(location=location, state_location=state_location, country_location=country_location, options_args=options_args, network_distr_args=network_distr_args) # close_contacts_number = {'S': 10, 'W': 10} # CONTACTS = sp.trim_contacts(contacts, trimmed_size_dic=close_contacts_number) CONTACTS = contacts #endregion def find_fresh_uid(uid_list, last_index_checked, checked_people): my_uid = None while not my_uid: potential_uid = uid_list[last_index_checked] if potential_uid not in checked_people: my_uid = potential_uid
def make_population(n=None, max_contacts=None, generate=None, with_industry_code=False, with_facilities=False, use_two_group_reduction=True, average_LTCF_degree=20, ltcf_staff_age_min=20, ltcf_staff_age_max=60, with_school_types=False, school_mixing_type='random', average_class_size=20, inter_grade_mixing=0.1, average_student_teacher_ratio=20, average_teacher_teacher_degree=3, teacher_age_min=25, teacher_age_max=75, with_non_teaching_staff=False, average_student_all_staff_ratio=15, average_additional_staff_degree=20, staff_age_min=20, staff_age_max=75, rand_seed=None):
    '''
    Make a full population network including both people (ages, sexes) and contacts using Seattle, Washington data.

    Args:
        n (int)                                 : The number of people to create.
        max_contacts (dict)                     : A dictionary for maximum number of contacts per layer: keys must be "W" (work).
        generate (bool)                         : If True, generate a new population. Else, look for cached population and if those are not available, generate a new population.
        with_industry_code (bool)               : If True, assign industry codes for workplaces, currently only possible for cached files of populations in the US.
        with_facilities (bool)                  : If True, create long term care facilities, currently only available for locations in the US.
        use_two_group_reduction (bool)          : If True, create long term care facilities with reduced contacts across both groups.
        average_LTCF_degree (float)             : default average degree in long term care facilities.
        ltcf_staff_age_min (int)                : Long term care facility staff minimum age.
        ltcf_staff_age_max (int)                : Long term care facility staff maximum age.
        with_school_types (bool)                : If True, creates explicit school types.
        school_mixing_type (str or dict)        : The mixing type for schools, 'random', 'age_clustered', or 'age_and_class_clustered' if string, and a dictionary of these by school type otherwise.
        average_class_size (float)              : The average classroom size.
        inter_grade_mixing (float)              : The average fraction of mixing between grades in the same school for clustered school mixing types.
        average_student_teacher_ratio (float)   : The average number of students per teacher.
        average_teacher_teacher_degree (float)  : The average number of contacts per teacher with other teachers.
        teacher_age_min (int)                   : The minimum age for teachers.
        teacher_age_max (int)                   : The maximum age for teachers.
        with_non_teaching_staff (bool)          : If True, includes non teaching staff.
        average_student_all_staff_ratio (float) : The average number of students per staff members at school (including both teachers and non teachers).
        average_additional_staff_degree (float) : The average number of contacts per additional non teaching staff in schools.
        staff_age_min (int)                     : The minimum age for non teaching staff.
        staff_age_max (int)                     : The maximum age for non teaching staff.
        rand_seed (int)                         : Start point random sequence is generated from.

    Returns:
        network (dict): A dictionary of the full population with ages and connections.
    '''
    log.debug('make_population()')

    # Reseed the synthpops random sequence for reproducibility when requested
    if rand_seed is not None:
        sp.set_seed(rand_seed)

    default_n = 10000
    default_max_contacts = {'W': 20}  # this can be anything but should be based on relevant average number of contacts for the population under study

    if n is None:
        n = default_n
    n = int(n)

    # Non-cached sizes force generation; generate=False with such a size is an error
    if n not in popsize_choices:
        if generate is False:
            choicestr = ', '.join([str(choice) for choice in popsize_choices])
            errormsg = f'If generate=False, number of people must be one of {choicestr}, not {n}'
            raise ValueError(errormsg)
        else:
            generate = True  # If n not found in popsize_choices and generate was not False, generate a new population.

    # Default to False, unless LTCF are requested
    if generate is None:
        if with_facilities:
            generate = True
        else:
            generate = False

    max_contacts = sc.mergedicts(default_max_contacts, max_contacts)
    country_location = 'usa'
    state_location = 'Washington'
    location = 'seattle_metro'
    sheet_name = 'United States of America'

    # Options consumed by sp.make_contacts when reading a cached population
    options_args = {}
    options_args['use_microstructure'] = True
    options_args['use_industry_code'] = with_industry_code
    options_args['use_long_term_care_facilities'] = with_facilities
    options_args['use_two_group_reduction'] = use_two_group_reduction
    options_args['with_school_types'] = with_school_types
    options_args['with_non_teaching_staff'] = with_non_teaching_staff

    # Network distribution parameters forwarded to the generators
    network_distr_args = {}
    network_distr_args['Npop'] = int(n)
    network_distr_args['average_LTCF_degree'] = average_LTCF_degree
    network_distr_args['average_class_size'] = average_class_size
    network_distr_args['average_student_teacher_ratio'] = average_student_teacher_ratio
    network_distr_args['average_teacher_teacher_degree'] = average_teacher_teacher_degree
    network_distr_args['inter_grade_mixing'] = inter_grade_mixing
    network_distr_args['average_student_all_staff_ratio'] = average_student_all_staff_ratio
    network_distr_args['average_additional_staff_degree'] = average_additional_staff_degree
    network_distr_args['school_mixing_type'] = school_mixing_type

    # Heavy lift 1: make the contacts and their connections
    if not generate:
        log.debug('Not generating a new population')
        # must read in from file, will fail if the data has not yet been generated
        population = sp.make_contacts(location=location, state_location=state_location, country_location=country_location, sheet_name=sheet_name, options_args=options_args, network_distr_args=network_distr_args)
    else:
        log.debug('Generating a new population...')
        if with_facilities and with_industry_code:
            errormsg = f'Requesting both long term care facilities and industries by code is not supported yet.'
            raise ValueError(errormsg)
        elif with_facilities:
            population = sp.generate_microstructure_with_facilities(sp.datadir, location=location, state_location=state_location, country_location=country_location, n=n, sheet_name=sheet_name, use_two_group_reduction=use_two_group_reduction, average_LTCF_degree=average_LTCF_degree, ltcf_staff_age_min=ltcf_staff_age_min, ltcf_staff_age_max=ltcf_staff_age_max, with_school_types=with_school_types, school_mixing_type=school_mixing_type, average_class_size=average_class_size, inter_grade_mixing=inter_grade_mixing, average_student_teacher_ratio=average_student_teacher_ratio, average_teacher_teacher_degree=average_teacher_teacher_degree, teacher_age_min=teacher_age_min, teacher_age_max=teacher_age_max, average_student_all_staff_ratio=average_student_all_staff_ratio, average_additional_staff_degree=average_additional_staff_degree, staff_age_min=staff_age_min, staff_age_max=staff_age_max, return_popdict=True, trimmed_size_dic=max_contacts)
        else:
            population = sp.generate_synthetic_population(n, sp.datadir, location=location, state_location=state_location, country_location=country_location, sheet_name=sheet_name, with_school_types=with_school_types, school_mixing_type=school_mixing_type, average_class_size=average_class_size, inter_grade_mixing=inter_grade_mixing, average_student_teacher_ratio=average_student_teacher_ratio, average_teacher_teacher_degree=average_teacher_teacher_degree, teacher_age_min=teacher_age_min, teacher_age_max=teacher_age_max, average_student_all_staff_ratio=average_student_all_staff_ratio, average_additional_staff_degree=average_additional_staff_degree, staff_age_min=staff_age_min, staff_age_max=staff_age_max, return_popdict=True, trimmed_size_dic=max_contacts, )

    # Semi-heavy-lift 2: trim them to the desired numbers
    # population = sp.trim_contacts(population, trimmed_size_dic=max_contacts, use_clusters=False)

    # Change types: convert each layer's contact container to a plain list
    for key, person in population.items():
        for layerkey in population[key]['contacts'].keys():
            population[key]['contacts'][layerkey] = list(population[key]['contacts'][layerkey])

    log.debug('make_population(): done.')
    return population
def make_population(n=None, max_contacts=None, generate=None, with_industry_code=False, with_facilities=False, use_two_group_reduction=True, average_LTCF_degree=20, rand_seed=None):
    '''
    Make a full population network including both people (ages, sexes) and contacts using Seattle, Washington cached data.

    Args:
        n (int)                        : The number of people to create.
        max_contacts (dict)            : A dictionary for maximum number of contacts per layer: keys must be "S" (school) and/or "W" (work).
        generate (bool)                : If True, first look for cached population files and if those are not available, generate new population
        with_industry_code (bool)      : If True, assign industry codes for workplaces, currently only possible for cached files of populations in the US
        with_facilities (bool)         : If True, create long term care facilities
        use_two_group_reduction (bool) : If True, create long term care facilities with reduced contacts across both groups
        average_LTCF_degree (int)      : default average degree in long term care facilities

    Returns:
        network (dict): A dictionary of the full population with ages and connections.
    '''
    # Reseed the synthpops random sequence for reproducibility when requested
    if rand_seed is not None:
        sp.set_seed(rand_seed)

    default_n = 10000
    default_max_contacts = {'S': 20, 'W': 20}  # this can be anything but should be based on relevant average number of contacts for the population under study

    if n is None:
        n = default_n
    n = int(n)

    # Non-cached sizes force generation; generate=False with such a size is an error
    if n not in popsize_choices:
        if generate is False:
            choicestr = ', '.join([str(choice) for choice in popsize_choices])
            errormsg = f'If generate=False, number of people must be one of {choicestr}, not {n}'
            raise ValueError(errormsg)
        else:
            generate = True  # If not found, generate

    # Default to False, unless LTCF are requested
    if generate is None:
        if with_facilities:
            generate = True
        else:
            generate = False

    max_contacts = sc.mergedicts(default_max_contacts, max_contacts)
    country_location = 'usa'
    state_location = 'Washington'
    location = 'seattle_metro'
    sheet_name = 'United States of America'

    # Options consumed by sp.make_contacts when reading a cached population
    options_args = {'use_microstructure': True, 'use_industry_code': with_industry_code, 'use_long_term_care_facilities': with_facilities, 'use_two_group_reduction': use_two_group_reduction, 'average_LTCF_degree': average_LTCF_degree}
    network_distr_args = {'Npop': int(n)}

    # Heavy lift 1: make the contacts and their connections
    if not generate:
        # must read in from file, will fail if the data has not yet been generated
        population = sp.make_contacts(location=location, state_location=state_location, country_location=country_location, options_args=options_args, network_distr_args=network_distr_args)
    else:
        # make a new network on the fly
        if with_facilities and with_industry_code:
            errormsg = f'Requesting both long term care facilities and industries by code is not supported yet.'
            raise ValueError(errormsg)
        elif with_facilities:
            population = sp.generate_microstructure_with_facilities(sp.datadir, location=location, state_location=state_location, country_location=country_location, n=n, return_popdict=True, use_two_group_reduction=use_two_group_reduction, average_LTCF_degree=average_LTCF_degree)
        else:
            population = sp.generate_synthetic_population(n, sp.datadir, location=location, state_location=state_location, country_location=country_location, sheet_name=sheet_name, plot=False, return_popdict=True)

    # Semi-heavy-lift 2: trim them to the desired numbers
    population = sp.trim_contacts(population, trimmed_size_dic=max_contacts, use_clusters=False)

    # Change types: convert each layer's contact container to a plain list
    for key, person in population.items():
        for layerkey in population[key]['contacts'].keys():
            population[key]['contacts'][layerkey] = list(population[key]['contacts'][layerkey])

    return population
def make_contacts_by_social_layers_and_age_mixing(n=2e3, location='seattle_metro', state_location='Washington', country_location='usa', sheet_name='United States of America', verbose=False):
    """
    Make a popdict of n people, age and sex sampled from Seattle Metro
    demographics, with contacts created and stored by layer:

        'H' - households
        'S' - schools
        'W' - workplaces
        'C' - general community

    Use sheet_name to decide which set of age mixing patterns to sample
    contact ages from. Age mixing in the population will match these
    patterns approximately, but the networks remain random: clustering or
    triangles are not enforced. An individual's household-layer ('H')
    contacts will match what's expected for their age, but their contacts
    won't share contacts with them — the model does not directly create
    households, schools, or workplaces, only age-appropriate contacts per
    person. Caveat: students/teachers interact at school and workers at
    work, but they won't interact in both.

    Why bother without clustering? Compartmental models routinely use age
    mixing matrices to model age-dependent disease spread at the aggregate
    level; this brings some of that age mixing into an agent-based
    framework, which needs individual-level information.
    """
    popdict = sp.make_popdict(n=n, location=location, state_location=state_location,
                              country_location=country_location, use_demography=True)

    # dict of the average number of contacts per layer
    mean_contacts_per_layer = {'H': 7, 'S': 20, 'W': 20, 'C': 10}

    contacts = sp.make_contacts(popdict,
                                n_contacts_dic=mean_contacts_per_layer,
                                location=location,
                                state_location=state_location,
                                country_location=country_location,
                                sheet_name=sheet_name,
                                options_args={'use_age_mixing': True, 'use_social_layers': True})

    if verbose:
        # Print every uid followed by its contacts in each layer
        for person_id in list(contacts.keys()):
            print(person_id)
            for layer in ['H', 'S', 'W', 'C']:
                print(layer, contacts[person_id]['contacts'][layer])

    return contacts