def test_scale(self):
    """
    Generate populations of two sizes and run the school/staff checks on each.

    For every size the random seed is reset, a population is generated, saved
    as JSON under ``self.resultdir`` and handed to the teacher/staff ratio,
    enrollment distribution and age distribution checks. A failure for one
    size is reported on stdout and the next size is still attempted.

    Returns:
        Whatever ``utilities.check_teacher_staff_ratio`` returned for the last
        size that reached that check, or ``None`` if no size got that far.
    """
    seed = 1
    # set param
    average_student_teacher_ratio = 22
    average_student_all_staff_ratio = 15
    datadir = self.dataDir
    location = 'seattle_metro'
    state_location = 'Washington'
    country_location = 'usa'

    # Bound up front so the final ``return`` is valid even when every size fails.
    result = None

    # ``i`` is part of the output file names / test prefixes and only advances
    # after a size has passed all of its checks.
    i = 0
    for n in [2001, 10001]:
        try:
            sp.set_seed(seed)
            print(seed)
            pop = sp.generate_synthetic_population(
                n, datadir,
                average_student_teacher_ratio=average_student_teacher_ratio,
                average_student_all_staff_ratio=average_student_all_staff_ratio,
                return_popdict=True)
            sc.savejson(os.path.join(self.resultdir, f"calltwice_{n}_{i}.json"), pop, indent=2)
            result = utilities.check_teacher_staff_ratio(
                pop, self.dataDir, f"calltwice_{n}_{i}", average_student_teacher_ratio,
                average_student_all_staff_ratio=average_student_all_staff_ratio,
                err_margin=2)
            utilities_dist.check_enrollment_distribution(
                pop, n, datadir, location, state_location, country_location,
                test_prefix=f"calltwice{n}_{i}", skip_stat_check=True, do_close=self.do_close)
            utilities_dist.check_age_distribution(
                pop, n, datadir, self.resultdir, location, state_location, country_location,
                test_prefix=f"calltwice{n}_{i}", do_close=self.do_close)
            i += 1
        except Exception as err:
            # Best effort by design: report and move on to the next size.
            # ``Exception`` (not a bare ``except``) lets Ctrl-C / SystemExit through.
            print("check failed, continue...")
            print(f"  n={n}: {err!r}")
    return result
import json
import collections
import scipy
import utilities
import synthpops as sp
from synthpops import households as sphh
from synthpops import data_distributions as spdd

# the default test data was generated for 500 people using the below parameters
# and each test case will validate the properties of the population named "seapop_500"
# NOTE: this is a module-level statement, so the population is generated as soon
# as the module is imported (before any test method runs).
seapop_500 = sp.generate_synthetic_population(
    n=500,
    datadir=sp.settings.datadir,
    location='seattle_metro',
    state_location='Washington',
    country_location='usa',
    sheet_name='United States of America',
    plot=False,
    write=False,
    return_popdict=True,
    use_default=False,
)


# NOTE(review): ``unittest`` is used below but is not among the imports visible
# in this excerpt -- confirm it is imported elsewhere in the file.
class HouseholdsTest(unittest.TestCase):
    def setUp(self) -> None:
        """
        Set up class variables

        Only this docstring is visible in the excerpt; the statements that
        perform the set-up are not shown here.

        Returns:
            None
        """
def make_population(n=None, max_contacts=None, as_objdict=False, generate=False):
    '''
    Make a full population network including both people (ages, sexes) and
    contacts using Seattle, Washington cached data.

    The cached population is tried first via ``sp.make_contacts``. If that fails
    and ``generate`` is true, a new population is generated instead. The
    contacts are then trimmed to ``max_contacts`` and every contact layer is
    converted to a list.

    Args:
        n (int)             : The number of people to create. Defaults to 10000.
        max_contacts (dict) : A dictionary for maximum number of contacts per layer:
                              keys must be "S" (school) and/or "W" (work). Merged over
                              the defaults ``{'S': 20, 'W': 10}``.
        as_objdict (bool)   : If True, change popdict type to ``sc.objdict``.
        generate (bool)     : If True, first look for cached population files and if
                              those are not available, generate new population.

    Returns:
        network (dict): A dictionary of the full population with ages and connections.

    Raises:
        ValueError: If ``generate`` is false and either ``n`` is not one of
            ``popsize_choices`` or the cached population cannot be loaded.
    '''
    default_n = 10000
    # this can be anything but should be based on relevant average number of
    # contacts for the population under study
    default_max_contacts = {'S': 20, 'W': 10}

    if n is None:
        n = default_n
    n = int(n)

    # Without permission to generate, only the pre-built sizes can be served.
    if n not in popsize_choices and not generate:
        choicestr = ', '.join(str(choice) for choice in popsize_choices)
        errormsg = f'Number of people must be one of {choicestr}, not {n}'
        raise ValueError(errormsg)

    max_contacts = sc.mergedicts(default_max_contacts, max_contacts)

    country_location = 'usa'
    state_location = 'Washington'
    location = 'seattle_metro'
    sheet_name = 'United States of America'

    options_args = {'use_microstructure': True}
    network_distr_args = {'Npop': int(n)}

    # Heavy lift 1: make the contacts and their connections
    try:
        # try to read in from file
        population = sp.make_contacts(location=location,
                                      state_location=state_location,
                                      country_location=country_location,
                                      options_args=options_args,
                                      network_distr_args=network_distr_args)
    except Exception as err:
        if not generate:
            # The message is built here: the size-check message above is never
            # bound on this path, so reusing it raised UnboundLocalError
            # instead of the intended ValueError.
            errormsg = (f'Could not load a cached population of {n} people; '
                        f'use generate=True to create a new one')
            raise ValueError(errormsg) from err
        # make a new network on the fly
        population = sp.generate_synthetic_population(
            n,
            sp.datadir,
            location=location,
            state_location=state_location,
            country_location=country_location,
            sheet_name=sheet_name,
            plot=False,
            return_popdict=True)

    # Semi-heavy-lift 2: trim them to the desired numbers
    population = sp.trim_contacts(population,
                                  trimmed_size_dic=max_contacts,
                                  use_clusters=False)

    # Change types
    if as_objdict:
        population = sc.objdict(population)
    # Snapshot the keys: entries are re-assigned while we walk over them.
    for key in list(population.keys()):
        if as_objdict:
            population[key] = sc.objdict(population[key])
            population[key]['contacts'] = sc.objdict(population[key]['contacts'])
        contacts = population[key]['contacts']
        for layerkey in list(contacts.keys()):
            contacts[layerkey] = list(contacts[layerkey])

    return population
import synthpops as sp

if __name__ == '__main__':

    # Run synthpops' own validation before generating anything.
    sp.validate()

    n = 2000
    datadir = sp.datadir

    # Which population to build.
    place = dict(
        location='seattle_metro',
        state_location='Washington',
        country_location='usa',
        sheet_name='United States of America',
    )

    # Run switches; edit by hand when experimenting.
    switches = dict(
        school_enrollment_counts_available=False,
        verbose=False,
        plot=True,
        use_default=False,
    )

    sp.generate_synthetic_population(n, datadir, **place, **switches)
def make_population(n=None, max_contacts=None, generate=None, with_industry_code=False, with_facilities=False,
                    use_two_group_reduction=True, average_LTCF_degree=20, ltcf_staff_age_min=20, ltcf_staff_age_max=60,
                    with_school_types=False, school_mixing_type='random', average_class_size=20, inter_grade_mixing=0.1,
                    average_student_teacher_ratio=20, average_teacher_teacher_degree=3, teacher_age_min=25, teacher_age_max=75,
                    with_non_teaching_staff=False, average_student_all_staff_ratio=15, average_additional_staff_degree=20,
                    staff_age_min=20, staff_age_max=75, rand_seed=None):
    '''
    Build a full population network (people plus their contacts) for
    Seattle, Washington.

    The population is either read through ``sp.make_contacts`` (when
    ``generate`` ends up false) or created by one of the synthpops generators.
    ``max_contacts`` is only forwarded to the generators (as
    ``trimmed_size_dic``); the ``sp.make_contacts`` path does not use it.
    Every contact layer of every person is returned as a list.

    Args:
        n (int)                                 : Number of people. Defaults to 10000.
        max_contacts (dict)                     : Maximum number of contacts per layer, merged over the default ``{'W': 20}``.
        generate (bool)                         : True generates a new population. False reads an existing one, which
                                                  requires ``n`` to be in ``popsize_choices``. None means True when ``n``
                                                  is not in ``popsize_choices`` or when ``with_facilities`` is set,
                                                  otherwise False.
        with_industry_code (bool)               : If True, assign industry codes for workplaces, currently only possible for cached files of populations in the US.
        with_facilities (bool)                  : If True, create long term care facilities, currently only available for locations in the US.
        use_two_group_reduction (bool)          : If True, create long term care facilities with reduced contacts across both groups.
        average_LTCF_degree (float)             : Average degree in long term care facilities.
        ltcf_staff_age_min (int)                : Minimum age of long term care facility staff.
        ltcf_staff_age_max (int)                : Maximum age of long term care facility staff.
        with_school_types (bool)                : If True, creates explicit school types.
        school_mixing_type (str or dict)        : 'random', 'age_clustered' or 'age_and_class_clustered', or a dictionary of these by school type.
        average_class_size (float)              : Average classroom size.
        inter_grade_mixing (float)              : Average fraction of mixing between grades in the same school for clustered school mixing types.
        average_student_teacher_ratio (float)   : Average number of students per teacher.
        average_teacher_teacher_degree (float)  : Average number of contacts per teacher with other teachers.
        teacher_age_min (int)                   : Minimum age for teachers.
        teacher_age_max (int)                   : Maximum age for teachers.
        with_non_teaching_staff (bool)          : If True, includes non teaching staff.
        average_student_all_staff_ratio (float) : Average number of students per staff member at school (teachers and non teachers).
        average_additional_staff_degree (float) : Average number of contacts per additional non teaching staff in schools.
        staff_age_min (int)                     : Minimum age for non teaching staff.
        staff_age_max (int)                     : Maximum age for non teaching staff.
        rand_seed (int)                         : If given, passed to ``sp.set_seed`` before anything else is done.

    Returns:
        network (dict): A dictionary of the full population with ages and connections.

    Raises:
        ValueError: If ``generate`` is False and ``n`` is not in ``popsize_choices``, or if
            a population is being generated with both ``with_facilities`` and ``with_industry_code``.
    '''
    log.debug('make_population()')

    if rand_seed is not None:
        sp.set_seed(rand_seed)

    # Population size: default to 10000, always an int.
    n = int(10000 if n is None else n)

    # Resolve ``generate`` to a definite yes/no.
    if n not in popsize_choices:
        if generate is False:
            choicestr = ', '.join(str(choice) for choice in popsize_choices)
            errormsg = f'If generate=False, number of people must be one of {choicestr}, not {n}'
            raise ValueError(errormsg)
        generate = True  # size is not in popsize_choices and generation was not ruled out
    if generate is None:
        # Default to False, unless LTCF are requested
        generate = True if with_facilities else False

    # this default can be anything but should be based on relevant average
    # number of contacts for the population under study
    max_contacts = sc.mergedicts({'W': 20}, max_contacts)

    place = {
        'location': 'seattle_metro',
        'state_location': 'Washington',
        'country_location': 'usa',
        'sheet_name': 'United States of America',
    }

    options_args = {
        'use_microstructure': True,
        'use_industry_code': with_industry_code,
        'use_long_term_care_facilities': with_facilities,
        'use_two_group_reduction': use_two_group_reduction,
        'with_school_types': with_school_types,
        'with_non_teaching_staff': with_non_teaching_staff,
    }

    network_distr_args = {
        'Npop': int(n),
        'average_LTCF_degree': average_LTCF_degree,
        'average_class_size': average_class_size,
        'average_student_teacher_ratio': average_student_teacher_ratio,
        'average_teacher_teacher_degree': average_teacher_teacher_degree,
        'inter_grade_mixing': inter_grade_mixing,
        'average_student_all_staff_ratio': average_student_all_staff_ratio,
        'average_additional_staff_degree': average_additional_staff_degree,
        'school_mixing_type': school_mixing_type,
    }

    # Heavy lift: obtain the people and their connections.
    if not generate:
        log.debug('Not generating a new population')
        # must read in from file, will fail if the data has not yet been generated
        population = sp.make_contacts(options_args=options_args,
                                      network_distr_args=network_distr_args,
                                      **place)
    else:
        log.debug('Generating a new population...')

        if with_facilities and with_industry_code:
            errormsg = f'Requesting both long term care facilities and industries by code is not supported yet.'
            raise ValueError(errormsg)

        # Keyword arguments that both generators receive unchanged.
        shared_kwargs = dict(
            with_school_types=with_school_types,
            school_mixing_type=school_mixing_type,
            average_class_size=average_class_size,
            inter_grade_mixing=inter_grade_mixing,
            average_student_teacher_ratio=average_student_teacher_ratio,
            average_teacher_teacher_degree=average_teacher_teacher_degree,
            teacher_age_min=teacher_age_min,
            teacher_age_max=teacher_age_max,
            average_student_all_staff_ratio=average_student_all_staff_ratio,
            average_additional_staff_degree=average_additional_staff_degree,
            staff_age_min=staff_age_min,
            staff_age_max=staff_age_max,
            return_popdict=True,
            trimmed_size_dic=max_contacts,
        )

        if with_facilities:
            population = sp.generate_microstructure_with_facilities(
                sp.datadir,
                n=n,
                use_two_group_reduction=use_two_group_reduction,
                average_LTCF_degree=average_LTCF_degree,
                ltcf_staff_age_min=ltcf_staff_age_min,
                ltcf_staff_age_max=ltcf_staff_age_max,
                **place,
                **shared_kwargs)
        else:
            population = sp.generate_synthetic_population(n, sp.datadir, **place, **shared_kwargs)

    # Change types: every contact layer becomes a plain list.
    for person in population.values():
        layers = person['contacts']
        for layerkey in layers.keys():
            layers[layerkey] = list(layers[layerkey])

    log.debug('make_population(): done.')
    return population
import unittest
import numpy as np
import json
import synthpops as sp
from synthpops import households as sphh
from synthpops import data_distributions as spdd

# Module-level fixture: a 500-person Seattle metro population returned as a
# popdict. This is a top-level statement, so it runs whenever the module is
# imported -- independently of the skip decorator on the test class below.
seapop_500 = sp.generate_synthetic_population(
    n=500,
    datadir=sp.datadir,
    location='seattle_metro',
    state_location='Washington',
    country_location='usa',
    sheet_name='United States of America',
    school_enrollment_counts_available=False,
    verbose=False,
    plot=False,
    write=False,
    return_popdict=True,
    use_default=False)

# Printed at import time as a reminder; the same text is the skip reason below.
print('Needs to be refactored')


# The whole test case is skipped until it has been refactored.
@unittest.skip('Needs to be refactored')
class HouseholdsTest(unittest.TestCase):
    def setUp(self) -> None:
        # Fixed numpy seed before each test.
        np.random.seed(0)
        self.is_debugging = False
        # Presumably the default arguments the tests pass to synthpops -- the
        # test methods are not visible in this excerpt, TODO confirm.
        self.d_datadir = sp.datadir
        self.d_location = "seattle_metro"
import synthpops as sp

if __name__ == '__main__':

    # Run synthpops' own validation before generating anything.
    sp.validate()

    datadir = sp.datadir
    n = 2000

    # Keyword arguments for the generator: first the place, then the run switches.
    generator_kwargs = {
        'location': 'seattle_metro',
        'state_location': 'Washington',
        'country_location': 'usa',
        'sheet_name': 'United States of America',
        'verbose': False,
        'plot': True,
        'use_default': False,
    }

    sp.generate_synthetic_population(n, datadir, **generator_kwargs)
import synthpops as sp

# Run synthpops' own validation before generating anything.
sp.validate()

# this should be where your demographics data folder resides
datadir = sp.datadir

# how many people in your population
npop = 10000

# The place to model, from most to least specific.
location = 'seattle_metro'
state_location = 'Washington'
country_location = 'usa'
sheet_name = 'United States of America'

where = {
    'location': location,
    'state_location': state_location,
    'country_location': country_location,
    'sheet_name': sheet_name,
}

sp.generate_synthetic_population(npop, datadir, **where)
# NOTE: ``sp``, ``datadir``, ``location`` and ``state_location`` are used below
# but are not assigned in this excerpt; they must be defined before this point.
country_location = 'usa'
sheet_name = 'United States of America'

n = 11000
verbose = False
plot = True
write = True

# this will generate a population with microstructure and age demographics that approximate those of the location selected
# also saves to file in:
#    datadir/demographics/contact_matrices_152_countries/state_location/
sp.generate_synthetic_population(n, datadir, location=location, state_location=state_location, country_location=country_location, sheet_name=sheet_name, verbose=verbose, plot=plot, write=write)

# load that population into a dictionary of individuals who know who their contacts are
# 'Npop' is given the same n as the generation call above.
options_args = {'use_microstructure': True}
network_distr_args = {'Npop': n}
contacts = sp.make_contacts(location=location, state_location=state_location, country_location=country_location, options_args=options_args, network_distr_args=network_distr_args)

# Switched on after both calls above, so it can only affect code that follows (not visible here).
verbose = True
def make_population(n=None, max_contacts=None, generate=None, with_industry_code=False, with_facilities=False,
                    use_two_group_reduction=True, average_LTCF_degree=20, rand_seed=None):
    '''
    Build a full population network (people plus their contacts) for
    Seattle, Washington.

    The population is either read through ``sp.make_contacts`` (when
    ``generate`` ends up false) or created by one of the synthpops generators.
    In both cases the contacts are then trimmed to ``max_contacts`` and every
    contact layer is returned as a list.

    Args:
        n (int)                        : Number of people. Defaults to 10000.
        max_contacts (dict)            : Maximum number of contacts per layer: keys must be "S" (school)
                                         and/or "W" (work). Merged over the defaults ``{'S': 20, 'W': 20}``.
        generate (bool)                : True generates a new population. False reads an existing one, which
                                         requires ``n`` to be in ``popsize_choices``. None means True when ``n``
                                         is not in ``popsize_choices`` or when ``with_facilities`` is set,
                                         otherwise False.
        with_industry_code (bool)      : If True, assign industry codes for workplaces, currently only possible
                                         for cached files of populations in the US.
        with_facilities (bool)         : If True, create long term care facilities.
        use_two_group_reduction (bool) : If True, create long term care facilities with reduced contacts
                                         across both groups.
        average_LTCF_degree (int)      : Average degree in long term care facilities.
        rand_seed (int)                : If given, passed to ``sp.set_seed`` before anything else is done.

    Returns:
        network (dict): A dictionary of the full population with ages and connections.

    Raises:
        ValueError: If ``generate`` is False and ``n`` is not in ``popsize_choices``, or if
            a population is being generated with both ``with_facilities`` and ``with_industry_code``.
    '''
    if rand_seed is not None:
        sp.set_seed(rand_seed)

    # Population size: default to 10000, always an int.
    n = int(10000 if n is None else n)

    # Resolve ``generate`` to a definite yes/no.
    if n not in popsize_choices:
        if generate is False:
            choicestr = ', '.join(str(choice) for choice in popsize_choices)
            errormsg = f'If generate=False, number of people must be one of {choicestr}, not {n}'
            raise ValueError(errormsg)
        generate = True  # If not found, generate
    if generate is None:
        # Default to False, unless LTCF are requested
        generate = True if with_facilities else False

    # these defaults can be anything but should be based on relevant average
    # number of contacts for the population under study
    max_contacts = sc.mergedicts({'S': 20, 'W': 20}, max_contacts)

    place = {
        'location': 'seattle_metro',
        'state_location': 'Washington',
        'country_location': 'usa',
    }
    sheet_name = 'United States of America'

    options_args = {
        'use_microstructure': True,
        'use_industry_code': with_industry_code,
        'use_long_term_care_facilities': with_facilities,
        'use_two_group_reduction': use_two_group_reduction,
        'average_LTCF_degree': average_LTCF_degree,
    }
    network_distr_args = {'Npop': int(n)}

    # Heavy lift 1: make the contacts and their connections
    if not generate:
        # must read in from file, will fail if the data has not yet been generated
        population = sp.make_contacts(options_args=options_args,
                                      network_distr_args=network_distr_args,
                                      **place)
    elif with_facilities and with_industry_code:
        errormsg = f'Requesting both long term care facilities and industries by code is not supported yet.'
        raise ValueError(errormsg)
    elif with_facilities:
        # make a new network on the fly, with long term care facilities
        population = sp.generate_microstructure_with_facilities(
            sp.datadir,
            n=n,
            return_popdict=True,
            use_two_group_reduction=use_two_group_reduction,
            average_LTCF_degree=average_LTCF_degree,
            **place)
    else:
        # make a new network on the fly
        population = sp.generate_synthetic_population(
            n,
            sp.datadir,
            sheet_name=sheet_name,
            plot=False,
            return_popdict=True,
            **place)

    # Semi-heavy-lift 2: trim them to the desired numbers
    population = sp.trim_contacts(population, trimmed_size_dic=max_contacts, use_clusters=False)

    # Change types: every contact layer becomes a plain list.
    for person in population.values():
        layers = person['contacts']
        for layerkey in layers.keys():
            layers[layerkey] = list(layers[layerkey])

    return population
datadir = sp.datadir # point datadir where your data folder lives location = 'seattle_metro' state_location = 'Washington' country_location = 'usa' sheet_name = 'United States of America' n = 10000 verbose = False plot = False # this will generate a population with microstructure and age demographics that approximate those of the location selected # also saves to file in: # datadir/demographics/contact_matrices_152_countries/state_location/ popdict = sp.generate_synthetic_population(n, datadir, location=location, state_location=state_location, country_location=country_location, sheet_name=sheet_name, verbose=verbose, plot=plot, return_popdict=True) # load that population into a dictionary of individuals who know who their contacts are options_args = {'use_microstructure': True} network_distr_args = {'Npop': n} # Extract individuals and their contacts contacts = sp.make_contacts(location=location, state_location=state_location, country_location=country_location, options_args=options_args, network_distr_args=network_distr_args) # show_layers(contacts, show_ages=True) uids = popdict.keys() # Extract keys