Exemple #1
0
 def test_scale(self):
     """
     Generate populations of two sizes and run the staffing and distribution checks on each.

     For every size the random seed is reset, a population is generated and written as
     JSON into ``self.resultdir``, and then the teacher/staff ratio, enrollment
     distribution and age distribution checks are run. A failure for one size is
     reported on stdout and the loop moves on to the next size.

     Returns:
         The value returned by ``utilities.check_teacher_staff_ratio`` for the last size
         that got that far, or ``None`` if no size did.
     """
     seed = 1
     # set param
     average_student_teacher_ratio = 22
     average_student_all_staff_ratio = 15
     datadir = self.dataDir
     location = 'seattle_metro'
     state_location = 'Washington'
     country_location = 'usa'
     # Bound up front so that failing on every size returns None instead of
     # raising UnboundLocalError at the final return.
     result = None
     i = 0  # only advanced when a size passes every check; part of the output names
     for n in [2001, 10001]:
         try:
             sp.set_seed(seed)
             print(seed)
             pop = sp.generate_synthetic_population(n, datadir, average_student_teacher_ratio=average_student_teacher_ratio,
                                                    average_student_all_staff_ratio=average_student_all_staff_ratio,
                                                    return_popdict=True)
             sc.savejson(os.path.join(self.resultdir, f"calltwice_{n}_{i}.json"), pop, indent=2)
             result = utilities.check_teacher_staff_ratio(pop, self.dataDir, f"calltwice_{n}_{i}", average_student_teacher_ratio,
                                                          average_student_all_staff_ratio=average_student_all_staff_ratio, err_margin=2)
             utilities_dist.check_enrollment_distribution(pop, n, datadir, location, state_location, country_location,
                                                     test_prefix=f"calltwice{n}_{i}", skip_stat_check=True, do_close=self.do_close)
             utilities_dist.check_age_distribution(pop, n, datadir, self.resultdir, location, state_location, country_location,
                                              test_prefix=f"calltwice{n}_{i}", do_close=self.do_close)
             i += 1
         except Exception as err:
             # Deliberately best-effort: keep going with the next size, but only for
             # ordinary errors, so Ctrl-C / SystemExit still stop the run.
             print("check failed, continue...")
             print(f"  reason: {err!r}")
     return result
Exemple #2
0
import json
import collections
import scipy
import utilities
import synthpops as sp
from synthpops import households as sphh
from synthpops import data_distributions as spdd

# Shared fixture: a 500-person population generated once, when this module is imported,
# with the parameters below. Each test case validates properties of this "seapop_500".
seapop_500 = sp.generate_synthetic_population(
    n=500, datadir=sp.settings.datadir,
    location='seattle_metro', state_location='Washington', country_location='usa',
    sheet_name='United States of America',
    plot=False, write=False,
    return_popdict=True, use_default=False,
)


class HouseholdsTest(unittest.TestCase):
    def setUp(self) -> None:
        """
        Set up class variables

        Returns:
            None
        """
Exemple #3
0
def make_population(n=None,
                    max_contacts=None,
                    as_objdict=False,
                    generate=False):
    '''
    Make a full population network including both people (ages, sexes) and contacts using Seattle, Washington cached data.

    The cached population is tried first via ``sp.make_contacts``. If that fails and
    ``generate`` is True, a new population is built with ``sp.generate_synthetic_population``.
    The result is then passed through ``sp.trim_contacts`` and every contact layer is
    converted to a ``list``.

    Args:
        n (int)             : The number of people to create. Defaults to 10000.
        max_contacts (dict) : A dictionary for maximum number of contacts per layer: keys must be "S" (school) and/or "W" (work).
                              Merged over the defaults ``{'S': 20, 'W': 10}``.
        as_objdict (bool)   : If True, change popdict type to ``sc.objdict`` (the population, each person and each person's contacts).
        generate (bool)     : If True, first look for cached population files and if those are not available, generate new population

    Returns:
        network (dict): A dictionary of the full population with ages and connections.

    Raises:
        ValueError: If ``generate`` is False and either ``n`` is not one of ``popsize_choices``
            or the cached population could not be loaded.

    '''

    default_n = 10000
    default_max_contacts = {
        'S': 20,
        'W': 10
    }  # this can be anything but should be based on relevant average number of contacts for the population under study

    if n is None:
        n = default_n
    n = int(n)
    if n not in popsize_choices and not generate:
        choicestr = ', '.join(str(choice) for choice in popsize_choices)
        errormsg = f'Number of people must be one of {choicestr}, not {n}'
        raise ValueError(errormsg)

    max_contacts = sc.mergedicts(default_max_contacts, max_contacts)

    country_location = 'usa'
    state_location = 'Washington'
    location = 'seattle_metro'
    sheet_name = 'United States of America'

    options_args = {'use_microstructure': True}
    network_distr_args = {'Npop': int(n)}

    # Heavy lift 1: make the contacts and their connections
    try:
        # try to read in from file
        population = sp.make_contacts(location=location,
                                      state_location=state_location,
                                      country_location=country_location,
                                      options_args=options_args,
                                      network_distr_args=network_distr_args)
    except Exception as err:
        if not generate:
            # The message is built here: the size-check message above only exists on a
            # path that has already raised, so it cannot be reused at this point.
            errormsg = (f'Could not load a cached population of {n} people and generate=False; '
                        f'pass generate=True to create a new one')
            raise ValueError(errormsg) from err
        # make a new network on the fly
        population = sp.generate_synthetic_population(
            n,
            sp.datadir,
            location=location,
            state_location=state_location,
            country_location=country_location,
            sheet_name=sheet_name,
            plot=False,
            return_popdict=True)

    # Semi-heavy-lift 2: trim them to the desired numbers
    population = sp.trim_contacts(population,
                                  trimmed_size_dic=max_contacts,
                                  use_clusters=False)

    # Change types
    if as_objdict:
        population = sc.objdict(population)
    for key in population.keys():
        if as_objdict:
            population[key] = sc.objdict(population[key])
            population[key]['contacts'] = sc.objdict(
                population[key]['contacts'])
        contacts = population[key]['contacts']
        for layerkey in contacts.keys():
            contacts[layerkey] = list(contacts[layerkey])

    return population
Exemple #4
0
import synthpops as sp

if __name__ == '__main__':
    # sp.validate() is called first, before the data directory is read.
    sp.validate()
    datadir = sp.datadir

    # Place the population is generated for.
    country_location = 'usa'
    state_location = 'Washington'
    location = 'seattle_metro'
    sheet_name = 'United States of America'

    # Run settings.
    n = 2000
    verbose = False  # switch to True for verbose output
    plot = True  # switch to False to skip plotting
    school_enrollment_counts_available = False
    use_default = False

    sp.generate_synthetic_population(
        n, datadir,
        location=location, state_location=state_location, country_location=country_location,
        sheet_name=sheet_name,
        school_enrollment_counts_available=school_enrollment_counts_available,
        verbose=verbose, plot=plot, use_default=use_default)
Exemple #5
0
def make_population(n=None,
                    max_contacts=None,
                    generate=None,
                    with_industry_code=False,
                    with_facilities=False,
                    use_two_group_reduction=True,
                    average_LTCF_degree=20,
                    ltcf_staff_age_min=20,
                    ltcf_staff_age_max=60,
                    with_school_types=False,
                    school_mixing_type='random',
                    average_class_size=20,
                    inter_grade_mixing=0.1,
                    average_student_teacher_ratio=20,
                    average_teacher_teacher_degree=3,
                    teacher_age_min=25,
                    teacher_age_max=75,
                    with_non_teaching_staff=False,
                    average_student_all_staff_ratio=15,
                    average_additional_staff_degree=20,
                    staff_age_min=20,
                    staff_age_max=75,
                    rand_seed=None):
    '''
    Build a Seattle, Washington population network: people plus their contacts.

    Either reads a cached population through ``sp.make_contacts`` or generates a new one,
    then converts every contact layer of every person to a ``list``.

    Args:
        n (int)                                 : Number of people; 10000 when omitted.
        max_contacts (dict)                     : Maximum number of contacts per layer, merged over the default ``{'W': 20}``.
        generate (bool)                         : True generates a new population, False reads the cached one. When left as None,
                                                  a population is generated only if ``n`` is not in ``popsize_choices`` or
                                                  ``with_facilities`` is set.
        with_industry_code (bool)               : If True, assign industry codes for workplaces (cached US populations only).
        with_facilities (bool)                  : If True, create long term care facilities (US locations only).
        use_two_group_reduction (bool)          : If True, create long term care facilities with reduced contacts across both groups.
        average_LTCF_degree (float)             : Default average degree in long term care facilities.
        ltcf_staff_age_min (int)                : Long term care facility staff minimum age (only passed on when generating with facilities).
        ltcf_staff_age_max (int)                : Long term care facility staff maximum age (only passed on when generating with facilities).
        with_school_types (bool)                : If True, creates explicit school types.
        school_mixing_type (str or dict)        : 'random', 'age_clustered' or 'age_and_class_clustered', or a dict of these by school type.
        average_class_size (float)              : The average classroom size.
        inter_grade_mixing (float)              : Average fraction of mixing between grades in the same school for clustered mixing types.
        average_student_teacher_ratio (float)   : The average number of students per teacher.
        average_teacher_teacher_degree (float)  : The average number of contacts per teacher with other teachers.
        teacher_age_min (int)                   : The minimum age for teachers (only passed on when generating).
        teacher_age_max (int)                   : The maximum age for teachers (only passed on when generating).
        with_non_teaching_staff (bool)          : If True, includes non teaching staff.
        average_student_all_staff_ratio (float) : Average number of students per staff member at school (teachers and non teachers).
        average_additional_staff_degree (float) : Average number of contacts per additional non teaching staff in schools.
        staff_age_min (int)                     : The minimum age for non teaching staff (only passed on when generating).
        staff_age_max (int)                     : The maximum age for non teaching staff (only passed on when generating).
        rand_seed (int)                         : If given, passed to ``sp.set_seed`` before anything else is done.

    Returns:
        network (dict): A dictionary of the full population with ages and connections.

    Raises:
        ValueError: If ``generate`` is False and ``n`` is not in ``popsize_choices``, or if a new
            population is requested with both ``with_facilities`` and ``with_industry_code``.
    '''
    log.debug('make_population()')

    if rand_seed is not None:
        sp.set_seed(rand_seed)

    # Population size: fall back to 10000, then coerce to int.
    n = int(10000 if n is None else n)

    # Decide whether to generate or to read from cache.
    if n not in popsize_choices:
        if generate is False:
            choicestr = ', '.join(str(choice) for choice in popsize_choices)
            raise ValueError(f'If generate=False, number of people must be one of {choicestr}, not {n}')
        generate = True  # no cached population of this size, so one has to be generated
    elif generate is None:
        generate = bool(with_facilities)  # unspecified: generate only when LTCF are requested

    # this can be anything but should be based on relevant average number of contacts for the population under study
    max_contacts = sc.mergedicts({'W': 20}, max_contacts)

    sheet_name = 'United States of America'
    location = 'seattle_metro'
    state_location = 'Washington'
    country_location = 'usa'

    options_args = {
        'use_microstructure': True,
        'use_industry_code': with_industry_code,
        'use_long_term_care_facilities': with_facilities,
        'use_two_group_reduction': use_two_group_reduction,
        'with_school_types': with_school_types,
        'with_non_teaching_staff': with_non_teaching_staff,
    }

    network_distr_args = {
        'Npop': int(n),
        'average_LTCF_degree': average_LTCF_degree,
        'average_class_size': average_class_size,
        'average_student_teacher_ratio': average_student_teacher_ratio,
        'average_teacher_teacher_degree': average_teacher_teacher_degree,
        'inter_grade_mixing': inter_grade_mixing,
        'average_student_all_staff_ratio': average_student_all_staff_ratio,
        'average_additional_staff_degree': average_additional_staff_degree,
        'school_mixing_type': school_mixing_type,
    }

    # Heavy lift 1: make the contacts and their connections
    if generate:
        log.debug('Generating a new population...')
        if with_facilities and with_industry_code:
            raise ValueError('Requesting both long term care facilities and industries by code is not supported yet.')

        # Keyword arguments common to both generator entry points.
        shared_kwargs = dict(
            location=location,
            state_location=state_location,
            country_location=country_location,
            sheet_name=sheet_name,
            with_school_types=with_school_types,
            school_mixing_type=school_mixing_type,
            average_class_size=average_class_size,
            inter_grade_mixing=inter_grade_mixing,
            average_student_teacher_ratio=average_student_teacher_ratio,
            average_teacher_teacher_degree=average_teacher_teacher_degree,
            teacher_age_min=teacher_age_min,
            teacher_age_max=teacher_age_max,
            average_student_all_staff_ratio=average_student_all_staff_ratio,
            average_additional_staff_degree=average_additional_staff_degree,
            staff_age_min=staff_age_min,
            staff_age_max=staff_age_max,
            return_popdict=True,
            trimmed_size_dic=max_contacts,
        )

        if with_facilities:
            population = sp.generate_microstructure_with_facilities(
                sp.datadir,
                n=n,
                use_two_group_reduction=use_two_group_reduction,
                average_LTCF_degree=average_LTCF_degree,
                ltcf_staff_age_min=ltcf_staff_age_min,
                ltcf_staff_age_max=ltcf_staff_age_max,
                **shared_kwargs)
        else:
            population = sp.generate_synthetic_population(n, sp.datadir, **shared_kwargs)
    else:
        log.debug('Not generating a new population')
        # must read in from file, will fail if the data has not yet been generated
        population = sp.make_contacts(
            location=location,
            state_location=state_location,
            country_location=country_location,
            sheet_name=sheet_name,
            options_args=options_args,
            network_distr_args=network_distr_args)

    # Semi-heavy-lift 2: trim them to the desired numbers
    # population = sp.trim_contacts(population, trimmed_size_dic=max_contacts, use_clusters=False)

    # Change types: every contact layer becomes a plain list.
    for key in population.keys():
        contacts = population[key]['contacts']
        for layerkey in contacts.keys():
            contacts[layerkey] = list(contacts[layerkey])

    log.debug('make_population(): done.')
    return population
Exemple #6
0
import unittest
import numpy as np
import json
import synthpops as sp
from synthpops import households as sphh
from synthpops import data_distributions as spdd

# Module-level fixture: a 500-person population generated once, when this module is imported.
seapop_500 = sp.generate_synthetic_population(
    n=500, datadir=sp.datadir,
    location='seattle_metro', state_location='Washington', country_location='usa',
    sheet_name='United States of America',
    school_enrollment_counts_available=False,
    verbose=False, plot=False, write=False,
    return_popdict=True, use_default=False,
)

# Printed on import as a reminder; the test class below is skipped for the same reason.
print('Needs to be refactored')


@unittest.skip('Needs to be refactored')
class HouseholdsTest(unittest.TestCase):
    """Household tests; the whole class is currently skipped by the decorator above."""

    def setUp(self) -> None:
        """Seed NumPy's global random generator and store the defaults on the instance."""
        np.random.seed(0)  # fixed seed so each test starts from the same random state
        self.is_debugging = False
        self.d_datadir = sp.datadir  # data directory exposed by synthpops
        self.d_location = "seattle_metro"
import synthpops as sp

if __name__ == '__main__':
    # sp.validate() is called first, before the data directory is read.
    sp.validate()
    datadir = sp.datadir

    # Place the population is generated for.
    country_location = 'usa'
    state_location = 'Washington'
    location = 'seattle_metro'
    sheet_name = 'United States of America'

    # Run settings.
    n = 2000
    verbose = False  # switch to True for verbose output
    plot = True  # switch to False to skip plotting
    use_default = False

    sp.generate_synthetic_population(
        n, datadir,
        location=location, state_location=state_location, country_location=country_location,
        sheet_name=sheet_name,
        verbose=verbose, plot=plot, use_default=use_default)
Exemple #8
0
import synthpops as sp

# sp.validate() is called first, before the data directory is read.
sp.validate()

# this should be where your demographics data folder resides
datadir = sp.datadir

# Place the population is generated for.
country_location = 'usa'
state_location = 'Washington'
location = 'seattle_metro'
sheet_name = 'United States of America'

# how many people in your population
npop = 10000

sp.generate_synthetic_population(
    npop, datadir,
    location=location, state_location=state_location, country_location=country_location,
    sheet_name=sheet_name)
    country_location = 'usa'
    sheet_name = 'United States of America'

    n = 11000
    verbose = False
    plot = True
    write = True

    # this will generate a population with microstructure and age demographics that approximate those of the location selected
    # also saves to file in:
    #    datadir/demographics/contact_matrices_152_countries/state_location/
    sp.generate_synthetic_population(n,
                                     datadir,
                                     location=location,
                                     state_location=state_location,
                                     country_location=country_location,
                                     sheet_name=sheet_name,
                                     verbose=verbose,
                                     plot=plot,
                                     write=write)

    # load that population into a dictionary of individuals who know who their contacts are
    options_args = {'use_microstructure': True}
    network_distr_args = {'Npop': n}
    contacts = sp.make_contacts(location=location,
                                state_location=state_location,
                                country_location=country_location,
                                options_args=options_args,
                                network_distr_args=network_distr_args)

    verbose = True
Exemple #10
0
def make_population(n=None,
                    max_contacts=None,
                    generate=None,
                    with_industry_code=False,
                    with_facilities=False,
                    use_two_group_reduction=True,
                    average_LTCF_degree=20,
                    rand_seed=None):
    '''
    Build a Seattle, Washington population network: people plus their contacts.

    Either reads a cached population through ``sp.make_contacts`` or generates a new one,
    trims the contacts with ``sp.trim_contacts`` and finally converts every contact layer
    of every person to a ``list``.

    Args:
        n (int)                        : Number of people; 10000 when omitted.
        max_contacts (dict)            : Maximum number of contacts per layer, keys "S" (school) and/or "W" (work);
                                         merged over the defaults ``{'S': 20, 'W': 20}``.
        generate (bool)                : True generates a new population, False reads the cached one. When left as None,
                                         a population is generated only if ``n`` is not in ``popsize_choices`` or
                                         ``with_facilities`` is set.
        with_industry_code (bool)      : If True, assign industry codes for workplaces, currently only possible for cached files of populations in the US
        with_facilities (bool)         : If True, create long term care facilities
        use_two_group_reduction (bool) : If True, create long term care facilities with reduced contacts across both groups
        average_LTCF_degree (int)      : default average degree in long term care facilities
        rand_seed (int)                : If given, passed to ``sp.set_seed`` before anything else is done.

    Returns:
        network (dict): A dictionary of the full population with ages and connections.

    Raises:
        ValueError: If ``generate`` is False and ``n`` is not in ``popsize_choices``, or if a new
            population is requested with both ``with_facilities`` and ``with_industry_code``.

    '''

    if rand_seed is not None:
        sp.set_seed(rand_seed)

    # Population size: fall back to 10000, then coerce to int.
    n = int(10000 if n is None else n)

    # Decide whether to generate or to read from cache.
    if n not in popsize_choices:
        if generate is False:
            choicestr = ', '.join(str(choice) for choice in popsize_choices)
            raise ValueError(f'If generate=False, number of people must be one of {choicestr}, not {n}')
        generate = True  # If not found, generate
    elif generate is None:
        generate = bool(with_facilities)  # unspecified: generate only when LTCF are requested

    # this can be anything but should be based on relevant average number of contacts for the population under study
    max_contacts = sc.mergedicts({'S': 20, 'W': 20}, max_contacts)

    sheet_name = 'United States of America'
    # Location keywords shared by every call below.
    where = dict(location='seattle_metro',
                 state_location='Washington',
                 country_location='usa')

    # Heavy lift 1: make the contacts and their connections
    if generate:
        # make a new network on the fly
        if with_facilities and with_industry_code:
            raise ValueError('Requesting both long term care facilities and industries by code is not supported yet.')
        if with_facilities:
            population = sp.generate_microstructure_with_facilities(
                sp.datadir,
                n=n,
                return_popdict=True,
                use_two_group_reduction=use_two_group_reduction,
                average_LTCF_degree=average_LTCF_degree,
                **where)
        else:
            population = sp.generate_synthetic_population(
                n,
                sp.datadir,
                sheet_name=sheet_name,
                plot=False,
                return_popdict=True,
                **where)
    else:
        # must read in from file, will fail if the data has not yet been generated
        options_args = {
            'use_microstructure': True,
            'use_industry_code': with_industry_code,
            'use_long_term_care_facilities': with_facilities,
            'use_two_group_reduction': use_two_group_reduction,
            'average_LTCF_degree': average_LTCF_degree
        }
        network_distr_args = {'Npop': int(n)}
        population = sp.make_contacts(options_args=options_args,
                                      network_distr_args=network_distr_args,
                                      **where)

    # Semi-heavy-lift 2: trim them to the desired numbers
    population = sp.trim_contacts(population, trimmed_size_dic=max_contacts, use_clusters=False)

    # Change types: every contact layer becomes a plain list.
    for key in population.keys():
        contacts = population[key]['contacts']
        for layerkey in contacts.keys():
            contacts[layerkey] = list(contacts[layerkey])
    return population
Exemple #11
0
# Settings
datadir = sp.datadir  # point datadir where your data folder lives
n = 10000
country_location = 'usa'
state_location = 'Washington'
location = 'seattle_metro'
sheet_name = 'United States of America'
plot = False
verbose = False

# Generate a population with microstructure and age demographics that approximate those
# of the selected location. It is also saved to file in:
#    datadir/demographics/contact_matrices_152_countries/state_location/
popdict = sp.generate_synthetic_population(
    n, datadir,
    location=location, state_location=state_location, country_location=country_location,
    sheet_name=sheet_name,
    verbose=verbose, plot=plot,
    return_popdict=True)

# Load that population into a dictionary of individuals who know who their contacts are.
network_distr_args = {'Npop': n}
options_args = {'use_microstructure': True}

# Extract individuals and their contacts
contacts = sp.make_contacts(
    location=location, state_location=state_location, country_location=country_location,
    options_args=options_args,
    network_distr_args=network_distr_args)
# show_layers(contacts, show_ages=True)

# Extract keys
uids = popdict.keys()