Example #1
0
 def test_scale(self):
     """
     Generate populations of several sizes and run the staff-ratio and distribution checks.

     Each size is handled best-effort: when generation or one of the checks
     raises, the error is printed and the loop continues with the next size.

     Returns:
         The return value of ``utilities.check_teacher_staff_ratio`` from the
         most recent size that reached that check, or ``None`` if no size did.
     """
     seed = 1
     # set param
     average_student_teacher_ratio = 22
     average_student_all_staff_ratio = 15
     datadir = self.dataDir
     location = 'seattle_metro'
     state_location = 'Washington'
     country_location = 'usa'
     i = 0  # only incremented after a size passes every step; used in output names
     result = None  # stays None when every size fails before the ratio check
     for n in [2001, 10001]:
         try:
             # Re-seed before each run so every size starts from the same seed.
             sp.set_seed(seed)
             print(seed)
             pop = sp.generate_synthetic_population(n, datadir, average_student_teacher_ratio=average_student_teacher_ratio,
                                                    average_student_all_staff_ratio=average_student_all_staff_ratio,
                                                    return_popdict=True)
             sc.savejson(os.path.join(self.resultdir, f"calltwice_{n}_{i}.json"), pop, indent=2)
             result = utilities.check_teacher_staff_ratio(pop, self.dataDir, f"calltwice_{n}_{i}", average_student_teacher_ratio,
                                                          average_student_all_staff_ratio=average_student_all_staff_ratio, err_margin=2)
             # NOTE(review): the two prefixes below lack the underscore after
             # "calltwice" that the names above have -- confirm this is intended.
             utilities_dist.check_enrollment_distribution(pop, n, datadir, location, state_location, country_location,
                                                          test_prefix=f"calltwice{n}_{i}", skip_stat_check=True, do_close=self.do_close)
             utilities_dist.check_age_distribution(pop, n, datadir, self.resultdir, location, state_location, country_location,
                                                   test_prefix=f"calltwice{n}_{i}", do_close=self.do_close)
             i += 1
         except Exception as err:
             # Still best-effort, but KeyboardInterrupt/SystemExit are no longer
             # swallowed and the cause of the failure is reported.
             print(f"check failed, continue... ({err!r})")
     return result
Example #2
0
    def test_with_ltcf(self):
        """
        Generate a population with long term care facility options and check staff ratios.

        All locals assigned before the ``locals()`` call (including ``self``) are
        captured in ``vals`` and passed to ``runpop`` as ``actual_vals``, so the
        local variable names below are part of what this test passes on.
        Presumably ``runpop`` matches them by name against the parameters of
        ``method`` -- confirm in ``runpop``.
        """
        seed = 1
        sp.set_seed(seed)
        # Generation settings; they are not referenced individually below (except
        # the two ratios) but are picked up through locals().
        n = 10001
        datadir = self.dataDir
        location = 'seattle_metro'
        state_location = 'Washington'
        country_location = 'usa'
        sheet_name = 'United States of America'
        use_two_group_reduction = True
        average_LTCF_degree = 20
        ltcf_staff_age_min = 20
        ltcf_staff_age_max = 65
        with_school_types = True
        average_class_size = 20
        inter_grade_mixing = 0.1
        average_student_teacher_ratio = 20.0
        average_teacher_teacher_degree = 3
        teacher_age_min = 25
        teacher_age_max = 70

        with_non_teaching_staff = True
        average_student_all_staff_ratio = 11
        average_additional_staff_degree = 20
        staff_age_min = 20
        staff_age_max = 75
        # One mixing type per school type key.
        school_mixing_type = {
            'pk': 'age_and_class_clustered',
            'es': 'random',
            'ms': 'age_clustered',
            'hs': 'random',
            'uv': 'random'
        }
        return_popdict = True

        # Snapshot of the locals defined so far; must stay after all settings.
        vals = locals()
        pop = runpop(resultdir=self.resultdir,
                     testprefix="staff_ltcf",
                     actual_vals=vals,
                     method=sp.generate_microstructure_with_facilities)
        # The returned value is stored but not used further in this method.
        result = check_teacher_staff_ratio(pop,
                                           average_student_teacher_ratio,
                                           average_student_all_staff_ratio,
                                           err_margin=2)
Example #3
0
    def test_with_ltcf(self):
        """
        Generate a population with long term care facility options and run the checks.

        All locals assigned before the ``locals()`` call (including ``self``) are
        captured in ``vals`` and passed to ``utilities.runpop`` as
        ``actual_vals``, so the local variable names below must be kept as they
        are. ``test_prefix`` is assigned afterwards and is therefore not part of
        that snapshot.

        The class-size, staff-ratio, age and enrollment checks only run when
        ``self.do_plot`` is truthy.
        """
        rand_seed = 1
        sp.set_seed(rand_seed)
        # Generation settings, picked up through locals() below.
        n = self.n
        datadir = self.dataDir
        location = 'seattle_metro'
        state_location = 'Washington'
        country_location = 'usa'
        sheet_name = 'United States of America'
        use_two_group_reduction = True
        average_LTCF_degree = 20
        ltcf_staff_age_min = 20
        ltcf_staff_age_max = 65
        with_school_types = True
        average_class_size = 20
        inter_grade_mixing = 0.1
        average_student_teacher_ratio = 20.0
        average_teacher_teacher_degree = 3
        teacher_age_min = 25
        teacher_age_max = 70

        with_non_teaching_staff = True
        average_student_all_staff_ratio = 11
        average_additional_staff_degree = 20
        staff_age_min = 20
        staff_age_max = 75
        school_mixing_type = {'pk': 'age_and_class_clustered', 'es': 'random', 'ms': 'age_clustered', 'hs': 'random', 'uv': 'random'}
        return_popdict = True
        vals = locals()
        # Use this method's own name as the prefix for generated artifacts.
        test_prefix = sys._getframe().f_code.co_name
        # The generator must be the long-term-care-facility one; passing None
        # here left runpop without anything to call.
        pop = utilities.runpop(resultdir=self.resultdir, testprefix=test_prefix, actual_vals=vals,
                               method=sp.generate_microstructure_with_facilities)
        if self.do_plot:
            utilities.check_class_size(pop, average_class_size, average_student_teacher_ratio,
                                       average_student_all_staff_ratio, 1)
            result = utilities.check_teacher_staff_ratio(pop, datadir, test_prefix, average_student_teacher_ratio,
                                                         average_student_all_staff_ratio, err_margin=2)
            utilities_dist.check_age_distribution(pop, self.n, datadir, self.resultdir, location, state_location, country_location, test_prefix=test_prefix, do_close=self.do_close)
            utilities_dist.check_enrollment_distribution(pop, self.n, datadir, self.resultdir, location, state_location, country_location, test_prefix=test_prefix, do_close=self.do_close)
    def test_staff_generate(self):

        """
        Generate a population of ``self.n`` people and check the teacher/staff ratios.

        All locals assigned before the ``locals()`` call (including ``self`` and
        ``test_prefix``) are captured in ``vals`` and passed to
        ``utilities.runpop`` as ``actual_vals``, so the local variable names
        below are part of what this test passes on.

        The class-size, staff-ratio, age and enrollment checks only run when
        ``self.do_plot`` is truthy.
        """
        rand_seed = 1
        sp.set_seed(rand_seed)
        # Generation settings, picked up through locals() below.
        n = self.n
        datadir = self.dataDir
        location = 'seattle_metro'
        state_location = 'Washington'
        country_location = 'usa'
        sheet_name = 'United States of America'
        school_enrollment_counts_available = False
        with_school_types = False
        school_mixing_type = 'random'
        average_class_size = 20
        inter_grade_mixing = 0.1
        average_student_teacher_ratio = 20
        average_teacher_teacher_degree = 3
        teacher_age_min = 25
        teacher_age_max = 75
        average_student_all_staff_ratio = 12
        average_additional_staff_degree = 18
        staff_age_min = 20
        staff_age_max = 75
        return_popdict = True
        # Use this method's own name as the prefix for generated artifacts.
        test_prefix = sys._getframe().f_code.co_name
        # Snapshot of the locals defined so far; must stay after all settings.
        vals = locals()
        pop = utilities.runpop(resultdir=self.resultdir, testprefix=f"{test_prefix}", actual_vals=vals, method=sp.generate_synthetic_population)
        if self.do_plot:
            utilities.check_class_size(pop, average_class_size, average_student_teacher_ratio,
                                           average_student_all_staff_ratio, 1)
            # The returned value is stored but not used further in this method.
            result = utilities.check_teacher_staff_ratio(pop, self.dataDir, f"{test_prefix}", average_student_teacher_ratio, average_student_all_staff_ratio, err_margin=2)
            utilities_dist.check_age_distribution(pop, self.n, datadir, self.resultdir, location, state_location, country_location, test_prefix=test_prefix, do_close=self.do_close)
            utilities_dist.check_enrollment_distribution(pop, self.n, datadir, self.resultdir, location, state_location, country_location, test_prefix=f"{test_prefix}", do_close=self.do_close)
Example #5
0
    def test_staff_generate(self):
        """
        Generate a population of 10001 people and check the teacher/staff ratios.

        All locals assigned before the ``locals()`` call (including ``self`` and
        ``test_prefix``) are captured in ``vals`` and passed to ``runpop`` as
        ``actual_vals``, so the local variable names below are part of what this
        test passes on.
        """
        seed = 1
        sp.set_seed(seed)
        # Generation settings, picked up through locals() below.
        n = 10001
        datadir = self.dataDir
        location = 'seattle_metro'
        state_location = 'Washington'
        country_location = 'usa'
        sheet_name = 'United States of America'
        school_enrollment_counts_available = False
        with_school_types = False
        school_mixing_type = 'random'
        average_class_size = 20
        inter_grade_mixing = 0.1
        average_student_teacher_ratio = 20
        average_teacher_teacher_degree = 3
        teacher_age_min = 25
        teacher_age_max = 75
        average_student_all_staff_ratio = 12
        average_additional_staff_degree = 18
        staff_age_min = 20
        staff_age_max = 75
        return_popdict = True

        # Name of this method; it ends up in vals but is not the prefix passed
        # to runpop, which uses the literal "staff_generate" instead.
        test_prefix = sys._getframe().f_code.co_name
        # Snapshot of the locals defined so far; must stay after all settings.
        vals = locals()
        pop = runpop(resultdir=self.resultdir,
                     testprefix="staff_generate",
                     actual_vals=vals,
                     method=sp.generate_synthetic_population)
        # The returned value is stored but not used further in this method.
        result = check_teacher_staff_ratio(pop, average_student_teacher_ratio,
                                           average_student_all_staff_ratio)
Example #6
0
def make_population(n=None, max_contacts=None, generate=None, with_industry_code=False, with_facilities=False,
                    use_two_group_reduction=True, average_LTCF_degree=20, ltcf_staff_age_min=20, ltcf_staff_age_max=60,
                    with_school_types=False, school_mixing_type='random', average_class_size=20, inter_grade_mixing=0.1,
                    average_student_teacher_ratio=20, average_teacher_teacher_degree=3, teacher_age_min=25, teacher_age_max=75,
                    with_non_teaching_staff=False,
                    average_student_all_staff_ratio=15, average_additional_staff_degree=20, staff_age_min=20, staff_age_max=75,
                    rand_seed=None):
    '''
    Make a full population network including both people (ages, sexes) and contacts using Seattle, Washington data.

    Args:
        n (int)                                 : The number of people to create. Defaults to 10000.
        max_contacts (dict)                     : A dictionary for maximum number of contacts per layer: keys must be "W" (work). Merged over the default {'W': 20}.
        generate (bool)                         : If True, generate a new population. If False, read the population from file (n must then be one of popsize_choices). If None, generate only when n is not in popsize_choices or with_facilities is True.
        with_industry_code (bool)               : If True, assign industry codes for workplaces, currently only possible for cached files of populations in the US.
        with_facilities (bool)                  : If True, create long term care facilities, currently only available for locations in the US.
        use_two_group_reduction (bool)          : If True, create long term care facilities with reduced contacts across both groups.
        average_LTCF_degree (float)             : default average degree in long term care facilities.
        ltcf_staff_age_min (int)                : Long term care facility staff minimum age.
        ltcf_staff_age_max (int)                : Long term care facility staff maximum age.
        with_school_types (bool)                : If True, creates explicit school types.
        school_mixing_type (str or dict)        : The mixing type for schools, 'random', 'age_clustered', or 'age_and_class_clustered' if string, and a dictionary of these by school type otherwise.
        average_class_size (float)              : The average classroom size.
        inter_grade_mixing (float)              : The average fraction of mixing between grades in the same school for clustered school mixing types.
        average_student_teacher_ratio (float)   : The average number of students per teacher.
        average_teacher_teacher_degree (float)  : The average number of contacts per teacher with other teachers.
        teacher_age_min (int)                   : The minimum age for teachers.
        teacher_age_max (int)                   : The maximum age for teachers.
        with_non_teaching_staff (bool)          : If True, includes non teaching staff.
        average_student_all_staff_ratio (float) : The average number of students per staff members at school (including both teachers and non teachers).
        average_additional_staff_degree (float) : The average number of contacts per additional non teaching staff in schools.
        staff_age_min (int)                     : The minimum age for non teaching staff.
        staff_age_max (int)                     : The maximum age for non teaching staff.
        rand_seed (int)                         : Start point random sequence is generated from. The seed is left untouched when None.

    Returns:
        network (dict): A dictionary of the full population with ages and connections.

    Raises:
        ValueError: If generate is False and n is not one of popsize_choices, or if a new
            population is generated with both with_facilities and with_industry_code set.
    '''
    log.debug('make_population()')

    if rand_seed is not None:
        sp.set_seed(rand_seed)

    default_n = 10000
    default_max_contacts = {'W': 20}  # this can be anything but should be based on relevant average number of contacts for the population under study

    n = default_n if n is None else int(n)

    if n not in popsize_choices:
        if generate is False:
            choicestr = ', '.join([str(choice) for choice in popsize_choices])
            errormsg = f'If generate=False, number of people must be one of {choicestr}, not {n}'
            raise ValueError(errormsg)
        # If n not found in popsize_choices and generate was not False, generate a new population.
        generate = True

    # Default to False, unless LTCF are requested
    if generate is None:
        generate = bool(with_facilities)

    max_contacts = sc.mergedicts(default_max_contacts, max_contacts)

    country_location = 'usa'
    state_location = 'Washington'
    location = 'seattle_metro'
    sheet_name = 'United States of America'

    if not generate:
        log.debug('Not generating a new population')
        options_args = {
            'use_microstructure': True,
            'use_industry_code': with_industry_code,
            'use_long_term_care_facilities': with_facilities,
            'use_two_group_reduction': use_two_group_reduction,
            'with_school_types': with_school_types,
            'with_non_teaching_staff': with_non_teaching_staff,
        }
        network_distr_args = {
            'Npop': n,
            'average_LTCF_degree': average_LTCF_degree,
            'average_class_size': average_class_size,
            'average_student_teacher_ratio': average_student_teacher_ratio,
            'average_teacher_teacher_degree': average_teacher_teacher_degree,
            'inter_grade_mixing': inter_grade_mixing,
            'average_student_all_staff_ratio': average_student_all_staff_ratio,
            'average_additional_staff_degree': average_additional_staff_degree,
            'school_mixing_type': school_mixing_type,
        }
        # must read in from file, will fail if the data has not yet been generated
        # NOTE(review): max_contacts is not passed on this path -- confirm that is intended.
        population = sp.make_contacts(location=location, state_location=state_location,
                                      country_location=country_location, sheet_name=sheet_name,
                                      options_args=options_args,
                                      network_distr_args=network_distr_args)
    else:
        log.debug('Generating a new population...')
        if with_facilities and with_industry_code:
            errormsg = 'Requesting both long term care facilities and industries by code is not supported yet.'
            raise ValueError(errormsg)

        # Keyword arguments shared by both generators. with_non_teaching_staff is
        # forwarded here; previously it was accepted by this function but never
        # reached the generators, so the flag was silently ignored.
        generator_kwargs = dict(
            location=location, state_location=state_location, country_location=country_location, sheet_name=sheet_name,
            with_school_types=with_school_types, school_mixing_type=school_mixing_type,
            average_class_size=average_class_size, inter_grade_mixing=inter_grade_mixing,
            average_student_teacher_ratio=average_student_teacher_ratio,
            average_teacher_teacher_degree=average_teacher_teacher_degree,
            teacher_age_min=teacher_age_min, teacher_age_max=teacher_age_max,
            with_non_teaching_staff=with_non_teaching_staff,
            average_student_all_staff_ratio=average_student_all_staff_ratio,
            average_additional_staff_degree=average_additional_staff_degree,
            staff_age_min=staff_age_min, staff_age_max=staff_age_max,
            return_popdict=True, trimmed_size_dic=max_contacts,
        )

        if with_facilities:
            population = sp.generate_microstructure_with_facilities(
                sp.datadir, n=n,
                use_two_group_reduction=use_two_group_reduction, average_LTCF_degree=average_LTCF_degree,
                ltcf_staff_age_min=ltcf_staff_age_min, ltcf_staff_age_max=ltcf_staff_age_max,
                **generator_kwargs)
        else:
            population = sp.generate_synthetic_population(n, sp.datadir, **generator_kwargs)

    # No separate sp.trim_contacts() step here: max_contacts is handed to the
    # generators as trimmed_size_dic instead.

    # Change types: store every contact layer as a plain list
    for person in population.values():
        contacts = person['contacts']
        for layerkey in contacts:
            contacts[layerkey] = list(contacts[layerkey])

    log.debug('make_population(): done.')
    return population
Example #7
0
def make_population(n=None,
                    max_contacts=None,
                    generate=None,
                    with_industry_code=False,
                    with_facilities=False,
                    use_two_group_reduction=True,
                    average_LTCF_degree=20,
                    rand_seed=None):
    '''
    Build the Seattle, Washington population network: people (ages, sexes) plus their contacts.

    Args:
        n (int)                        : Number of people to create; 10000 when None.
        max_contacts (dict)            : Maximum number of contacts per layer, keyed by "S" (school) and/or "W" (work); merged over the defaults of 20 each.
        generate (bool)                : If True, build a new population. If False, read it from file (n must be one of popsize_choices). If None, build a new one only when n is not in popsize_choices or with_facilities is True.
        with_industry_code (bool)      : If True, assign industry codes for workplaces, currently only possible for cached files of populations in the US
        with_facilities (bool)         : If True, create long term care facilities
        use_two_group_reduction (bool) : If True, create long term care facilities with reduced contacts across both groups
        average_LTCF_degree (int)      : default average degree in long term care facilities
        rand_seed (int)                : Seed handed to sp.set_seed; nothing is seeded when None.

    Returns:
        network (dict): A dictionary of the full population with ages and connections.

    Raises:
        ValueError: If generate is False and n is not one of popsize_choices, or if a new
            population is built with both with_facilities and with_industry_code set.

    '''

    if rand_seed is not None:
        sp.set_seed(rand_seed)

    # Fall back to the default population size, and always work with an int.
    n = int(10000 if n is None else n)

    if n not in popsize_choices:
        if generate is False:
            choicestr = ', '.join(str(size) for size in popsize_choices)
            raise ValueError(f'If generate=False, number of people must be one of {choicestr}, not {n}')
        # Unknown size and generation not forbidden: build it.
        generate = True

    # Still undecided: only build a new population when LTCF are requested.
    if generate is None:
        generate = True if with_facilities else False

    # this can be anything but should be based on relevant average number of contacts for the population under study
    contact_caps = sc.mergedicts({'S': 20, 'W': 20}, max_contacts)

    where = dict(location='seattle_metro',
                 state_location='Washington',
                 country_location='usa')

    # Heavy lift 1: make the contacts and their connections
    if generate:
        # make a new network on the fly
        if with_facilities and with_industry_code:
            raise ValueError('Requesting both long term care facilities and industries by code is not supported yet.')
        if with_facilities:
            population = sp.generate_microstructure_with_facilities(
                sp.datadir,
                n=n,
                return_popdict=True,
                use_two_group_reduction=use_two_group_reduction,
                average_LTCF_degree=average_LTCF_degree,
                **where)
        else:
            population = sp.generate_synthetic_population(
                n,
                sp.datadir,
                sheet_name='United States of America',
                plot=False,
                return_popdict=True,
                **where)
    else:
        # must read in from file, will fail if the data has not yet been generated
        population = sp.make_contacts(
            options_args={
                'use_microstructure': True,
                'use_industry_code': with_industry_code,
                'use_long_term_care_facilities': with_facilities,
                'use_two_group_reduction': use_two_group_reduction,
                'average_LTCF_degree': average_LTCF_degree,
            },
            network_distr_args={'Npop': n},
            **where)

    # Semi-heavy-lift 2: trim them to the desired numbers
    population = sp.trim_contacts(population, trimmed_size_dic=contact_caps, use_clusters=False)

    # Change types: every contact layer becomes a plain list
    for entry in population.values():
        layers = entry['contacts']
        for layer_name in layers.keys():
            layers[layer_name] = list(layers[layer_name])
    return population