Example #1
    def test_constraint_check(self):
        location = sp.load_location_from_filepath(
            "test_location_grand_child.json", check_constraints=True)
        self.assertEqual(location['school_size_distribution'], [0.5, 0.5])

        with self.assertWarns(Warning) as wn:
            sp.load_location_from_filepath("test_location_bad.json",
                                           check_constraints=True)
        # wn.warning is only populated once the context manager exits
        self.assertTrue(
            'has some negative values' in str(wn.warning),
            'Check failed: expected a negative-distribution check message'
        )
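    # The same constraint check can be exercised outside a test case; a minimal
    # sketch, assuming the bad-data fixture used above is on the datadir path:
    #
    #     import warnings
    #     with warnings.catch_warnings(record=True) as caught:
    #         warnings.simplefilter("always")
    #         sp.load_location_from_filepath("test_location_bad.json",
    #                                        check_constraints=True)
    #     for w in caught:
    #         print(w.message)  # e.g. the 'has some negative values' warning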
Example #2
    def test_check_probability_distribution_nonnegative(self, location_name='usa-Washington-seattle_metro', property_list=None):
        """
        Run all checks for fields in property_list representing probability distributions. Each
        should have all non-negative values.

        Args:
            location_name (str)  : name of the location json to test
            property_list (list) : list of properties to check for non-negative values
        """
        location_file_path = f"{location_name}.json"
        location = sp.load_location_from_filepath(location_file_path)

        if property_list is None:
            sp.logger.info(f"\nTesting that all probability distributions are non-negative for {location_name}.")
            checks, msgs = sp.check_all_probability_distribution_nonnegative(location)

            err_msgs = [msg for msg in msgs if msg is not None]  # only keep the msgs for failures
            err_msg = "\n".join(err_msgs)
            assert sum(checks) == len(checks), err_msg  # assert that all checks passed
            print(f'All {sum(checks)} checks passed.')

        else:
            # Example of how the non-negative checks can be run for a subset of properties
            sp.logger.info(f"\nTesting that a subset of probability distributions are non-negative for {location_name}.")
            for property_name in property_list:
                check, msg = sp.check_probability_distribution_nonnegative(location, property_name)
                assert check, msg
                print(f'{property_name} check passed.')
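    # The subset mode above can be driven directly; a sketch, assuming the
    # enclosing test class is named TestLocation (a hypothetical name) and that
    # these two distribution fields are present in the location json:
    #
    #     t = TestLocation()
    #     t.test_check_probability_distribution_nonnegative(
    #         property_list=['household_size_distribution', 'school_size_distribution'])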
Example #3
    def test_check_probability_distribution_sums(self, location_name='usa-Washington-seattle_metro', property_list=None, tolerance=1e-2):
        """
        Run all checks for fields in property_list representing probability distributions. Each
        should have a sum that equals 1 within the tolerance level.

        Args:
            location_name (str)  : name of the location json to test
            property_list (list) : list of properties to check the sum of the probability distribution
            tolerance (float)    : tolerated absolute difference from a sum of 1
        """
        location_file_path = f"{location_name}.json"
        location = sp.load_location_from_filepath(location_file_path)

        if property_list is None:
            sp.logger.info(f"\nTesting that all probability distributions sum to 1 within tolerance {tolerance} for {location_name}.")
            checks, msgs = sp.check_all_probability_distribution_sums(location, tolerance)

            err_msgs = [msg for msg in msgs if msg is not None]  # only keep the msgs for failures
            err_msg = "\n".join(err_msgs)
            assert sum(checks) == len(checks), err_msg  # assert that all checks passed
            print(f'All {sum(checks)} checks passed.')

        else:
            # Example of how the sum checks can be run for a subset of properties
            sp.logger.info(f"\nTesting that a subset of probability distributions sum to 1 within tolerance {tolerance} for {location_name}.")
            for property_name in property_list:
                check, msg = sp.check_probability_distribution_sum(location, property_name, tolerance)
                assert check, msg
                print(f'{property_name} check passed.')
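    # For intuition, the tolerance test presumably reduces to a comparison like
    # the sketch below; the library's actual check may differ in its details:
    #
    #     def sums_to_one(weights, tolerance=1e-2):
    #         return abs(sum(weights) - 1) <= tolerance
    #
    #     assert sums_to_one([0.5, 0.5])       # e.g. the school size distribution in Example #1
    #     assert not sums_to_one([0.6, 0.5])   # off by 0.1, outside the 1e-2 tolerance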
Example #4
    def setup_convert_df_to_json_array(self, pars):
        """
        Set up objects to compare.

        Args:
            pars (dict): dictionary of parameters specifying the data array and json array to compare.

        Returns:
            array, json array : an array of the desired data from a dataframe and
            the corresponding json entry for comparison.
        """
        df = pd.read_csv(pars.filepath)

        # columns to include: include all by default
        if pars.cols_ind == []:
            cols = df.columns
        else:
            cols = df.columns[pars.cols_ind]  # use indices to indicate which columns to include

        # columns to convert to integers: none by default
        if pars.int_cols_ind == []:
            int_cols = pars.int_cols_ind
        else:
            int_cols = list(df.columns[pars.int_cols_ind].values)

        # array-ify all the data, convert some columns to integers
        arr = sp.convert_df_to_json_array(df, cols, int_cols)

        # corresponding json data object for the same location and data
        location = sp.load_location_from_filepath(f"{pars.location_name}.json")

        json_array = getattr(location, pars.property_name)

        if pars.property_name == 'population_age_distributions':
            # select the distribution whose number of bins matches the data array
            json_array = [j for j in json_array
                          if j.num_bins == len(arr)][0].distribution

        return arr, json_array
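    # A sketch of how pars might be assembled for this helper; the key names
    # follow the attribute accesses above, while the filename and index values
    # are hypothetical (sc.objdict from sciris is assumed for dotted access):
    #
    #     import sciris as sc
    #     pars = sc.objdict(
    #         filepath='seattle_metro_age_distribution.csv',  # hypothetical raw data file
    #         location_name='usa-Washington-seattle_metro',
    #         property_name='population_age_distributions',
    #         cols_ind=[],          # empty list: include all columns
    #         int_cols_ind=[0, 1],  # hypothetical indices of integer columns
    #     )
    #     arr, json_array = self.setup_convert_df_to_json_array(pars)  # from within the test class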
Example #5
    # get the head of household age brackets and populate the household head age brackets field
    get_household_head_age_brackets(location_data)

    # get the head of household age distribution by household or family size and populate the household head age distribution by family size field
    get_household_head_age_distribution_by_family_size(location_data)

    # get the workplace size distribution array and populate the workplace size distribution field
    get_workplace_size_counts_by_num_personnel(location_data)

    # get age ranges by school type and populate the school types by age --- defines an age range for each school type
    get_school_types_by_age(location_data)

    # get reference links and populate the reference links
    # you might do this step repeatedly as you find more data and populate your json
    get_reference_links(location_data)

    # TODO: add notes to your json --- sometimes you may want to record additional information on how the data were inferred
    # you can also implement a method get_note with the decorator '@location_data_construct' to return a list of notes
    note = ""
    # add the note
    location_data.notes.append(note)

    # save the location data to the json file
    sp.save_location_to_filepath(location_data, json_filepath)
    print('saved.')

    # check that you can reload the newly created json
    new_location_data = sp.load_location_from_filepath(json_filepath)
    print(f'{new_location_data.location_name} loaded.')
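    # Each get_* step above follows the same populate pattern; a minimal sketch
    # of one of them, with a hypothetical CSV filename and integer columns (the
    # real script reads location-specific raw data):
    #
    #     def get_household_head_age_brackets(location_data):
    #         df = pd.read_csv('household_head_age_brackets.csv')  # hypothetical file
    #         arr = sp.convert_df_to_json_array(df, cols=df.columns,
    #                                           int_cols=['age_min', 'age_max'])
    #         location_data.household_head_age_brackets = arr
    #         return location_data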
Example #6
    def test_load_minimal_location_with_parent_filepath_from_filepath(self):
        child_filepath = os.path.join("unittests", "test_location_child.json")
        location = sp.load_location_from_filepath(child_filepath)
        self.check_minimal_location_with_parent(location)
Example #7
    # convert the workplace size dataframe and populate the workplace size distribution field
    workplace_size_dist_arr = sp.convert_df_to_json_array(
        df,
        cols=df.columns,
        int_cols=['workplace_size_min', 'workplace_size_max'])
    location_data.workplace_size_counts_by_num_personnel = workplace_size_dist_arr
    return location_data


if __name__ == '__main__':

    location_name = "Nepal"

    # path to the new json we want to create for Nepal
    json_filepath = os.path.join(sp.settings.datadir, f'{location_name}.json')

    # load the json if it already exists; otherwise start from an empty Location
    try:
        location_data = sp.load_location_from_filepath(json_filepath)
    except Exception:
        location_data = sp.Location()

    # add the country as the location_name
    location_data.location_name = location_name

    # add age distribution data from raw data files
    location_data = process_age_dists(location_data)

    # add employment rates by age data from raw data files
    location_data = process_employment_rates(location_data)

    # add enrollment rates by age data from raw data files
    location_data = process_enrollment_rates(location_data)
Example #8
    def test_load_minimal_location_with_parent_filepath_from_filepath(self):
        child_filepath = "test_location_child.json"
        location = sp.load_location_from_filepath(child_filepath,
                                                  check_constraints=False)
        self.check_minimal_location_with_parent(location)
Example #9
    # We'll load location data from here.
    input_location_filepath = "usa.json"
    # After we modify some of the location data, we'll save it here.
    output_location_filepath = "example_location.json"

    print(
        f'Loading location from [{input_location_filepath}], relative to synthpops config datadir: [{sp.datadir}]'
    )

    # Load the location data file.  When we invoke load_location_from_filepath() below, the argument will be
    # interpreted relative to the directory provided to synthpops.config.set_datadir(). In this case,
    # we are setting that to be the python working directory.  So, the argument to load_location_from_filepath()
    # will be interpreted relative to the python working directory.
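    # For example, a setup call along the lines sketched here (hypothetical for
    # this script) would make relative filepaths resolve against the working directory:
    #
    #     sp.config.set_datadir(os.getcwd())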

    location_data: sp.Location = sp.load_location_from_filepath(
        input_location_filepath)

    print('Modifying the location data...')
    # Add a note to the notes field.
    location_data.notes.append("Here's a new note added by the example code.")

    # Overwrite a field.  Here we have a community where kids are expected to do a lot of chores.
    location_data.employment_rates_by_age = [[1, 0.00], [2, 0.00], [3, 0.00],
                                             [4, 0.00], [5, 0.15], [6, 0.50]]

    # Clear a field, irrespective of whatever it was set to before.
    location_data.household_size_distribution = []

    print('... done.')

    # Save the location data.
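    sp.save_location_to_filepath(location_data, output_location_filepath)
    print('saved.')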