def test_constraint_check(self):
    """
    Check that loading a location with check_constraints=True validates the
    data: a good location loads cleanly, while a location with negative
    distribution values raises a Warning mentioning the negative values.
    """
    location = sp.load_location_from_filepath(
        "test_location_grand_child.json", check_constraints=True)
    self.assertEqual(location['school_size_distribution'], [0.5, 0.5])

    with self.assertWarns(Warning) as wn:
        sp.load_location_from_filepath("test_location_bad.json",
                                       check_constraints=True)
    # BUG FIX: str(wn) stringifies the _AssertWarnsContext object itself and
    # never contains the warning text; the caught warning is wn.warning.
    self.assertIn(
        'has some negative values', str(wn.warning),
        'Check failed: expect to get negative distribution check messages'
    )
def test_check_probability_distribution_nonnegative(self, location_name='usa-Washington-seattle_metro', property_list=None):
    """
    Run all checks for fields in property_list representing probability
    distributions. Each should have all non negative values.

    Args:
        location_name (str)  : name of the location json to test
        property_list (list) : list of properties to check that the
                               probability distribution values are non negative
    """
    location_file_path = f"{location_name}.json"
    location = sp.load_location_from_filepath(location_file_path)

    if property_list is None:
        # check every probability distribution field on the location
        sp.logger.info(f"\nTesting all probability distributions are all non negative for {location_name}.")
        checks, msgs = sp.check_all_probability_distribution_nonnegative(location)
        err_msgs = [msg for msg in msgs if msg is not None]  # only keep messages for failures
        err_msg = "\n".join(err_msgs)
        assert sum(checks) == len(checks), err_msg  # assert that all checks passed
        print(f'All {sum(checks)} checks passed.')
    else:
        # Examples of how the non negative checks can be run for a subset of properties
        sp.logger.info(f"\nTesting a subset of probability distributions are all non negative for {location_name}")
        for property_name in property_list:  # index was unused, so no enumerate needed
            check, msg = sp.check_probability_distribution_nonnegative(location, property_name)
            assert check, msg  # idiomatic truthiness check instead of "== True"
            print(f'{property_name} check passed.')
def test_check_probability_distribution_sums(self, location_name='usa-Washington-seattle_metro', property_list=None, tolerance=1e-2):
    """
    Run all checks for fields in property_list representing probability
    distributions. Each should have a sum that equals 1 within the tolerance
    level.

    Args:
        location_name (str)  : name of the location json to test
        property_list (list) : list of properties to check the sum of the
                               probability distribution
        tolerance (float)    : difference from the sum of 1 tolerated
    """
    location_file_path = f"{location_name}.json"
    location = sp.load_location_from_filepath(location_file_path)

    if property_list is None:
        # check every probability distribution field on the location
        sp.logger.info(f"\nTesting all probability distributions sum to 1 or within tolerance {tolerance} for {location_name}.")
        checks, msgs = sp.check_all_probability_distribution_sums(location, tolerance)
        err_msgs = [msg for msg in msgs if msg is not None]  # only get the msgs for failures
        err_msg = "\n".join(err_msgs)
        assert sum(checks) == len(checks), err_msg  # assert that all checks passed
        print(f'All {sum(checks)} checks passed.')
    else:
        # Example of how the sum checks can be run for a subset of properties
        sp.logger.info(f"\nTesting a subset of probability distributions sum to 1 or within tolerance {tolerance} for {location_name}.")
        for property_name in property_list:  # index was unused, so no enumerate needed
            check, msg = sp.check_probability_distribution_sum(location, property_name, tolerance)
            assert check, msg  # idiomatic truthiness check instead of "== True"
            print(f'{property_name} check passed.')
def setup_convert_df_to_json_array(self, pars):
    """
    Set up objects to compare.

    Args:
        pars (dict): dictionary to get the data array and json array for comparison.

    Returns:
        array, json.array : An array of the desired data from a dataframe and
        the json entry for comparison.
    """
    frame = pd.read_csv(pars.filepath)

    # columns to include: all of them by default, otherwise pick by index
    selected_cols = frame.columns if pars.cols_ind == [] else frame.columns[pars.cols_ind]

    # columns whose values should be converted to integers
    if pars.int_cols_ind == []:
        integer_cols = pars.int_cols_ind
    else:
        integer_cols = list(frame.columns[pars.int_cols_ind].values)

    # array-ify all the data, converting the chosen columns to integers
    arr = sp.convert_df_to_json_array(frame, selected_cols, integer_cols)

    # corresponding json data object for the same location and data
    location = sp.load_location_from_filepath(f"{pars.location_name}.json")
    json_array = getattr(location, pars.property_name)

    if pars.property_name == 'population_age_distributions':
        # select the distribution whose number of bins matches the csv data
        matching = [entry for entry in json_array if entry.num_bins == len(arr)]
        json_array = matching[0].distribution

    return arr, json_array
# get the head of household age brackets and populate the household head age brackets field
get_household_head_age_brackets(location_data)

# get the head of household age distribution by household or family size and
# populate the household head age distribution by family size field
get_household_head_age_distribution_by_family_size(location_data)

# get the workplace size distribution array and populate the workplace size distribution field
get_workplace_size_counts_by_num_personnel(location_data)

# get age ranges by school type and populate the school types by age --- defines an age range for each school type
get_school_types_by_age(location_data)

# get reference links and populate the reference links
# you might do this step repeatedly as you find more data and populate your json
get_reference_links(location_data)

# TODO: adding notes to your json --- sometimes you may want to add additional information on how data were inferred
# you can also implement a method get_note with decorator '@location_data_construct' to return list of notes
note = ""  # add note
# FIX: only record the note when one was actually written; the original
# unconditionally appended the empty template string to the notes list.
if note:
    location_data.notes.append(note)

# save the loaded json file
sp.save_location_to_filepath(location_data, json_filepath)
print('saved.')

# check that you can reload the newly created json
new_location_data = sp.load_location_from_filepath(json_filepath)
print(f'{new_location_data.location_name} loaded.')
def test_load_minimal_location_with_parent_filepath_from_filepath(self):
    """Load a child location json by relative filepath and validate its parent-derived contents."""
    relative_path = os.path.join("unittests", "test_location_child.json")
    loaded = sp.load_location_from_filepath(relative_path)
    self.check_minimal_location_with_parent(loaded)
cols=df.columns, int_cols=['workplace_size_min', 'workplace_size_max']) location_data.workplace_size_counts_by_num_personnel = workplace_size_dist_arr return location_data if __name__ == '__main__': location_name = "Nepal" # path to the new json we want to create for Zimbabwe json_filepath = os.path.join(sp.settings.datadir, f'{location_name}.json') # check if the file already exists and if not, create one try: location_data = sp.load_location_from_filepath(json_filepath) except: location_data = sp.Location() # add the country as the location_name location_data.location_name = location_name # add age distribution data from raw data files location_data = process_age_dists(location_data) # add employment rates by age data from raw data files location_data = process_employment_rates(location_data) # add enrollment rates by age data from raw data files location_data = process_enrollment_rates(location_data)
def test_load_minimal_location_with_parent_filepath_from_filepath(self):
    """Load a child location json with constraint checking disabled and validate its parent-derived contents."""
    relative_path = "test_location_child.json"
    loaded = sp.load_location_from_filepath(relative_path, check_constraints=False)
    self.check_minimal_location_with_parent(loaded)
# We'll load location data from here.
input_location_filepath = "usa.json"
# After we modify some of the location data, we'll save it here.
output_location_filepath = "example_location.json"

print(f'Loading location from [{input_location_filepath}], relative to synthpops config datadir: [{sp.datadir}]')

# load_location_from_filepath() resolves its argument relative to the
# directory provided to synthpops.config.set_datadir(); in this example that
# is the python working directory, so the filepath above is interpreted
# relative to the python working directory.
location_data: sp.Location = sp.load_location_from_filepath(input_location_filepath)

print('Modifying the location data...')

# Record an additional note on the location.
location_data.notes.append("Here's a new note added by the example code.")

# Replace a field outright: in this community kids are expected to do a lot of chores.
location_data.employment_rates_by_age = [[1, 0.00], [2, 0.00], [3, 0.00],
                                         [4, 0.00], [5, 0.15], [6, 0.50]]

# Clear a field, irrespective of whatever it was set to before.
location_data.household_size_distribution = []

print('... done.')

# Save the location data.