def assign_weights_dates(self, adjust_dates=False):
    """
    This takes all the weights that have occurred, and a file given by the user that contains information on
    when places change in terms of dates, and writes out the weights by the dates they occur.

    :param adjust_dates: If your data is recorded in years, but your dates of change are in year-month-day,
        you can adjust all of your dates by assigning the additional characters here as a string
    :type adjust_dates: str

    :return: Nothing, just write out the file
    :rtype: None
    """
    weights_list = {}
    for place_over_time in self._weights:

        # Extract the number of possible changes from the weight data and determine if any changes occur from
        # the dates data
        shapefile_years = self._set_shapefile_years(adjust_dates, place_over_time)
        changes = self._extract_relevant_changes(place_over_time.split("__")[0], shapefile_years)

        if len(changes) == 0:
            # If no changes occur, just access the first entry and set our dictionary to these values
            weights_list[place_over_time] = {min(shapefile_years): {place_over_time: 100.0}}

        else:
            # Otherwise assign dates of the changes to occur over time
            weights_over_time = self._assigned_dates_to_weights(
                place_over_time, self._observed_dates(changes, shapefile_years), shapefile_years)

            weights_list[place_over_time] = {date: {place: weight for place, weight in place_weights}
                                             for date, place_weights in weights_over_time}

    write_json(weights_list, self._working_dir, self._write_name)
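# A minimal usage sketch (the instance name and constructor are assumptions, not part of this module): with a
# constructed weighter and a dates-of-change file loaded, writing weights by date might look like the
# following, padding recorded years out to full dates via adjust_dates.
#
#   weighter = AssignWeights(...)                   # hypothetical construction
#   weighter.assign_weights_dates(adjust_dates="-01-01")
#
# Here a recorded year such as "1931" becomes "1931-01-01", so it can be compared against year-month-day
# dates of change.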
def remove_weight(self, place, weight_date):
    """
    Remove a weight from a place

    Sometimes you may have a weight assigned that you do not want. For example, if there is a minor error in
    the drawing of a shapefile between periods you will end up with a change which may not actually occur. You
    can remove a 'weight' from a 'place' by providing the place key to 'place', and the date of the weight you
    want to remove to 'weight_date'.

    :param place: The place to load from the master database
    :type place: str

    :param weight_date: The weight's date to remove from the master
    :type weight_date: str

    :return: Nothing, remove from the master then stop
    :rtype: None
    """
    # Load the current place from the weights
    current = self._weights[place]

    # Create the replacement, where each date is assigned its previous weight places as long as the date does
    # not equal the weight_date provided
    replacement = {date: weight_places for date, weight_places in current.items() if date != weight_date}

    # Replace the original place weights with the replacement
    self._weights[place] = replacement
    write_json(self._weights, self._weights_path.parent, self._weights_path.stem)
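# Usage sketch (the place key and date are illustrative assumptions): dropping a spurious 1951 change
# recorded against a place might look like:
#
#   weighter.remove_weight("Leeds__Bradford", "1951-01-01")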
def replace_assigned_weight(self, fixed_json_path, name):
    """
    Find a place within the master dict called 'name' and add dates from the json file of fixed_json_path

    In some situations you may find a complex problem or a mistake and want to replace a given place rather
    than having to rerun the whole constructor. This allows you to replace a given place by its key in the
    base_weights file you made on all your data, and a new smaller update file. The changes between the update
    and the master will be logged and then the master file will be updated.

    :param fixed_json_path: The path to the json file to load the fixed dates from
    :type fixed_json_path: Path | str

    :param name: The place key in the master _weights to load and replace dates from
    :type name: str

    :return: Nothing, update the master weights file then stop
    :rtype: None
    """
    # Load the fix file
    fixed = load_json(fixed_json_path)

    # Create the restructured values for the named place
    key_list = self._replacement_keys(name, fixed)
    restructured = {str(year): self._replacement_values(fixed, name, year, new) for year, new in key_list}

    # Update the existing json with the new information
    write_data = self._weights
    write_data[name] = restructured
    write_json(write_data, self._weights_path.parent, self._weights_path.stem)
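# Usage sketch (the path and place key are assumptions): after preparing a small corrected json for a single
# place, you could swap it into the master weights like so:
#
#   weighter.replace_assigned_weight(Path("fixes", "Leeds_fix.json"), "Leeds__Bradford")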
def weight_external(self, write_path, write_name="Weighted"):
    """
    This will use all the places and weights from the weights by dates file, and use it to weight an external
    data source.

    :param write_path: The directory to write the weighted data to
    :type write_path: str | Path

    :param write_name: The name of the weighted output file, defaults to "Weighted"
    :type write_name: str

    :return: Nothing, write the weighted data, and any non-common dates, to file then stop
    :rtype: None
    """
    for place_name in self._weights_dates:

        # See how many changes exist for this place
        dates_of_change = list(self._weights_dates[place_name].keys())

        # If there is only one date, we have no weighting to do as the place remains unchanged from its first
        # state
        if (len(dates_of_change) == 1) and self.extract_data(place_name):
            self._master[place_name] = self.extract_data(place_name)

        # Otherwise we need to weight the data, and potentially consider non-common dates across places
        else:
            self._master[place_name] = self._weight_place(place_name, dates_of_change)

    # Write out the weighted data
    print("Finished constructing weights - writing to file")
    write_json(self._master, write_path, write_name)

    # If any places had non-common dates, write them out as well so they can be inspected
    if len(self._non_common.keys()) > 0:
        write_non_common = {key: value for key, value in self._non_common.items() if len(value) > 0}
        write_json(write_non_common, write_path, "NonCommonDates")
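# Usage sketch (the directory and file name are assumptions): weighting an external source and writing the
# result, with a "NonCommonDates" file produced alongside it when dates do not align across places:
#
#   weighter.weight_external("output", write_name="Weighted_Census")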
def relational_subprocess(self, index_list, index_of_process, data_directory, write_directory):
    """
    This sub process is run via a call from relational_database via Process

    Each process is set a sub selection of indexes from the PlaceReference loaded into _matcher. Each process
    will then isolate this name and create an output json database for it by extracting any matching entries'
    attributes from the data directory.

    :param index_list: A list of indexes to load from the PlaceReference for this process
    :type index_list: list[int]

    :param index_of_process: Which process thread this is
    :type index_of_process: int

    :param data_directory: Load directory of the standardised, cleaned, and correct data
    :type data_directory: str | Path

    :param write_directory: Write directory for the json database
    :type write_directory: str | Path

    :return: Nothing, write a json database for each location that has been indexed from the PlaceReference.
    :rtype: None
    """
    # Currently processed files in the output directory
    current_files = [f for f in directory_iterator(write_directory)]

    for call_index, place_index in enumerate(index_list, 1):
        print(f"{call_index} / {len(index_list)} for process {index_of_process}")

        # Create the unique name from the groups and isolate the gid for parsing the csv
        unique_name = "__".join(self._set_standardised_place(self._matcher[place_index]))
        gid = self._matcher[place_index][0]

        # Set the output stub for this place's json database
        place_data = {"Place_Name": unique_name, "GID": gid}

        # If the data has not already been processed
        if self._not_processed(unique_name, current_files):
            for file in directory_iterator(data_directory):

                # Load the data into memory
                data = CsvObject(Path(data_directory, file), set_columns=True)

                # Isolate any data pertaining to this place from this file and add it to the place_data dict
                self._process_relation_data(data, gid, place_data)

            write_json(place_data, write_directory, f"{unique_name}_{self._data_name}")
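# A sketch of how the parent call might fan out, assumed from the docstring's mention of relational_database
# and Process (the slicing of indexes and variable names here are illustrative, not this module's API):
#
#   from multiprocessing import Process
#
#   processes = [Process(target=self.relational_subprocess, args=(index_slice, i, data_dir, write_dir))
#                for i, index_slice in enumerate(index_slices)]
#   [p.start() for p in processes]
#   [p.join() for p in processes]
#
# Each process then works through a disjoint slice of PlaceReference indexes, so no two processes write the
# same place's json database.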
def remove_place(self, places_to_remove):
    """
    Remove a place from the master dict

    You may have a place you do not want in the master dict, but do not want to edit the shapefile to ensure
    it is not added. You can remove as many places as you want by providing a list of places to remove to
    this method.

    :param places_to_remove: The places you wish to remove from the master dict, represents the master dict's
        keys.
    :type places_to_remove: list

    :return: Nothing, will remove from master then stop
    :rtype: None
    """
    self._weights = {key: value for key, value in self._weights.items() if key not in places_to_remove}
    write_json(self._weights, self._weights_path.parent, self._weights_path.stem)
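# Usage sketch (the keys are illustrative): removing two unwanted places in a single call:
#
#   weighter.remove_place(["Leeds__Bradford", "York"])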
def combine_dataset(self, path_list, write_directory, database_name):
    """
    This will combine all the datasets you have made into a single json database

    This will combine all the regional data from all standardised datasets into a single json database. If
    you only had one database to begin with, then this just adds all the separate json databases into a
    single one. Where it is mostly used is when you have run this process on multiple datasets and now want
    all the standardised places to share attribute data in a single database.

    :param path_list: A list of paths, where each path goes to a set directory
    :type path_list: list[str | Path]

    :param write_directory: The write directory of the master database
    :type write_directory: str | Path

    :param database_name: The master database name
    :type database_name: str

    :return: Nothing, write the database to file then stop
    :rtype: None
    """
    # Initialise the output database
    master_database = {}

    # Isolate all the paths to all the files we want to load across all the databases for this geo-level
    level_data = [Path(path, file) for path in path_list for file in directory_iterator(path)]

    for index, file in enumerate(level_data):
        if index % 100 == 0:
            print(f"{index}/{len(level_data)}")

        # Load the data for this file into memory, set the master database assign name via Place_Name
        load_data = load_json(file)
        assign_name = load_data["Place_Name"]

        # If a loaded attribute does not exist within the current database entry, add it
        current_attributes = self._current_attributes(master_database, assign_name)
        for attr in load_data.keys():
            if attr not in current_attributes:
                master_database[assign_name][attr] = load_data[attr]

    write_json(master_database, write_directory, database_name)
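# Usage sketch (the directory names are assumptions): merging the per-dataset output directories of two runs
# into one master database named "Master":
#
#   weighter.combine_dataset(["Census_1931", "Census_1951"], "output", "Master")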
def add_place(self, new_weight):
    """
    Add a place to the master dict

    In some situations you may wish to add a place that was not set, or you have removed a weight and now
    want to add its replacement. Here you just provide the dict you want to add to the master json dict; each
    key will be added to the master dict.

    :param new_weight: A dict of place: weights, where place is the name of the place to be weighted and the
        weights are the weights assigned at given dates to the place
    :type new_weight: dict

    :return: Nothing, will add to master dict then stop
    :rtype: None
    """
    for key in new_weight.keys():
        self._weights[key] = new_weight[key]
    write_json(self._weights, self._weights_path.parent, self._weights_path.stem)
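# Usage sketch (the place name, dates, and weights are illustrative; the nested date -> {place: weight}
# structure mirrors what assign_weights_dates writes out): adding a place whose weights change once, in 1951:
#
#   weighter.add_place({"Leeds__Bradford": {
#       "1931-01-01": {"Leeds": 100.0},
#       "1951-01-01": {"Leeds": 60.0, "Bradford": 40.0}}})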