예제 #1
0
    def assign_weights_dates(self, adjust_dates=False):
        """
        Write every place's weights keyed by the date from which they apply.

        For each place key in the weights, the shapefile years are set and the changes relevant to that place are
        extracted. A place without changes is stored as a single 100.0 weight on itself against the earliest
        shapefile year; otherwise the weights are assigned to the observed dates of change. The collected result is
        written out as json to the working directory under the configured write name.

        :param adjust_dates: If your data is recorded in years, but your dates of change are in year-month-day, you
            can adjust all of your dates by assigning the additional characters here as a string. Defaults to False.
        :type adjust_dates: str | bool

        :return: Nothing, just write out the file
        :rtype: None
        """

        dated_weights = {}
        for place_key in self._weights:
            years = self._set_shapefile_years(adjust_dates, place_key)

            # Changes are looked up by the part of the key that precedes the first "__"
            lookup_name = place_key.split("__")[0]
            place_changes = self._extract_relevant_changes(lookup_name, years)

            if len(place_changes) != 0:
                # Changes exist, so spread the weights across the dates on which those changes are observed
                observed = self._observed_dates(place_changes, years)
                assigned = self._assigned_dates_to_weights(place_key, observed, years)

                by_date = {}
                for date, place_weights in assigned:
                    by_date[date] = {place: weight for place, weight in place_weights}
                dated_weights[place_key] = by_date
            else:
                # Nothing changed: the place is wholly itself from the earliest shapefile year onwards
                dated_weights[place_key] = {min(years): {place_key: 100.0}}

        write_json(dated_weights, self._working_dir, self._write_name)
예제 #2
0
    def remove_weight(self, place, weight_date):
        """
        Remove the weights recorded at a given date from a place.

        Sometimes you may have a weight assigned that you do not want. For example, if there is a minor error in the
        drawing of a shapefile between periods you will end up with a change which may not actually occur. Provide
        the place key as 'place' and the date of the unwanted weights as 'weight_date'; every other date of that
        place is kept as it was, and the full weights are then written back to the weights file.

        :param place: The key of the place within the weights to edit
        :type place: str

        :param weight_date: The date whose weights should be removed from the place
        :type weight_date: str

        :return: Nothing, remove from the weights, write them out, then stop
        :rtype: None
        """
        # Rebuild the place's entry, carrying over every date apart from the one being removed
        kept = {}
        for date, weight_places in self._weights[place].items():
            if date != weight_date:
                kept[date] = weight_places

        # Swap the rebuilt entry in and persist the whole weights file
        self._weights[place] = kept
        write_json(self._weights, self._weights_path.parent, self._weights_path.stem)
예제 #3
0
    def replace_assigned_weight(self, fixed_json_path, name):
        """
        Replace the entry for 'name' in the weights with values rebuilt from a json file of fixes.

        In some situations you may find a complex problem or a mistake and want to replace a given place rather than
        rerun the whole constructor. This loads the fix file, rebuilds the dated values for the named place from it,
        overwrites that place in the weights, and writes the weights back to the weights file.

        NOTE(review): no difference between the fix file and the existing weights is logged by this method itself.

        :param fixed_json_path: The path to the json file to load the fixed dates from
        :type fixed_json_path: Path | str

        :param name: The place key in the weights whose entry is replaced
        :type name: str

        :return: Nothing, update the weights, write them out, then stop
        :rtype: None
        """
        fixed_weights = load_json(fixed_json_path)

        # Each replacement key is a (year, new) pair; the year is stored as a string key in the rebuilt entry
        restructured = {}
        for year, new in self._replacement_keys(name, fixed_weights):
            restructured[str(year)] = self._replacement_values(fixed_weights, name, year, new)

        # Overwrite the named place in the weights and persist the result
        self._weights[name] = restructured
        write_json(self._weights, self._weights_path.parent, self._weights_path.stem)
예제 #4
0
    def weight_external(self, write_path, write_name="Weighted"):
        """
        Weight an external data source with the places and weights from the weights by dates file.

        Each place with a single date and truthy extracted data is copied into the master unchanged; every other
        place is weighted across its dates of change. The master is then written to json, and any non-empty
        non-common date entries are written alongside it as "NonCommonDates".

        :param write_path: The directory to write the json output to
        :type write_path: str | Path

        :param write_name: The name of the weighted output file, defaults to "Weighted"
        :type write_name: str

        :return: Nothing, write out the weighted data (and non-common dates if there are any) then stop
        :rtype: None
        """
        for place_name in self._weights_dates:

            # See how many changes exist for this place
            dates_of_change = list(self._weights_dates[place_name].keys())

            # Only a place with a single date can be used as-is. Extract its data once here and reuse it below,
            # rather than extracting it once for the check and then again for the assignment.
            if len(dates_of_change) == 1:
                extracted = self.extract_data(place_name)
            else:
                extracted = None

            if extracted:
                # The place remains unchanged from its first state, so no weighting is needed
                self._master[place_name] = extracted
            else:
                # Otherwise we need to weight the data, and potentially consider non-common dates across places
                self._master[place_name] = self._weight_place(place_name, dates_of_change)

        # Write out the weighted data
        print("Finished constructing weights - writing to file")
        write_json(self._master, write_path, write_name)

        # Only entries that actually hold non-common dates are worth writing
        if self._non_common:
            write_non_common = {
                key: value
                for key, value in self._non_common.items() if len(value) > 0
            }
            write_json(write_non_common, write_path, "NonCommonDates")
예제 #5
0
    def relational_subprocess(self, index_list, index_of_process,
                              data_directory, write_directory):
        """
        Build a json database for each place index handed to this process.

        This sub process is run via a call from relational_database via Process. For every index it is given, the
        place is looked up in _matcher, a unique name is formed from its standardised groups, and the first element
        of the matched entry is used as the GID. Unless the place is skipped by the _not_processed check, every file
        in the data directory is loaded and passed through _process_relation_data for this GID, and the resulting
        place data is written to the write directory.

        :param index_list: A list of indexes to load from _matcher for this process
        :type index_list: list[int]

        :param index_of_process: Which process this is, used only in the progress message
        :type index_of_process: int

        :param data_directory: Load directory of the standardised, cleaned, and correct data
        :type data_directory: str | Path

        :param write_directory: Write directory for the json database
        :type write_directory: str | Path

        :return: Nothing, write a json database for each indexed place that still needs processing
        :rtype: None
        """

        # Snapshot of what is in the output directory before this process writes anything
        already_written = list(directory_iterator(write_directory))

        for call_index, place_index in enumerate(index_list, 1):
            print(f"{call_index} / {len(index_list)} for process {index_of_process}")

            # The unique name joins the standardised groups; the gid is the first element of the matched entry
            name_parts = self._set_standardised_place(self._matcher[place_index])
            unique_name = "__".join(name_parts)
            gid = self._matcher[place_index][0]

            # Output stub for this place's json database
            place_data = {"Place_Name": unique_name, "GID": gid}

            # Presumably skips places that already have an output file - verify against _not_processed
            if not self._not_processed(unique_name, already_written):
                continue

            for file in directory_iterator(data_directory):
                # Load the file into memory and add anything in it that relates to this gid to place_data
                data = CsvObject(Path(data_directory, file), set_columns=True)
                self._process_relation_data(data, gid, place_data)

            write_json(place_data, write_directory, f"{unique_name}_{self._data_name}")
예제 #6
0
    def remove_place(self, places_to_remove):
        """
        Remove one or more places from the weights.

        You may have a place you do not want in the weights, but do not want to edit the shapefile to ensure it is
        not added. Every key found in places_to_remove is dropped, the remaining places are kept as they were, and
        the weights are written back to the weights file.

        :param places_to_remove: The places you wish to remove, given as the keys they have in the weights
        :type places_to_remove: list

        :return: Nothing, will remove from the weights, write them out, then stop
        :rtype: None
        """

        # Rebuild the weights from every place that was not asked to be removed
        remaining = {}
        for key, value in self._weights.items():
            if key not in places_to_remove:
                remaining[key] = value

        self._weights = remaining
        write_json(self._weights, self._weights_path.parent, self._weights_path.stem)
예제 #7
0
    def combine_dataset(self, path_list, write_directory, database_name):
        """
        Combine the per-place json databases from every given directory into a single json database.

        Every file in every directory of path_list is loaded, and its attributes are added under its "Place_Name" in
        the combined database unless that attribute is already among the place's current attributes. If you only had
        one dataset, this simply gathers the separate json databases into one; with several datasets, the
        standardised places end up sharing their attribute data in a single database.

        :param path_list: A list of paths, where each path goes to a directory of json databases
        :type path_list: list[str | Path]

        :param write_directory: The write directory of the master database
        :type write_directory: str | Path

        :param database_name: The master database name
        :type database_name: str

        :return: Nothing, write the database to file then stop
        :rtype: None
        """

        master_database = {}

        # Gather the full path of every file across all of the given directories
        level_data = []
        for path in path_list:
            for file in directory_iterator(path):
                level_data.append(Path(path, file))

        total_files = len(level_data)
        for index, file in enumerate(level_data):
            # Report progress on every hundredth file
            if not index % 100:
                print(f"{index}/{total_files}")

            load_data = load_json(file)
            assign_name = load_data["Place_Name"]

            # Presumably _current_attributes also makes sure master_database has an entry for assign_name - verify
            current_attributes = self._current_attributes(master_database, assign_name)
            for attr, value in load_data.items():
                if attr in current_attributes:
                    continue
                master_database[assign_name][attr] = value

        write_json(master_database, write_directory, database_name)
예제 #8
0
    def add_place(self, new_weight):
        """
        Add places to the weights.

        In some situations you may wish to add a place that was not set, or you have removed a weight and now want
        to add its replacement. Every key of the dict you provide is assigned into the weights, replacing any place
        already stored under the same key, and the weights are then written back to the weights file.

        :param new_weight: A dict of place: weights, where place is the name of the place to be weighted and weights
            are the weights assigned to that place at given dates
        :type new_weight: dict

        :return: Nothing, will add to the weights, write them out, then stop
        :rtype: None
        """

        # Merge every provided place into the weights, overwriting existing keys of the same name
        self._weights.update(new_weight)
        write_json(self._weights, self._weights_path.parent, self._weights_path.stem)