Exemple #1
0
    def get_rank(self: T, uuid: str, start_date: Timestamp, end_date: Timestamp) -> Dict:
        """Look up the rank(s) stored for a Datasource that overlap a date range

        Args:
            uuid: UUID of Datasource
            start_date: Start of the date range to search
            end_date: End of the date range to search
        Returns:
            dict: Maps each rank to the list of stored dateranges holding that
            rank which overlap the search range. An empty dict is returned if
            no rank data is held for the UUID.
        """
        from openghg.util import daterange_overlap, create_daterange_str
        from collections import defaultdict

        # Nothing has been ranked for this Datasource
        if uuid not in self._rank_data:
            return {}

        wanted = create_daterange_str(start=start_date, end=end_date)

        # Group every stored daterange that touches the search window by its rank
        by_rank = defaultdict(list)
        for stored_daterange, stored_rank in self._rank_data[uuid].items():
            if not daterange_overlap(daterange_a=wanted, daterange_b=stored_daterange):
                continue
            by_rank[stored_rank].append(stored_daterange)

        return by_rank
Exemple #2
0
def test_daterange_overlap():
    """Check daterange_overlap is True for overlapping ranges and False for disjoint ones."""
    reference = create_daterange_str(start="2001-01-01", end="2001-06-30")

    # Starts inside the reference range, so the two overlap
    overlapping = create_daterange_str(start="2001-02-01", end="2001-09-01")
    assert daterange_overlap(daterange_a=reference, daterange_b=overlapping) is True

    # Starts the day after the reference range ends, so there is no overlap
    disjoint = create_daterange_str(start="2001-07-01", end="2001-11-01")
    assert daterange_overlap(daterange_a=reference, daterange_b=disjoint) is False
Exemple #3
0
    def add_field_data(self, data: Dataset, data_type: str) -> None:
        """Store a field-type Dataset (e.g. footprints) on this Datasource

        The Dataset is stored as a single, unchunked segment keyed by the
        daterange string it covers. Any segment already held whose daterange
        overlaps the new one is dropped from the current data.

        TODO - unsure if add_field_data is the best name for this function
        Could add a more general function that allows toggle of chunking

        Args:
            data: Footprint data in an xarray.Dataset
            data_type: Type of data (footprints, flux, met)
        Returns:
            None
        """
        from openghg.util import daterange_overlap

        # For the moment footprints are not chunked, so a single daterange
        # string covers the whole of the Dataset
        incoming_daterange = self.get_dataset_daterange_str(dataset=data)
        incoming = {incoming_daterange: data}

        if not self._data:
            self._data = incoming
        else:
            # We don't want the same data twice (it will be stored in previous
            # versions), so only carry over segments that don't overlap the new one
            merged = {
                held_daterange: self._data[held_daterange]
                for held_daterange in self._data
                if not daterange_overlap(daterange_a=held_daterange, daterange_b=incoming_daterange)
            }
            # Add in the new data
            merged.update(incoming)
            self._data = merged

        self._data_type = data_type
        self.add_metadata_key(key="data_type", value=data_type)
        self.update_daterange()
Exemple #4
0
    def add_timeseries_data(self, data: Dataset) -> None:
        """Add timeseries data to this Datasource

        The data is grouped by the year of its "time" coordinate and each
        year's segment is stored keyed by the daterange string it covers.
        Any segment already held whose daterange overlaps one or more of the
        incoming segments is dropped in favour of the new data.

        Args:
            data: An xarray.Dataset
        Returns:
            None
        """
        from openghg.util import daterange_overlap

        # Group by year, skipping any group that evaluates as empty
        year_data = [year_ds for _, year_ds in data.groupby("time.year") if year_ds]

        # Use a dictionary keyed with the daterange covered by each segment of data
        additional_data = {self.get_dataset_daterange_str(dataset=year): year for year in year_data}

        if self._data:
            # We don't want the same data twice, this will be stored in previous versions.
            # An existing segment is kept only if it overlaps NONE of the new segments.
            # Checking each (existing, new) pair independently would wrongly keep a
            # segment that overlaps one new year but not another.
            updated_data = {
                current_daterange: self._data[current_daterange]
                for current_daterange in self._data
                if all(
                    not daterange_overlap(daterange_a=current_daterange, daterange_b=new_daterange)
                    for new_daterange in additional_data
                )
            }
            # Add in the additional new data
            updated_data.update(additional_data)

            self._data = updated_data
        else:
            self._data = additional_data

        data_type = "timeseries"
        self._data_type = data_type
        self.add_metadata_key(key="data_type", value=data_type)
        self.update_daterange()
Exemple #5
0
    def set_rank(
        self: T,
        uuid: str,
        rank: Union[int, str],
        date_range: Union[str, List[str]],
        overwrite: Optional[bool] = False,
    ) -> None:
        """Set the rank of a Datasource associated with this object.

        This function performs checks to ensure multiple ranks aren't set for
        overlapping dateranges:

        - a new daterange that overlaps nothing already stored is added as is
        - a new daterange that overlaps a stored daterange of the same rank is
          combined with it
        - a new daterange that overlaps a stored daterange of a different rank
          raises unless overwrite is True, in which case the stored daterange
          is replaced, split or trimmed so that the new rank applies to the
          new daterange

        The updated rank data is written with self.save().

        Args:
            uuid: UUID of Datasource
            rank: Rank of data, an integer (or integer string) from 1 to 10
            date_range: Daterange(s)
            overwrite: Overwrite current ranking data
        Returns:
            None
        Raises:
            TypeError: If rank is outside the range 1 to 10
            ValueError: If a daterange overlaps one stored with a different rank
                and overwrite is False. int(rank) also raises ValueError for a
                non-numeric string.
        """
        from copy import deepcopy
        from openghg.util import (
            combine_dateranges,
            daterange_overlap,
            trim_daterange,
            daterange_contains,
            split_encompassed_daterange,
            sanitise_daterange,
        )

        rank = int(rank)

        # NOTE: TypeError (rather than ValueError) is kept here as callers may catch it
        if not 1 <= rank <= 10:
            raise TypeError("Rank can only take values 1 to 10 (for unranked). Where 1 is the highest rank.")

        if not isinstance(date_range, list):
            date_range = [date_range]

        # Make sure the dateranges passed are correct and are tz-aware
        date_range = [sanitise_daterange(d) for d in date_range]
        # Combine in case we have overlapping dateranges
        date_range = combine_dateranges(date_range)

        # Used to store dateranges that need to be trimmed to ensure no daterange overlap
        # Each entry is a tuple of (existing daterange, replacement daterange)
        to_update = []
        # Non-overlapping dateranges that can be stored directly
        to_add = []

        if uuid in self._rank_data:
            rank_data = self._rank_data[uuid]
            # Check this source isn't ranked differently for the same dates
            for new_daterange in date_range:
                overlap = False
                # Check for overlapping dateranges and add
                for existing_daterange, existing_rank in rank_data.items():
                    if daterange_overlap(daterange_a=new_daterange, daterange_b=existing_daterange):
                        overlap = True

                        if rank != existing_rank and overwrite:
                            # Save the daterange we need to update
                            to_update.append((existing_daterange, new_daterange))
                        # If the ranks are the same we just want to combine the dateranges
                        elif rank == existing_rank:
                            to_combine = [new_daterange, existing_daterange]
                            combined = combine_dateranges(dateranges=to_combine)[0]
                            to_update.append((existing_daterange, combined))
                        else:
                            raise ValueError(
                                f"This datasource has rank {existing_rank} for dates that overlap the ones given. \
                                                Overlapping dateranges are {new_daterange} and {existing_daterange}"
                            )
                # Otherwise we just want to add the new daterange to the dict
                if not overlap:
                    to_add.append(new_daterange)

            # If we've got dateranges to update and ranks to overwrite we need to trim the
            # previous ranking daterange down so we don't have overlapping dateranges
            if to_update:
                # Here we first take a backup of the old ranking data, update
                # it and then write it back. rank_data itself is left untouched
                # so the original rank of each existing daterange can still be read.
                ranking_backup = deepcopy(rank_data)

                for existing, new in to_update:
                    # Remove the existing daterange key
                    # Here we pass if it doesn't exist as if we have multiple new dateranges
                    # that overlap the existing daterange it might have been deleted during
                    # a previous iteration
                    try:
                        del ranking_backup[existing]
                    except KeyError:
                        pass

                    # If we want to overwrite an existing rank we need to trim that daterange and
                    # rewrite it back to the dictionary
                    rank_copy = rank_data[existing]

                    if overwrite:
                        if existing == new:
                            # Identical daterange: the new rank replaces the old one.
                            # Writing rank_copy back here would leave the old rank in
                            # place and silently ignore the overwrite request.
                            ranking_backup[new] = rank
                        # If the existing daterange contains the new daterange
                        # we need to split it into parts and save those
                        elif daterange_contains(container=existing, contained=new):
                            result = split_encompassed_daterange(container=existing, contained=new)

                            # The leading part of the existing daterange keeps its old rank
                            existing_start = result["container_start"]
                            ranking_backup[existing_start] = rank_copy

                            updated_new = result["contained"]
                            ranking_backup[updated_new] = rank

                            # We might only end up with two dateranges
                            try:
                                existing_end = result["container_end"]
                                ranking_backup[existing_end] = rank_copy
                            except KeyError:
                                pass
                        # If the new daterange contains the existing we can just overwrite it
                        elif daterange_contains(container=new, contained=existing):
                            ranking_backup[new] = rank
                        else:
                            # Partial overlap: shrink the existing daterange and keep its
                            # old rank, then store the new daterange with the new rank
                            trimmed = trim_daterange(to_trim=existing, overlapping=new)
                            ranking_backup[trimmed] = rank_copy
                            ranking_backup[new] = rank
                    elif rank_copy == rank:
                        # If we're not overwriting we just need to update to use the new combined
                        ranking_backup[new] = rank_copy

                self._rank_data[uuid] = ranking_backup

            # Finally, store the dateranges that didn't overlap
            for d in to_add:
                self._rank_data[uuid][d] = rank
        else:
            # NOTE(review): this assumes self._rank_data creates the inner mapping
            # on first access (e.g. a defaultdict(dict)) - confirm against the class
            for d in date_range:
                self._rank_data[uuid][d] = rank

        self.save()