Ejemplo n.º 1
0
    def load(bucket=None, uuid=None, key=None, shallow=False):
        """ Load a Datasource from the object store either by UUID or by key

            Either uuid or key must be passed to this function.

            Args:
                bucket (dict, default=None): Bucket holding the object; the
                default bucket is retrieved if not passed
                uuid (str, default=None): UUID of Datasource
                key (str, default=None): Full object store key of Datasource
                shallow (bool, default=False): Only load JSON data, do not
                read Datasets from object store. This will speed up creation
                of the Datasource object.
            Returns:
                Datasource: Datasource object created from JSON
            Raises:
                ValueError: If neither uuid nor key is passed
        """
        from HUGS.ObjectStore import get_bucket, get_object_from_json

        if uuid is None and key is None:
            raise ValueError("Both uuid and key cannot be None")

        if bucket is None:
            bucket = get_bucket()

        # Without an explicit key, build the standard key from the UUID
        if key is None:
            key = f"{Datasource._datasource_root}/uuid/{uuid}"

        data = get_object_from_json(bucket=bucket, key=key)

        return Datasource.from_data(bucket=bucket, data=data, shallow=shallow)
Ejemplo n.º 2
0
    def from_data(cls, data, bucket=None):
        """ Create an object of type cls from a JSON-derived data dictionary

            Args:
                data (dict): Dictionary created from JSON; must contain the
                keys creation_datetime, datasource_uuids, datasource_names
                and file_hashes. rank_data is optional (may be absent from
                objects stored by older versions).
                bucket (dict, default=None): Bucket for data storage
            Returns:
                cls: Instance of cls populated from data
            Raises:
                ValueError: If data is empty
        """
        from Acquire.ObjectStore import string_to_datetime
        from HUGS.ObjectStore import get_bucket
        from collections import defaultdict

        if not data:
            raise ValueError("Unable to create object with empty dictionary")

        if bucket is None:
            bucket = get_bucket()

        c = cls()
        c._creation_datetime = string_to_datetime(data["creation_datetime"])
        c._datasource_uuids = data["datasource_uuids"]
        c._datasource_names = data["datasource_names"]
        c._file_hashes = data["file_hashes"]

        # rank_data may be missing from older stored objects - fall back to
        # an empty mapping instead of try/except around the lookup
        c._rank_data = defaultdict(dict, data.get("rank_data", {}))

        # The in-memory copy is now ahead of (or equal to) the stored one
        c._stored = False

        return c
Ejemplo n.º 3
0
    def save(self, bucket=None):
        """ Save this Datasource object as JSON to the object store

            If this Datasource holds in-memory data (self._data), each
            Dataset is first written out as a NetCDF object under a new
            version key, and self._data_keys is updated so that "latest"
            points at the newly written version.

            Args:
                bucket (str, default=None): Bucket to hold data
            Returns:
                None
        """
        import tempfile
        from copy import deepcopy

        from Acquire.ObjectStore import get_datetime_now_to_string
        from HUGS.ObjectStore import get_bucket, set_object_from_file, set_object_from_json

        if bucket is None:
            bucket = get_bucket()

        if self._data:
            # Ensure we have the latest key
            if "latest" not in self._data_keys:
                self._data_keys["latest"] = {}

            # Version label for this save. Because the "latest" entry is
            # counted alongside v1, v2, ... the dict length equals the next
            # version number (first save: only "latest" present -> "v1")
            version_str = f"v{str(len(self._data_keys))}"
            # Store the keys for the new data
            new_keys = {}

            # Iterate over the keys (daterange string) of the data dictionary
            for daterange in self._data:
                data_key = f"{Datasource._data_root}/uuid/{self._uuid}/{version_str}/{daterange}"

                new_keys[daterange] = data_key
                data = self._data[daterange]

                # TODO - for now just create a temporary directory - will have to update Acquire
                # or work on a PR for xarray to allow returning a NetCDF as bytes
                with tempfile.TemporaryDirectory() as tmpdir:
                    filepath = f"{tmpdir}/temp.nc"
                    data.to_netcdf(filepath)
                    set_object_from_file(bucket=bucket, key=data_key, filename=filepath)

            # Copy the last version
            # NOTE(review): this condition is always true here since "latest"
            # was ensured above; the deepcopy seeds the new version entry
            # with the previous version's contents before overwriting below
            if "latest" in self._data_keys:
                self._data_keys[version_str] = deepcopy(self._data_keys["latest"])

            # Save the new keys and create a timestamp
            self._data_keys[version_str]["keys"] = new_keys
            self._data_keys[version_str]["timestamp"] = get_datetime_now_to_string()

            # Link latest to the newest version
            self._data_keys["latest"] = self._data_keys[version_str]
            self._latest_version = version_str

        self._stored = True
        datasource_key = f"{Datasource._datasource_root}/uuid/{self._uuid}"

        set_object_from_json(bucket=bucket, key=datasource_key, data=self.to_data())
Ejemplo n.º 4
0
def listobjects(args):
    """ List the names of objects in the object store, optionally filtered
        by a key prefix

        Args:
            args (dict): May contain a "prefix" key used to filter results
        Returns:
            dict: Dictionary with a "results" key holding the object names
    """
    # No prefix passed means list every object in the bucket
    prefix = args.get("prefix")

    bucket = get_bucket()
    results = get_object_names(bucket=bucket, prefix=prefix)

    return {"results": results}
Ejemplo n.º 5
0
def remove_objects(args):
    """ Delete the objects stored at the given keys from the object store

        Args:
            args (dict): Must contain a non-empty "keys" list of object
            store keys to delete
        Returns:
            None
        Raises:
            KeyError: If "keys" is not present in args
            ValueError: If the "keys" list is empty
    """
    from HUGS.ObjectStore import get_bucket, delete_object

    # Guard clauses: missing key and empty list are distinct errors
    if "keys" not in args:
        raise KeyError("No keys found")

    keys = args["keys"]

    if not keys:
        raise ValueError("No keys in list")

    bucket = get_bucket()

    for object_key in keys:
        delete_object(bucket=bucket, key=object_key)
Ejemplo n.º 6
0
    def exists(datasource_id, bucket=None):
        """ Check whether a Datasource with this ID is already stored in
            the object store

            Args:
                datasource_id (str): ID of datasource created from data
                bucket (dict, default=None): Bucket to check; the default
                bucket is used if not passed
            Returns:
                bool: True if Datasource exists
        """
        from HUGS.ObjectStore import exists, get_bucket

        bucket = get_bucket() if bucket is None else bucket

        datasource_key = f"{Datasource._datasource_root}/uuid/{datasource_id}"

        return exists(bucket=bucket, key=datasource_key)
Ejemplo n.º 7
0
    def exists(cls, bucket=None):
        """ Check whether a GC object has already been saved to the
            object store

            Args:
                bucket (dict, default=None): Bucket for data storage; the
                default bucket is used if not passed
            Returns:
                bool: True if object exists
        """
        from HUGS.ObjectStore import exists, get_bucket

        bucket = get_bucket() if bucket is None else bucket

        uuid_key = f"{cls._root}/uuid/{cls._uuid}"

        return exists(bucket=bucket, key=uuid_key)
Ejemplo n.º 8
0
    def save(self, bucket=None):
        """ Serialise this object to JSON and store it in the object store

            Args:
                bucket (dict, default=None): Bucket for data; the default
                bucket is used if not passed
            Returns:
                None
        """
        from HUGS.ObjectStore import get_bucket, set_object_from_json

        if bucket is None:
            bucket = get_bucket()

        # Mark as stored before serialising so the flag is persisted
        self._stored = True

        object_key = f"{TEMPLATE._root}/uuid/{TEMPLATE._uuid}"
        set_object_from_json(bucket=bucket, key=object_key, data=self.to_data())
Ejemplo n.º 9
0
    def exists(bucket=None):
        """ Check if a Footprint object is already saved in the object
            store

            Args:
                bucket (dict, default=None): Bucket for data storage; the
                default bucket is used if not passed
            Returns:
                bool: True if object exists
        """
        from HUGS.ObjectStore import exists, get_bucket

        if bucket is None:
            bucket = get_bucket()

        # f-string for consistency with key construction elsewhere in
        # this module (replaces dated %-formatting)
        key = f"{Footprint._footprint_root}/uuid/{Footprint._footprint_uuid}"

        return exists(bucket=bucket, key=key)
Ejemplo n.º 10
0
    def save(self, bucket=None):
        """ Store this Footprint object in the object store as JSON

            A null Footprint is not written.

            Args:
                bucket (dict): Bucket for data storage
            Returns:
                None
        """
        from HUGS.ObjectStore import get_bucket, set_object_from_json

        # Nothing to write for a null object
        if self.is_null():
            return

        bucket = get_bucket() if bucket is None else bucket

        self._stored = True

        footprint_key = f"{Footprint._footprint_root}/uuid/{Footprint._footprint_uuid}"
        set_object_from_json(bucket=bucket, key=footprint_key, data=self.to_data())
Ejemplo n.º 11
0
    def load(bucket=None):
        """ Load a Footprint object from the object store

            If no Footprint has been saved yet, a newly created one is
            returned instead.

            Args:
                bucket (dict, default=None): Bucket holding the object; the
                default bucket is used if not passed
            Returns:
                Footprint: Footprint object created from JSON
        """
        from HUGS.ObjectStore import get_bucket, get_object_from_json

        # Nothing stored yet - hand back a fresh object
        if not Footprint.exists():
            return Footprint.create()

        if bucket is None:
            bucket = get_bucket()

        key = f"{Footprint._footprint_root}/uuid/{Footprint._footprint_uuid}"
        data = get_object_from_json(bucket=bucket, key=key)

        return Footprint.from_data(data=data, bucket=bucket)
Ejemplo n.º 12
0
    def load(cls, bucket=None):
        """ Load an object of this class from the object store using the
            class-level root and UUID

            If no object has been saved yet, a fresh instance is returned
            instead.

            Args:
                bucket (dict, default=None): Bucket holding the object; the
                default bucket is used if not passed
            Returns:
                cls: Instance of cls created from JSON
        """
        from HUGS.ObjectStore import get_bucket, get_object_from_json

        # Nothing stored yet - hand back a fresh instance
        if not cls.exists():
            return cls()

        if bucket is None:
            bucket = get_bucket()

        key = f"{cls._root}/uuid/{cls._uuid}"
        data = get_object_from_json(bucket=bucket, key=key)

        return cls.from_data(data=data, bucket=bucket)
Ejemplo n.º 13
0
def recombine_sections(data_keys):
    """ Combine the separate Datasets stored at the given object store keys
        into a single time-sorted Dataset

        Args:
            data_keys (list): Object store keys of the Datasets to combine
        Returns:
            xarray.Dataset: Combined Dataset sorted along the time dimension
    """
    from xarray import concat as xr_concat
    from HUGS.ObjectStore import get_bucket
    from HUGS.Modules import Datasource

    bucket = get_bucket()

    data = [Datasource.load_dataset(bucket=bucket, key=key) for key in data_keys]

    combined = xr_concat(data, dim="time")

    # Ensure the combined data is in time order
    # TODO(review): duplicate timestamps across sections are not checked
    # for or dropped here - confirm whether that is required
    return combined.sortby("time")
Ejemplo n.º 14
0
    def delete(self, uuid):
        """ Delete the Datasource with the given UUID

            This deletes the stored data for every version held by the
            Datasource, then the Datasource record itself, and finally
            removes the Datasource from this object's lookup dictionaries.

            Args:
                uuid (str): UUID of Datasource
            Returns:
                None
        """
        from HUGS.ObjectStore import delete_object, get_bucket
        from HUGS.Modules import Datasource

        bucket = get_bucket()

        # Load the Datasource so we can find every data key it holds
        datasource = Datasource.load(uuid=uuid)
        data_keys = datasource.data_keys(return_all=True)

        # Delete the stored data for each version
        for version in data_keys:
            key_data = data_keys[version]["keys"]

            for daterange in key_data:
                delete_object(bucket=bucket, key=key_data[daterange])

        # Then delete the Datasource record itself
        key = f"{Datasource._datasource_root}/uuid/{uuid}"
        delete_object(bucket=bucket, key=key)

        # Finally remove the Datasource from our lookup dictionaries
        name = self._datasource_uuids[uuid]

        del self._datasource_names[name]
        del self._datasource_uuids[uuid]