Esempio n. 1
0
    def read_file(self, data_filepath, site=None, overwrite=False):
        """ Read a EUROCOM data file and assign attributes to the processed data

            Args:
                data_filepath (str or Path): Path of file to load
                site (str, default=None): Site the data belongs to. If None the part
                of the filename stem before the first underscore is used
                overwrite (bool, default=False): Not used within this method
            Returns:
                The processed data as returned by assign_attributes
        """
        from HUGS.Processing import assign_attributes
        from pathlib import Path

        data_filepath = Path(data_filepath)

        # Fall back to the leading token of the filename when no site is given
        site = data_filepath.stem.split("_")[0] if site is None else site

        # read_data is expected to hand back xarray Datasets; attributes are
        # attached here (rather than inside read_data) as that makes testing easier
        return assign_attributes(
            data=self.read_data(data_filepath=data_filepath, site=site),
            site=site,
            sampling_period=self._sampling_period,
        )
Esempio n. 2
0
    def read_file(
        self,
        data_filepath,
        species=None,
        site=None,
        overwrite=False,
    ):
        """ Reads a NOAA data file and assigns attributes to the processed data

            Args:
                data_filepath (str or Path): Path of file to load
                species (str, default=None): Species name. If None the part of the
                filename before the first underscore is used, lowercased
                site (str, default=None): Site the data belongs to. If None it is
                taken from the "site" entry of the metadata that read_data returns
                for this species
                overwrite (bool, default=False): Not used within this method
            Returns:
                The processed data as returned by assign_attributes
        """
        from HUGS.Processing import assign_attributes
        from pathlib import Path

        data_filepath = Path(data_filepath)

        if species is None:
            species = data_filepath.name.split("_")[0].lower()

        gas_data = self.read_data(data_filepath=data_filepath, species=species)

        if site is None:
            # The processed data is keyed by lowercase species name
            site = gas_data[species.lower()]["metadata"]["site"]

        return assign_attributes(data=gas_data, site=site)
Esempio n. 3
0
    def read_file(
        self,
        data_filepath,
        precision_filepath,
        site=None,
        instrument=None,
        network=None
    ):
        """ Reads a GC data file together with its precision file and assigns
            attributes to the processed data

            TODO - should this default to GCMD when no instrument is passed?

            Args:
                data_filepath (str, pathlib.Path): Path of data file
                precision_filepath (str, pathlib.Path): Path of precision file
                site (str, default=None): Three letter code or name for site. If None
                the first word of the data filename stem is used to look up the code
                instrument (str, default=None): Instrument name. If None the second
                word of the data filename is used, or "GCMD" if that word is a number
                network (str, default=None): Network name
            Returns:
                The processed data as returned by assign_attributes
        """
        import re
        from pathlib import Path
        from HUGS.Processing import assign_attributes
        from HUGS.Util import is_number

        # Splits a filename into its word-like parts
        word_pattern = r"[\w']+"

        data_filepath = Path(data_filepath)

        if site is None:
            # No site given, take the first word of the filename stem
            site = self.get_site_code(re.findall(word_pattern, data_filepath.stem)[0])

        # The 3 character site code is required from here on
        if len(site) != 3:
            site = self.get_site_code(site)

        if instrument is None:
            # Example filename: capegrim-medusa.18.C
            # The second word is normally the instrument name
            candidate = re.findall(word_pattern, str(data_filepath.name))[1]

            # A number means the year was picked out rather than the instrument,
            # default to GCMD for this type of file
            instrument = "GCMD" if is_number(candidate) else candidate

        processed = self.read_data(
            data_filepath=data_filepath,
            precision_filepath=precision_filepath,
            site=site,
            instrument=instrument,
            network=network
        )

        # Attributes are assigned so the data can be written as CF compliant NetCDFs
        return assign_attributes(data=processed, site=site)
Esempio n. 4
0
    def read_file(self, data_filepath):
        """ Read a THAMESBARRIER data file and assign attributes to the processed data

            Args:
                data_filepath (str or Path): Path of file to load
            Returns:
                The processed data as returned by assign_attributes
        """
        from HUGS.Processing import assign_attributes
        from pathlib import Path

        processed = self.read_data(data_filepath=Path(data_filepath))

        # The site code is fixed to "TMB" for this data type
        return assign_attributes(data=processed, site="TMB")
Esempio n. 5
0
    def read_file(self,
                  data_filepath,
                  source_name=None,
                  site=None,
                  network=None):
        """ Read a CRDS data file and assign attributes to the processed data

            Args:
                data_filepath (str or Path): Path of file to load
                source_name (str, default=None): Name of source. If not given the
                stem of the filename is used
                site (str, default=None): Name of site. If not given the part of
                source_name before the first "." is used
                network (str, default=None): Network name, passed on to read_data
            Returns:
                The processed data as returned by assign_attributes
        """
        from HUGS.Processing import assign_attributes
        from pathlib import Path

        data_filepath = data_filepath if isinstance(data_filepath, Path) else Path(data_filepath)

        # Empty values are treated the same as None for both fallbacks
        source_name = source_name or data_filepath.stem
        site = site or source_name.split(".")[0]

        # Split the file's data into separate Datasets
        processed = self.read_data(data_filepath=data_filepath,
                                   site=site,
                                   network=network)

        # Attributes are assigned to make the data CF compliant
        return assign_attributes(data=processed,
                                 site=site,
                                 sampling_period=self._sampling_period)
Esempio n. 6
0
def test_crds_attributes():
    """ Check the attributes that assign_attributes attaches to CRDS test data

        The "tac" test file is read with CRDS.read_data, attributes are assigned
        and the global, time and per-variable attributes of the ch4 and co2 data
        are compared with their expected values.
    """
    _ = get_local_bucket(empty=True)

    crds = CRDS()

    filepath = get_datapath(
        filename="tac.picarro.1minute.100m.test.dat", data_type="CRDS"
    )

    combined = crds.read_data(data_filepath=filepath, site="tac")
    combined_attributes = assign_attributes(data=combined, site="tac")

    # Debugging hint: each combined_attributes[key]["data"] can be dumped with
    # to_netcdf for manual inspection

    species_names = ("ch4", "co2")

    datasets = {name: combined_attributes[name]["data"] for name in species_names}
    attrs = {name: datasets[name].attrs for name in species_names}

    # Keep a full copy as the entries removed below are deleted in place
    complete_attrs = {name: attrs[name].copy() for name in species_names}

    # These entries are not part of the shared expected attributes
    removed_keys = (
        "File created",
        "species",
        "Calibration_scale",
        "data_owner_email",
        "data_owner",
    )

    for key in removed_keys:
        for name in species_names:
            del attrs[name][key]

    global_attributes = {
        "inlet_height_magl": "100m",
        "comment": "Cavity ring-down measurements. Output from GCWerks",
        "Conditions of use": "Ensure that you contact the data owner at the outset of your project.",
        "Source": "In situ measurements of air",
        "Conventions": "CF-1.6",
        "Processed by": "*****@*****.**",
        "station_longitude": 1.13872,
        "station_latitude": 52.51775,
        "station_long_name": "Tacolneston Tower, UK",
        "station_height_masl": 50.0,
    }

    for name in species_names:
        assert attrs[name] == global_attributes

    for name in species_names:
        assert complete_attrs[name]["species"] == name

    # Attributes of the time coordinate
    time_attributes = {
        "label": "left",
        "standard_name": "time",
        "comment": "Time stamp corresponds to beginning of sampling period. Time since midnight UTC of reference date. Note that sampling periods are approximate.",
    }

    for name in species_names:
        assert datasets[name].time.attrs == time_attributes

    # Attributes of the individual variables: (species, long name, units)
    expected_variables = (
        ("ch4", "mole_fraction_of_methane_in_air", "1e-9"),
        ("co2", "mole_fraction_of_carbon_dioxide_in_air", "1e-6"),
    )

    for name, long_name, units in expected_variables:
        dataset = datasets[name]

        assert dataset[name].attrs == {
            "long_name": long_name,
            "units": units,
        }
        assert dataset[f"{name}_stdev"].attrs == {
            "long_name": f"{long_name}_stdev",
            "units": units,
        }
        assert dataset[f"{name}_n_meas"].attrs == {
            "long_name": f"{long_name}_n_meas"
        }