Example #1
0
    def setUp(self) -> None:
        """Open the COSMOS / Barrow-ARM test file and store reference values."""
        # Sensor identity; the same values are used to build the file name.
        self.network_name = "COSMOS"
        self.station_name = "Barrow-ARM"
        self.instrument = "Cosmic-ray-Probe"
        self.variable = "soil_moisture"
        self.depth_from, self.depth_to = 0.0, 0.21

        # Reference coordinates and elevation.
        self.longitude, self.latitude = -156.62870, 71.32980
        self.elevation = 4.0

        filename = f"{self.network_name}_{self.network_name}_{self.station_name}_sm_{self.depth_from:.6f}_{self.depth_to:.6f}_{self.instrument}_20170810_20180809.stm"
        self.file = DataFile(
            testdata_path / "Data_seperate_files_20170810_20180809",
            Path(self.network_name, self.station_name, filename),
        )

        # Reference record for 2017-08-11 13:00.
        expected = {"datetime": "2017-08-11 13:00:00"}
        expected[self.variable] = 0.183
        expected[f"{self.variable}_flag"] = "G"
        expected[f"{self.variable}_orig_flag"] = "M"
        self.data_should_201708113 = expected
Example #2
0
    def setUp(self) -> None:
        """
        Create file handlers for the header-values and CEOP-sep test files,
        check that the malformed file is rejected and store reference metadata.
        """
        test_data_dir = os.path.join(os.path.dirname(__file__), "test_data")
        # The same relative file path is used below both format roots.
        stm_filepath = os.path.join(
            "SMOSMANIA",
            "SMOSMANIA_SMOSMANIA_Narbonne_sm_0.050000_0.050000_ThetaProbe-ML2X_20070101_20070131.stm",
        )

        self.filehandler_header_values = DataFile(
            os.path.join(test_data_dir, "format_header_values"), stm_filepath)
        self.filehandler_ceop_sep = DataFile(
            os.path.join(test_data_dir, "format_ceop_sep"), stm_filepath)

        # Constructing a DataFile from the malformed file must raise IOError.
        malformed_root = os.path.join(test_data_dir, "malformed")
        with pytest.raises(IOError):
            DataFile(malformed_root, "mal_formed_file.txt")

        self.metadata_depth_from = 0.05
        self.metadata_depth_to = 0.05

        self.metadata_ref = {
            "network": "SMOSMANIA",
            "station": "Narbonne",
            "latitude": 43.15,
            "longitude": 2.9567,
            "elevation": 112.0,
            "variable": "soil_moisture",
            "timerange_from": Timestamp(2007, 1, 1, 1),
            "timerange_to": Timestamp(2007, 1, 31, 23),
            "instrument": "ThetaProbe-ML2X",
        }

        # CEOP reference: a copy of the reference above with some entries
        # replaced or added.
        ceop_ref = dict(self.metadata_ref)
        ceop_ref.update({
            "depth_from": ["multiple"],
            "depth_to": ["multiple"],
            "variable": ["ts", "sm"],
            "sensor": "n.s",
        })
        self.metadata_ref_ceop = ceop_ref
Example #3
0
    def setUp(self) -> None:
        """
        Open the two SMOSMANIA test files (header-values and CEOP-sep layout),
        verify the malformed file raises and prepare the reference metadata.
        """
        here = os.path.dirname(__file__)
        station_file = os.path.join(
            'SMOSMANIA',
            'SMOSMANIA_SMOSMANIA_Narbonne_sm_0.050000_0.050000_ThetaProbe-ML2X_20070101_20070131.stm'
        )

        header_values_root = os.path.join(here, 'test_data',
                                          'format_header_values')
        self.filehandler_header_values = DataFile(header_values_root,
                                                  station_file)

        ceop_sep_root = os.path.join(here, 'test_data', 'format_ceop_sep')
        self.filehandler_ceop_sep = DataFile(ceop_sep_root, station_file)

        # The malformed file is expected to be rejected with an IOError.
        with pytest.raises(IOError):
            DataFile(os.path.join(here, 'test_data', 'malformed'),
                     'mal_formed_file.txt')

        reference = {}
        reference['network'] = 'SMOSMANIA'
        reference['station'] = 'Narbonne'
        reference['latitude'] = 43.15
        reference['longitude'] = 2.9567
        reference['elevation'] = 112.0
        reference['variable'] = 'soil_moisture'
        reference['timerange_from'] = Timestamp(2007, 1, 1, 1)
        reference['timerange_to'] = Timestamp(2007, 1, 31, 23)
        reference['instrument'] = 'ThetaProbe-ML2X'
        self.metadata_ref = reference

        self.metadata_depth_from = self.metadata_depth_to = 0.05

        # CEOP variant of the reference: copy, then replace/add entries.
        self.metadata_ref_ceop = {
            **self.metadata_ref,
            'depth_from': ['multiple'],
            'depth_to': ['multiple'],
            'variable': ['ts', 'sm'],
            'sensor': 'n.s',
        }
Example #4
0
    def setUp(self) -> None:
        """Run the parent setup, then re-open ``self.file`` from the zip archive under zip_archives/header."""
        super().setUp()

        archive = testdata_path / "zip_archives" / "header" / "Data_seperate_files_header_20170810_20180809.zip"
        # File name is built from the attributes set by the parent setUp.
        filename = f"{self.network_name}_{self.network_name}_{self.station_name}_sm_{self.depth_from:.6f}_{self.depth_to:.6f}_{self.instrument}_20170810_20180809.stm"

        self.file = DataFile(
            archive, Path(self.network_name, self.station_name, filename))
Example #5
0
    def setUp(self) -> None:
        """Run the parent setup, then re-open ``self.file`` from the zip archive under zip_archives/ceop."""
        super().setUp()

        archive = testdata_path / "zip_archives" / "ceop" / "Data_seperate_files_20170810_20180809.zip"
        # File name is built from the attributes set by the parent setUp.
        filename = f"{self.network_name}_{self.network_name}_{self.station_name}_sm_{self.depth_from:.6f}_{self.depth_to:.6f}_{self.instrument}_20170810_20180809.stm"
        member = Path(self.network_name, self.station_name, filename)

        self.file = DataFile(archive, member)
Example #6
0
    def setUp(self):
        """
        Create the Sensor under test from the COSMOS / Barrow-ARM data file
        and check the name it reports.
        """
        variable = 'soil_moisture'
        instrument = 'Cosmic-ray-Probe'
        depth = Depth(0, 0.21)

        filehandler = DataFile(
            os.path.join(rpath, "Data_seperate_files_20170810_20180809"),
            os.path.join(
                "COSMOS", "Barrow-ARM",
                "COSMOS_COSMOS_Barrow-ARM_sm_0.000000_0.210000_Cosmic-ray-Probe_20170810_20180809.stm"
            ),
        )
        self.sensor = Sensor(instrument, variable, depth,
                             filehandler=filehandler)

        # The sensor name is expected to follow
        # <instrument>_<variable>_<depth start>_<depth end>.
        expected_name = '{}_{}_{:1.6f}_{:1.6f}'.format(
            instrument, variable, depth.start, depth.end)
        assert self.sensor.name == expected_name
Example #7
0
class Test_DataFileCeopSepUnzipped(unittest.TestCase):
    # from dir, no _load_data
    def setUp(self) -> None:
        """Open the COSMOS / Barrow-ARM test file and store reference values."""
        # Sensor identity; the same values are used to build the file name.
        self.network_name = "COSMOS"
        self.station_name = "Barrow-ARM"
        self.instrument = "Cosmic-ray-Probe"
        self.variable = "soil_moisture"
        self.depth_from, self.depth_to = 0.0, 0.21

        # Reference coordinates and elevation.
        self.longitude, self.latitude = -156.62870, 71.32980
        self.elevation = 4.0

        filename = f"{self.network_name}_{self.network_name}_{self.station_name}_sm_{self.depth_from:.6f}_{self.depth_to:.6f}_{self.instrument}_20170810_20180809.stm"
        self.file = DataFile(
            testdata_path / "Data_seperate_files_20170810_20180809",
            Path(self.network_name, self.station_name, filename),
        )

        # Reference record for 2017-08-11 13:00.
        expected = {"datetime": "2017-08-11 13:00:00"}
        expected[self.variable] = 0.183
        expected[f"{self.variable}_flag"] = "G"
        expected[f"{self.variable}_orig_flag"] = "M"
        self.data_should_201708113 = expected

    def test_metadata(self):
        """test reading the loaded metadata for a file (incl. static meta)"""
        check = self.file.check_metadata

        assert check()
        assert not check(filter_meta_dict={"network": "WRONGNAME"})

        matching_filter = {
            "longitude": self.longitude,
            "variable": self.variable,
        }
        assert check(
            self.variable,
            allowed_depth=Depth(0, 0.21),
            filter_meta_dict=matching_filter,
        ) == True

        # Unknown variable and non-matching depth range are both rejected.
        assert check("nonexistingvar", Depth(0, 0.1)) == False
        assert check(self.variable, Depth(98.0, 99.0)) == False

        meta = self.file.metadata
        assert meta["station"].val == self.station_name
        assert meta["network"].val == self.network_name
        assert meta["instrument"].depth.start == self.depth_from
        assert meta["instrument"].depth.end == self.depth_to
        assert meta["instrument"].val == self.instrument
        assert meta["longitude"].val == self.longitude
        assert meta["latitude"].val == self.latitude
        assert meta["variable"].val == self.variable

        # Only exercised, the result is not asserted.
        time_filter = {
            "timerange_from": datetime(2017, 8, 10, 0),
            "timerange_to": datetime(2018, 8, 9, 8),
        }
        check(filter_meta_dict=time_filter)

    def test_data(self):
        """test reading the actual data for a file"""
        # todo: why is sm column called "variable"?
        row = self.file.read_data().loc[datetime(2017, 8, 11, 13)]

        columns = (
            self.variable,
            f"{self.variable}_flag",
            f"{self.variable}_orig_flag",
        )
        for column in columns:
            assert row[column] == self.data_should_201708113[column]

    def test_metadata_for_depth(self):
        """Check finding best matching metadata for file"""
        allmeta = self.file.read_metadata(best_meta_for_sensor=False)

        # no vars with multiple depths
        assert self.file.read_metadata(best_meta_for_sensor=True) == allmeta

        assert "saturation" not in allmeta
Example #8
0
class TestReaders(unittest.TestCase):
    """
    Old readers, kept to test backward compatibility
    """

    def setUp(self) -> None:
        """Open the header-values and CEOP-sep test files and store reference metadata."""
        test_data = os.path.join(os.path.dirname(__file__), 'test_data')
        # The same relative file path is used below both format roots.
        filepath = os.path.join(
            'SMOSMANIA',
            'SMOSMANIA_SMOSMANIA_Narbonne_sm_0.050000_0.050000_ThetaProbe-ML2X_20070101_20070131.stm'
        )

        self.filehandler_header_values = DataFile(
            os.path.join(test_data, 'format_header_values'), filepath)
        self.filehandler_ceop_sep = DataFile(
            os.path.join(test_data, 'format_ceop_sep'), filepath)

        self.metadata_ref = {
            'network': 'SMOSMANIA',
            'station': 'Narbonne',
            'latitude': 43.15,
            'longitude': 2.9567,
            'elevation': 112.0,
            'variable': 'soil_moisture',
            'timerange_from': Timestamp(2007, 1, 1, 1),
            'timerange_to': Timestamp(2007, 1, 31, 23),
            'instrument': 'ThetaProbe-ML2X'
        }

        self.metadata_depth_from = 0.05
        self.metadata_depth_to = 0.05

        self.metadata_ref_ceop = dict(self.metadata_ref)
        self.metadata_ref_ceop['depth_from'] = ['multiple']
        self.metadata_ref_ceop['depth_to'] = ['multiple']
        self.metadata_ref_ceop['variable'] = ['ts', 'sm']
        self.metadata_ref_ceop['sensor'] = 'n.s'

    def _assert_narbonne_metadata(self, filehandler):
        """Check the metadata values both file formats must report."""
        meta = filehandler.metadata
        assert meta['network'].val == 'SMOSMANIA'
        assert meta['station'].val == 'Narbonne'
        assert meta['latitude'].val == 43.15
        assert meta['longitude'].val == 2.9567
        assert meta['elevation'].val == 112.0
        assert meta['variable'].val == 'soil_moisture'
        assert meta['instrument'].depth.start == 0.05
        assert meta['instrument'].depth.end == 0.05
        assert meta['instrument'].val == 'ThetaProbe-ML2X'

    def test_malformed_file(self):
        """Creating a DataFile from the malformed test file raises IOError."""
        malformed_root = os.path.join(os.path.dirname(__file__), 'test_data',
                                      'malformed')

        with pytest.raises(IOError):
            DataFile(malformed_root, 'mal_formed_file.txt')

    def test_get_info_from_file(self):
        """Header and file name elements are extracted from the CEOP-sep file."""
        header_elements, _, _, filename_elements = \
            self.filehandler_ceop_sep.get_elements_from_file()

        assert sorted(header_elements) == sorted([
            '2007/01/01', '01:00', '2007/01/01', '01:00', 'SMOSMANIA',
            'SMOSMANIA', 'Narbonne', '43.15000', '2.95670', '112.00', '0.05',
            '0.05', '0.2140', 'U', 'M'
        ])
        assert sorted(filename_elements) == sorted([
            'SMOSMANIA', 'SMOSMANIA', 'Narbonne', 'sm', '0.050000', '0.050000',
            'ThetaProbe-ML2X', '20070101', '20070131.stm'
        ])

    def test_get_metadata_header_values(self):
        """Metadata parsed from the header-values file matches the reference."""
        metadata, _ = \
            self.filehandler_header_values.get_metadata_header_values()

        for key in metadata.keys():
            assert metadata[key].val == self.metadata_ref[key]
        assert metadata['variable'].depth.start == self.metadata_depth_from
        assert metadata['variable'].depth.end == self.metadata_depth_to

    def test_reader_format_header_values(self):
        """Metadata and data of the header-values file are read as expected."""
        filehandler = self.filehandler_header_values
        self._assert_narbonne_metadata(filehandler)

        data = filehandler.read_data()
        assert isinstance(data, pd.DataFrame)
        assert data.index[7] == pd.Timestamp('2007-1-1 8:0:0')
        assert sorted(data.columns) == sorted(
            ['soil_moisture', 'soil_moisture_flag', 'soil_moisture_orig_flag'])
        assert data['soil_moisture'].values[8] == 0.2135
        assert data['soil_moisture_flag'].values[8] == 'U'
        assert data['soil_moisture_orig_flag'].values[8] == 'M'

    def test_get_metadata_ceop_sep(self):
        """Metadata parsed from the CEOP-sep file matches the reference."""
        metadata, _ = self.filehandler_ceop_sep.get_metadata_ceop_sep()

        for key in metadata.keys():
            assert metadata[key].val == self.metadata_ref[key]
        assert metadata['variable'].depth.start == self.metadata_depth_from
        assert metadata['variable'].depth.end == self.metadata_depth_to

    def test_reader_format_ceop_sep(self):
        """Metadata and data of the CEOP-sep file are read as expected."""
        filehandler = self.filehandler_ceop_sep
        self._assert_narbonne_metadata(filehandler)

        data = filehandler.read_data()
        assert isinstance(data, pd.DataFrame)
        assert data.index[7] == pd.Timestamp('2007-1-1 8:0:0')
        assert sorted(data.columns) == sorted(
            ['soil_moisture', 'soil_moisture_flag', 'soil_moisture_orig_flag'])
        assert data['soil_moisture'].values[8] == 0.2135
        assert data['soil_moisture_flag'].values[8] == 'U'
        assert data['soil_moisture_orig_flag'].values[347] == 'M'

    def test_reader_get_format(self):
        """The file type is detected for both formats."""
        assert self.filehandler_ceop_sep.file_type == 'ceop_sep'
        assert self.filehandler_header_values.file_type == 'header_values'

    def test_get_min_max_from_file(self):
        """Both handlers report the same first and last time stamp."""
        # Check each handler's own time range (from AND to).
        for filehandler in (self.filehandler_ceop_sep,
                            self.filehandler_header_values):
            assert filehandler.metadata['timerange_from'].val == \
                Timestamp(2007, 1, 1, 1)
            assert filehandler.metadata['timerange_to'].val == \
                Timestamp(2007, 1, 31, 23)
Example #9
0
def _read_station_dir(
    root: Union[IsmnRoot, Path, str],
    stat_dir: Union[Path, str],
    temp_root: Path,
) -> (list, list):
    """
    Parallelizable function to read metadata for files in station dir

    Parameters
    ----------
    root : IsmnRoot or Path or str
        Data root. Anything that is not already an IsmnRoot is wrapped in a
        new IsmnRoot, which is closed again before this function returns
        (also when an exception is raised).
    stat_dir : Path or str
        Station directory that is searched for "*.csv" and "*.stm" files.
    temp_root : Path
        Passed on to StaticMetaFile and DataFile as ``temp_root``.

    Returns
    -------
    filelist : list
        One ``(network, station, DataFile)`` tuple for each "*.stm" file
        that could be loaded.
    infos : list
        Messages about processed files and about problems encountered.
    """
    infos = []
    filelist = []

    # Only close the root at the end if it was opened here.
    proc_root = not isinstance(root, IsmnRoot)
    if proc_root:
        root = IsmnRoot(root)

    try:
        csv = root.find_files(stat_dir, "*.csv")

        try:
            if len(csv) == 0:
                raise IsmnFileError(
                    "Expected 1 csv file for station, found 0. "
                    "Use empty static metadata."
                )
            if len(csv) > 1:
                infos.append(
                    f"Expected 1 csv file for station, found {len(csv)}. "
                    f"Use first file in dir."
                )
            static_meta_file = StaticMetaFile(
                root, csv[0], load_metadata=True, temp_root=temp_root
            )
            station_meta = static_meta_file.metadata
        except IsmnFileError as e:
            # Fall back to the template values when static metadata
            # is missing or cannot be loaded.
            infos.append(f"Error loading static meta for station: {e}")
            station_meta = MetaData(
                [MetaVar(k, v) for k, v in CSV_META_TEMPLATE.items()]
            )

        for file_path in root.find_files(stat_dir, "*.stm"):
            try:
                f = DataFile(root, file_path, temp_root=temp_root)
            except IOError as e:
                # Skip files that cannot be loaded, but report them.
                infos.append(f"Error loading ismn file: {e}")
                continue

            f.metadata.merge(station_meta, inplace=True)

            # Reduce the merged metadata to the depth range of the instrument.
            f.metadata = f.metadata.best_meta_for_depth(
                Depth(
                    f.metadata["instrument"].depth.start,
                    f.metadata["instrument"].depth.end,
                )
            )

            network = f.metadata["network"].val
            station = f.metadata["station"].val

            filelist.append((network, station, f))

            infos.append(f"Processed file {file_path}")
    finally:
        if proc_root:
            root.close()

    return filelist, infos
Example #10
0
    def from_metadata_csv(
        cls, data_root, meta_csv_file, network=None, temp_root=gettempdir()
    ):
        """
        Load a previously created and stored filelist from
        :func:`ismn.filecollection.IsmnFileCollection.to_metadata_csv`

        Parameters
        ----------
        data_root : IsmnRoot or str or Path
            Path where the ismn data is stored, can also be a zip file
        meta_csv_file : str or Path
            Csv file where the metadata is stored.
        network : list, optional (default: None)
            List of networks that are considered.
            Rows of all other networks are skipped, no filehandler is
            created for them.
        temp_root : str or Path, optional (default: gettempdir())
            Temporary folder where extracted data is copied during reading from
            zip archive.

        Returns
        -------
        collection : cls
            New instance created from the root and the filelist
            (network name -> list of DataFile). ``cls.metadata_df`` is set to
            the loaded metadata, restricted to the selected networks.
        """
        if network is not None:
            network = np.atleast_1d(network)

        root = data_root if isinstance(data_root, IsmnRoot) else IsmnRoot(data_root)

        print(f"Found existing ismn metadata in {meta_csv_file}.")

        metadata_df = _load_metadata_df(meta_csv_file)

        filelist = OrderedDict([])

        all_networks = metadata_df["network"]["val"].values

        columns = np.array(list(metadata_df.columns))

        # The last two columns hold file path and file type; all columns
        # before them are read as triples (depth_from, depth_to, val) per
        # variable.
        # NOTE(review): np.unique returns the names sorted, so this presumes
        # the variable columns in the csv are stored in that same order.
        var_names = np.unique(columns[:-2][:, 0])

        for row, row_network in zip(metadata_df.values, all_networks):  # todo: slow!?? parallelise?
            if (network is not None) and not np.isin([row_network], network)[0]:
                continue

            vals = row[:-2].reshape(-1, 3)

            metadata = MetaData(
                [
                    MetaVar.from_tuple(
                        (name, vals[idx][2], vals[idx][0], vals[idx][1])
                    )
                    for idx, name in enumerate(var_names)
                ]
            )

            # Metadata comes from the csv, so it is not loaded from the file.
            f = DataFile(
                root=root,
                file_path=str(PurePosixPath(row[-2])),
                load_metadata=False,
                temp_root=temp_root,
            )

            f.metadata = metadata
            f.file_type = row[-1]

            filelist.setdefault(f.metadata["network"].val, []).append(f)

        if network is None:
            cls.metadata_df = metadata_df
        else:
            flags = np.isin(all_networks, network)
            cls.metadata_df = metadata_df.loc[flags]

        return cls(root, filelist=filelist)
Example #11
0
class TestReaders(unittest.TestCase):
    """
    Old readers, kept to test backward compatibility
    """

    def setUp(self) -> None:
        """Open the header-values and CEOP-sep test files and store reference metadata."""
        test_data = os.path.join(os.path.dirname(__file__), "test_data")
        # The same relative file path is used below both format roots.
        filepath = os.path.join(
            "SMOSMANIA",
            "SMOSMANIA_SMOSMANIA_Narbonne_sm_0.050000_0.050000_ThetaProbe-ML2X_20070101_20070131.stm",
        )

        self.filehandler_header_values = DataFile(
            os.path.join(test_data, "format_header_values"), filepath)
        self.filehandler_ceop_sep = DataFile(
            os.path.join(test_data, "format_ceop_sep"), filepath)

        self.metadata_ref = {
            "network": "SMOSMANIA",
            "station": "Narbonne",
            "latitude": 43.15,
            "longitude": 2.9567,
            "elevation": 112.0,
            "variable": "soil_moisture",
            "timerange_from": Timestamp(2007, 1, 1, 1),
            "timerange_to": Timestamp(2007, 1, 31, 23),
            "instrument": "ThetaProbe-ML2X",
        }

        self.metadata_depth_from = 0.05
        self.metadata_depth_to = 0.05

        self.metadata_ref_ceop = dict(self.metadata_ref)
        self.metadata_ref_ceop["depth_from"] = ["multiple"]
        self.metadata_ref_ceop["depth_to"] = ["multiple"]
        self.metadata_ref_ceop["variable"] = ["ts", "sm"]
        self.metadata_ref_ceop["sensor"] = "n.s"

    def _assert_narbonne_metadata(self, filehandler):
        """Check the metadata values both file formats must report."""
        meta = filehandler.metadata
        assert meta["network"].val == "SMOSMANIA"
        assert meta["station"].val == "Narbonne"
        assert meta["latitude"].val == 43.15
        assert meta["longitude"].val == 2.9567
        assert meta["elevation"].val == 112.0
        assert meta["variable"].val == "soil_moisture"
        assert meta["instrument"].depth.start == 0.05
        assert meta["instrument"].depth.end == 0.05
        assert meta["instrument"].val == "ThetaProbe-ML2X"

    def test_malformed_file(self):
        """Creating a DataFile from the malformed test file raises IOError."""
        malformed_root = os.path.join(os.path.dirname(__file__), "test_data",
                                      "malformed")

        with pytest.raises(IOError):
            DataFile(malformed_root, "mal_formed_file.txt")

    def test_get_info_from_file(self):
        """Header and file name elements are extracted from the CEOP-sep file."""
        (
            header_elements,
            _,
            _,
            filename_elements,
        ) = self.filehandler_ceop_sep.get_elements_from_file()

        assert sorted(header_elements) == sorted([
            "2007/01/01",
            "01:00",
            "2007/01/01",
            "01:00",
            "SMOSMANIA",
            "SMOSMANIA",
            "Narbonne",
            "43.15000",
            "2.95670",
            "112.00",
            "0.05",
            "0.05",
            "0.2140",
            "U",
            "M",
        ])
        assert sorted(filename_elements) == sorted([
            "SMOSMANIA",
            "SMOSMANIA",
            "Narbonne",
            "sm",
            "0.050000",
            "0.050000",
            "ThetaProbe-ML2X",
            "20070101",
            "20070131.stm",
        ])

    def test_get_metadata_header_values(self):
        """Metadata parsed from the header-values file matches the reference."""
        metadata, _ = self.filehandler_header_values.get_metadata_header_values()

        for key in metadata.keys():
            assert metadata[key].val == self.metadata_ref[key]
        assert metadata["variable"].depth.start == self.metadata_depth_from
        assert metadata["variable"].depth.end == self.metadata_depth_to

    def test_reader_format_header_values(self):
        """Metadata and data of the header-values file are read as expected."""
        filehandler = self.filehandler_header_values
        self._assert_narbonne_metadata(filehandler)

        data = filehandler.read_data()
        assert isinstance(data, pd.DataFrame)
        assert data.index[7] == pd.Timestamp("2007-1-1 8:0:0")
        assert sorted(data.columns) == sorted(
            ["soil_moisture", "soil_moisture_flag", "soil_moisture_orig_flag"])
        assert data["soil_moisture"].values[8] == 0.2135
        assert data["soil_moisture_flag"].values[8] == "U"
        assert data["soil_moisture_orig_flag"].values[8] == "M"

    def test_get_metadata_ceop_sep(self):
        """Metadata parsed from the CEOP-sep file matches the reference."""
        metadata, _ = self.filehandler_ceop_sep.get_metadata_ceop_sep()

        for key in metadata.keys():
            assert metadata[key].val == self.metadata_ref[key]
        assert metadata["variable"].depth.start == self.metadata_depth_from
        assert metadata["variable"].depth.end == self.metadata_depth_to

    def test_reader_format_ceop_sep(self):
        """Metadata and data of the CEOP-sep file are read as expected."""
        filehandler = self.filehandler_ceop_sep
        self._assert_narbonne_metadata(filehandler)

        data = filehandler.read_data()
        assert isinstance(data, pd.DataFrame)
        assert data.index[7] == pd.Timestamp("2007-1-1 8:0:0")
        assert sorted(data.columns) == sorted(
            ["soil_moisture", "soil_moisture_flag", "soil_moisture_orig_flag"])
        assert data["soil_moisture"].values[8] == 0.2135
        assert data["soil_moisture_flag"].values[8] == "U"
        assert data["soil_moisture_orig_flag"].values[347] == "M"

    def test_reader_get_format(self):
        """The file type is detected for both formats."""
        assert self.filehandler_ceop_sep.file_type == "ceop_sep"
        assert self.filehandler_header_values.file_type == "header_values"

    def test_get_min_max_from_file(self):
        """Both handlers report the same first and last time stamp."""
        # Check each handler's own time range (from AND to).
        for filehandler in (
                self.filehandler_ceop_sep,
                self.filehandler_header_values,
        ):
            assert filehandler.metadata[
                "timerange_from"].val == Timestamp(2007, 1, 1, 1)
            assert filehandler.metadata[
                "timerange_to"].val == Timestamp(2007, 1, 31, 23)
Example #12
0
    def from_metadata_csv(cls,
                          data_root,
                          meta_csv_file,
                          network=None,
                          temp_root=gettempdir()):
        """
        Load a previously created and stored filelist from a metadata csv
        file.

        Parameters
        ----------
        data_root : IsmnRoot or str or Path
            Path where the ismn data is stored, can also be a zip file
        meta_csv_file : str or Path
            Csv file where the metadata is stored.
        network : list, optional (default: None)
            List of networks that are considered. Rows of all other networks
            are skipped, no filehandler is created for them.
        temp_root : str or Path, optional (default: gettempdir())
            Temporary folder where extracted data is copied during reading from
            zip archive.

        Returns
        -------
        collection : cls
            New instance created from the root and the filelist
            (network name -> list of DataFile).
        """
        if network is not None:
            network = np.atleast_1d(network)

        root = data_root if isinstance(data_root, IsmnRoot) \
            else IsmnRoot(data_root)

        print(f"Found existing ismn metadata in {meta_csv_file}.")

        metadata_df = pd.read_csv(meta_csv_file,
                                  index_col=0,
                                  header=[0, 1],
                                  low_memory=False,
                                  engine='c')

        # parse date cols as datetime
        for col in ['timerange_from', 'timerange_to']:
            metadata_df[col, 'val'] = pd.to_datetime(metadata_df[col, 'val'])

        # first-level column names, de-duplicated in order of appearance
        lvars = list(OrderedDict.fromkeys(c[0] for c in metadata_df.columns))

        # we assume triples for all vars except these, so they must be at the end
        # NOTE(review): this assert is skipped when Python runs with -O.
        assert lvars[-2:] == ['file_path', 'file_type'], \
            "file_type and file_path must be at the end."

        filelist = OrderedDict([])

        all_networks = metadata_df['network']['val'].values

        columns = np.array(list(metadata_df.columns))

        # Names of the variables stored as (depth_from, depth_to, val) triples.
        # NOTE(review): np.unique returns the names sorted, so this presumes
        # the variable columns in the csv are stored in that same order.
        var_names = np.unique(columns[:-2][:, 0])

        for row, row_network in zip(
                metadata_df.values,
                all_networks):  # todo: slow!?? parallelise?
            if (network is not None) and \
                    not np.isin([row_network], network)[0]:
                continue

            vals = row[:-2].reshape(-1, 3)

            metadata = MetaData([
                MetaVar.from_tuple(
                    (name, vals[idx][2], vals[idx][0], vals[idx][1]))
                for idx, name in enumerate(var_names)
            ])

            # Metadata comes from the csv, so it is not loaded from the file.
            f = DataFile(root=root,
                         file_path=str(PurePosixPath(row[-2])),
                         load_metadata=False,
                         temp_root=temp_root)

            f.metadata = metadata
            f.file_type = row[-1]

            filelist.setdefault(f.metadata['network'].val, []).append(f)

        return cls(root, filelist=filelist)