def setUp(self) -> None:
    """Open the sensor file from the unzipped test data and store the reference values."""
    # Names that identify the sensor file; they are also used to build its path.
    self.network_name = "COSMOS"
    self.station_name = "Barrow-ARM"
    self.instrument = "Cosmic-ray-Probe"
    self.variable = "soil_moisture"

    # Depth range that appears (with 6 decimals) in the file name.
    self.depth_from, self.depth_to = 0.0, 0.21

    # Reference coordinates for the tests of this class.
    self.latitude = 71.32980
    self.longitude = -156.62870
    self.elevation = 4.0

    data_root = testdata_path / "Data_seperate_files_20170810_20180809"
    file_name = f"{self.network_name}_{self.network_name}_{self.station_name}_sm_{self.depth_from:.6f}_{self.depth_to:.6f}_{self.instrument}_20170810_20180809.stm"
    relative_path = Path(self.network_name, self.station_name, file_name)

    self.file = DataFile(data_root, relative_path)

    # Reference record for the time stamp 2017-08-11 13:00.
    reference_record = {"datetime": "2017-08-11 13:00:00"}
    reference_record[self.variable] = 0.183
    reference_record[f"{self.variable}_flag"] = "G"
    reference_record[f"{self.variable}_orig_flag"] = "M"
    self.data_should_201708113 = reference_record
def setUp(self) -> None:
    """Create one file handler per file format and define the reference metadata."""
    here = os.path.dirname(__file__)

    # The same relative file path is used below both test data roots.
    stm_file = "SMOSMANIA_SMOSMANIA_Narbonne_sm_0.050000_0.050000_ThetaProbe-ML2X_20070101_20070131.stm"

    header_values_root = os.path.join(here, "test_data", "format_header_values")
    header_values_path = os.path.join("SMOSMANIA", stm_file)
    self.filehandler_header_values = DataFile(header_values_root, header_values_path)

    ceop_sep_root = os.path.join(here, "test_data", "format_ceop_sep")
    ceop_sep_path = os.path.join("SMOSMANIA", stm_file)
    self.filehandler_ceop_sep = DataFile(ceop_sep_root, ceop_sep_path)

    # Creating a handler for the malformed file is expected to raise IOError.
    malformed_root = os.path.join(here, "test_data", "malformed")
    malformed_path = os.path.join("mal_formed_file.txt")
    with pytest.raises(IOError):
        DataFile(malformed_root, malformed_path)

    self.metadata_depth_from = 0.05
    self.metadata_depth_to = 0.05

    self.metadata_ref = dict(
        [
            ("network", "SMOSMANIA"),
            ("station", "Narbonne"),
            ("latitude", 43.15),
            ("longitude", 2.9567),
            ("elevation", 112.0),
            ("variable", "soil_moisture"),
            ("timerange_from", Timestamp(2007, 1, 1, 1)),
            ("timerange_to", Timestamp(2007, 1, 31, 23)),
            ("instrument", "ThetaProbe-ML2X"),
        ]
    )

    # Copy of the reference metadata with some entries added / replaced.
    ceop_reference = dict(self.metadata_ref)
    ceop_reference.update(
        {
            "depth_from": ["multiple"],
            "depth_to": ["multiple"],
            "variable": ["ts", "sm"],
            "sensor": "n.s",
        }
    )
    self.metadata_ref_ceop = ceop_reference
def setUp(self) -> None:
    """Create one file handler per file format and define the reference metadata."""
    module_dir = os.path.dirname(__file__)

    # Both formats are stored under the same relative path in their own root.
    relative_stm = os.path.join(
        'SMOSMANIA',
        'SMOSMANIA_SMOSMANIA_Narbonne_sm_0.050000_0.050000_ThetaProbe-ML2X_20070101_20070131.stm'
    )

    self.filehandler_header_values = DataFile(
        os.path.join(module_dir, 'test_data', 'format_header_values'),
        relative_stm)

    self.filehandler_ceop_sep = DataFile(
        os.path.join(module_dir, 'test_data', 'format_ceop_sep'),
        relative_stm)

    # Creating a handler for the malformed file is expected to raise IOError.
    bad_root = os.path.join(module_dir, 'test_data', 'malformed')
    bad_file = os.path.join('mal_formed_file.txt')
    with pytest.raises(IOError):
        DataFile(bad_root, bad_file)

    self.metadata_depth_from = 0.05
    self.metadata_depth_to = 0.05

    reference = {}
    reference['network'] = 'SMOSMANIA'
    reference['station'] = 'Narbonne'
    reference['latitude'] = 43.15
    reference['longitude'] = 2.9567
    reference['elevation'] = 112.0
    reference['variable'] = 'soil_moisture'
    reference['timerange_from'] = Timestamp(2007, 1, 1, 1)
    reference['timerange_to'] = Timestamp(2007, 1, 31, 23)
    reference['instrument'] = 'ThetaProbe-ML2X'
    self.metadata_ref = reference

    # Copy of the reference metadata with some entries added / replaced.
    self.metadata_ref_ceop = dict(reference)
    for key, value in (
        ('depth_from', ['multiple']),
        ('depth_to', ['multiple']),
        ('variable', ['ts', 'sm']),
        ('sensor', 'n.s'),
    ):
        self.metadata_ref_ceop[key] = value
def setUp(self) -> None:
    """Run the parent setUp, then replace ``self.file`` with a handler on the header zip archive."""
    super(Test_DataFileHeaderValuesZipped, self).setUp()

    # The name / depth attributes used below are not set in this method;
    # presumably the parent setUp provides them -- verify against the base class.
    archive_dir = testdata_path / "zip_archives" / "header"
    archive = archive_dir / "Data_seperate_files_header_20170810_20180809.zip"

    file_name = f"{self.network_name}_{self.network_name}_{self.station_name}_sm_{self.depth_from:.6f}_{self.depth_to:.6f}_{self.instrument}_20170810_20180809.stm"
    path_in_archive = Path(self.network_name, self.station_name, file_name)

    self.file = DataFile(archive, path_in_archive)
def setUp(self) -> None:
    """Run the parent setUp, then replace ``self.file`` with a handler on the ceop zip archive."""
    super(Test_DataFileCeopSepZipped, self).setUp()

    # The name / depth attributes used below are not set in this method;
    # presumably the parent setUp provides them -- verify against the base class.
    archive_dir = testdata_path / "zip_archives" / "ceop"
    archive = archive_dir / "Data_seperate_files_20170810_20180809.zip"

    file_name = f"{self.network_name}_{self.network_name}_{self.station_name}_sm_{self.depth_from:.6f}_{self.depth_to:.6f}_{self.instrument}_20170810_20180809.stm"
    path_in_archive = Path(self.network_name, self.station_name, file_name)

    self.file = DataFile(archive, path_in_archive)
def setUp(self):
    """
    Create the sensor under test and check the name it was given.
    """
    instrument = 'Cosmic-ray-Probe'
    depth = Depth(0, 0.21)
    variable = 'soil_moisture'

    data_root = os.path.join(rpath, "Data_seperate_files_20170810_20180809")
    stm_file = "COSMOS_COSMOS_Barrow-ARM_sm_0.000000_0.210000_Cosmic-ray-Probe_20170810_20180809.stm"
    relative_path = os.path.join("COSMOS", "Barrow-ARM", stm_file)

    handler = DataFile(data_root, relative_path)
    self.sensor = Sensor(instrument, variable, depth, filehandler=handler)

    # The sensor name is compared against instrument, variable and depth range.
    name_template = '{}_{}_{:1.6f}_{:1.6f}'
    expected_name = name_template.format(
        instrument, variable, depth.start, depth.end)
    assert self.sensor.name == expected_name
class Test_DataFileCeopSepUnzipped(unittest.TestCase):
    """Tests for a ``DataFile`` that is read from an unzipped data directory."""

    # from dir, no _load_data

    def setUp(self) -> None:
        """Open the sensor file and store the reference values used by the tests."""
        self.station_name = "Barrow-ARM"
        self.network_name = "COSMOS"
        self.instrument = "Cosmic-ray-Probe"
        self.variable = "soil_moisture"
        self.depth_from = 0.0
        self.depth_to = 0.21
        self.longitude = -156.62870
        self.latitude = 71.32980
        self.elevation = 4.0

        root = testdata_path / "Data_seperate_files_20170810_20180809"
        filepath = Path(
            self.network_name,
            self.station_name,
            f"{self.network_name}_{self.network_name}_{self.station_name}_sm_{self.depth_from:.6f}_{self.depth_to:.6f}_{self.instrument}_20170810_20180809.stm",
        )

        self.file = DataFile(root, filepath)

        # Reference record for the time stamp 2017-08-11 13:00.
        self.data_should_201708113 = {
            "datetime": "2017-08-11 13:00:00",
            self.variable: 0.183,
            f"{self.variable}_flag": "G",
            f"{self.variable}_orig_flag": "M",
        }

    def test_metadata(self):
        """test reading the loaded metadata for a file (incl. static meta)"""

        assert self.file.check_metadata()
        assert not self.file.check_metadata(filter_meta_dict={"network": "WRONGNAME"})

        # Matching variable, depth range and metadata filter.
        assert self.file.check_metadata(
            self.variable,
            allowed_depth=Depth(0, 0.21),
            filter_meta_dict={
                "longitude": self.longitude,
                "variable": self.variable,
            },
        )

        # Neither an unknown variable nor a non-matching depth range may pass.
        assert not self.file.check_metadata("nonexistingvar", Depth(0, 0.1))
        assert not self.file.check_metadata(self.variable, Depth(98.0, 99.0))

        assert self.file.metadata["station"].val == self.station_name
        assert self.file.metadata["network"].val == self.network_name
        assert self.file.metadata["instrument"].depth.start == self.depth_from
        assert self.file.metadata["instrument"].depth.end == self.depth_to
        assert self.file.metadata["instrument"].val == self.instrument
        assert self.file.metadata["longitude"].val == self.longitude
        assert self.file.metadata["latitude"].val == self.latitude
        assert self.file.metadata["variable"].val == self.variable

        # NOTE(review): the result of this call is not asserted, so it only
        # verifies that filtering by time range does not raise -- confirm the
        # expected return value and assert it.
        self.file.check_metadata(
            filter_meta_dict={
                "timerange_from": datetime(2017, 8, 10, 0),
                "timerange_to": datetime(2018, 8, 9, 8),
            }
        )

    def test_data(self):
        """test reading the actual data for a file"""

        # todo: why is sm column called "variable"?
        timestamp = datetime(2017, 8, 11, 13)

        data = self.file.read_data()
        data_is = data.loc[timestamp]

        # Compare the value and both flag columns with the reference record.
        assert data_is[self.variable] == self.data_should_201708113[self.variable]
        assert (
            data_is[f"{self.variable}_flag"]
            == self.data_should_201708113[f"{self.variable}_flag"]
        )
        assert (
            data_is[f"{self.variable}_orig_flag"]
            == self.data_should_201708113[f"{self.variable}_orig_flag"]
        )

    def test_metadata_for_depth(self):
        """Check finding best matching metadata for file"""
        bestmeta = self.file.read_metadata(best_meta_for_sensor=True)
        allmeta = self.file.read_metadata(best_meta_for_sensor=False)

        assert bestmeta == allmeta  # no vars with multiple depths
        assert "saturation" not in allmeta
class TestReaders(unittest.TestCase):
    """
    Old readers, kept to test backward compatibility
    """

    def setUp(self) -> None:
        """Create one file handler per file format and define the reference metadata."""
        filename_format_header_values_root = os.path.join(
            os.path.dirname(__file__), 'test_data', 'format_header_values')
        filename_format_header_values_filepath = os.path.join(
            'SMOSMANIA',
            'SMOSMANIA_SMOSMANIA_Narbonne_sm_0.050000_0.050000_ThetaProbe-ML2X_20070101_20070131.stm'
        )
        self.filehandler_header_values = DataFile(
            filename_format_header_values_root,
            filename_format_header_values_filepath)

        filename_format_ceop_sep_root = os.path.join(
            os.path.dirname(__file__), 'test_data', 'format_ceop_sep')
        filename_format_ceop_sep_filepath = os.path.join(
            'SMOSMANIA',
            'SMOSMANIA_SMOSMANIA_Narbonne_sm_0.050000_0.050000_ThetaProbe-ML2X_20070101_20070131.stm'
        )
        self.filehandler_ceop_sep = DataFile(
            filename_format_ceop_sep_root, filename_format_ceop_sep_filepath)

        # Creating a handler for the malformed file is expected to raise IOError.
        filename_malformed_root = os.path.join(
            os.path.dirname(__file__), 'test_data', 'malformed')
        filename_malformed_filepath = os.path.join('mal_formed_file.txt')
        with pytest.raises(IOError):
            DataFile(filename_malformed_root, filename_malformed_filepath)

        self.metadata_ref = {
            'network': 'SMOSMANIA',
            'station': 'Narbonne',
            'latitude': 43.15,
            'longitude': 2.9567,
            'elevation': 112.0,
            'variable': 'soil_moisture',
            'timerange_from': Timestamp(2007, 1, 1, 1),
            'timerange_to': Timestamp(2007, 1, 31, 23),
            'instrument': 'ThetaProbe-ML2X'
        }
        self.metadata_depth_from = 0.05
        self.metadata_depth_to = 0.05

        # Copy of the reference metadata with some entries added / replaced.
        self.metadata_ref_ceop = dict(self.metadata_ref)
        self.metadata_ref_ceop['depth_from'] = ['multiple']
        self.metadata_ref_ceop['depth_to'] = ['multiple']
        self.metadata_ref_ceop['variable'] = ['ts', 'sm']
        self.metadata_ref_ceop['sensor'] = 'n.s'

    def test_get_info_from_file(self):
        """Check the header and file name elements extracted from the ceop_sep file."""
        header_elements, _, _, filename_elements = \
            self.filehandler_ceop_sep.get_elements_from_file()

        assert sorted(header_elements) == sorted([
            '2007/01/01', '01:00', '2007/01/01', '01:00', 'SMOSMANIA',
            'SMOSMANIA', 'Narbonne', '43.15000', '2.95670', '112.00', '0.05',
            '0.05', '0.2140', 'U', 'M'
        ])
        assert sorted(filename_elements) == sorted([
            'SMOSMANIA', 'SMOSMANIA', 'Narbonne', 'sm', '0.050000', '0.050000',
            'ThetaProbe-ML2X', '20070101', '20070131.stm'
        ])

    def test_get_metadata_header_values(self):
        """Compare the metadata of the header_values file with the reference."""
        metadata, _ = self.filehandler_header_values.get_metadata_header_values()

        for key in metadata.keys():
            assert metadata[key].val == self.metadata_ref[key]
        assert metadata['variable'].depth.start == self.metadata_depth_from
        assert metadata['variable'].depth.end == self.metadata_depth_to

    def test_reader_format_header_values(self):
        """Check metadata and data read from the header_values file."""
        filehandler = self.filehandler_header_values

        assert filehandler.metadata['network'].val == 'SMOSMANIA'
        assert filehandler.metadata['station'].val == 'Narbonne'
        assert filehandler.metadata['latitude'].val == 43.15
        assert filehandler.metadata['longitude'].val == 2.9567
        assert filehandler.metadata['elevation'].val == 112.0
        assert filehandler.metadata['variable'].val == 'soil_moisture'
        assert filehandler.metadata['instrument'].depth.start == 0.05
        assert filehandler.metadata['instrument'].depth.end == 0.05
        assert filehandler.metadata['instrument'].val == 'ThetaProbe-ML2X'

        data = filehandler.read_data()
        assert isinstance(data, pd.DataFrame)
        assert data.index[7] == pd.Timestamp('2007-1-1 8:0:0')
        assert sorted(data.columns) == sorted(
            ['soil_moisture', 'soil_moisture_flag', 'soil_moisture_orig_flag'])
        assert data['soil_moisture'].values[8] == 0.2135
        assert data['soil_moisture_flag'].values[8] == 'U'
        assert data['soil_moisture_orig_flag'].values[8] == 'M'

    def test_get_metadata_ceop_sep(self):
        """Compare the metadata of the ceop_sep file with the reference."""
        filehandler = self.filehandler_ceop_sep
        metadata, _ = filehandler.get_metadata_ceop_sep()

        for key in metadata.keys():
            assert metadata[key].val == self.metadata_ref[key]
        assert metadata['variable'].depth.start == self.metadata_depth_from
        assert metadata['variable'].depth.end == self.metadata_depth_to

    def test_reader_format_ceop_sep(self):
        """Check metadata and data read from the ceop_sep file."""
        filehandler = self.filehandler_ceop_sep

        assert filehandler.metadata['network'].val == 'SMOSMANIA'
        assert filehandler.metadata['station'].val == 'Narbonne'
        assert filehandler.metadata['latitude'].val == 43.15
        assert filehandler.metadata['longitude'].val == 2.9567
        assert filehandler.metadata['elevation'].val == 112.0
        assert filehandler.metadata['variable'].val == 'soil_moisture'
        assert filehandler.metadata['instrument'].depth.start == 0.05
        assert filehandler.metadata['instrument'].depth.end == 0.05
        assert filehandler.metadata['instrument'].val == 'ThetaProbe-ML2X'

        data = filehandler.read_data()
        assert isinstance(data, pd.DataFrame)
        assert data.index[7] == pd.Timestamp('2007-1-1 8:0:0')
        assert sorted(data.columns) == sorted(
            ['soil_moisture', 'soil_moisture_flag', 'soil_moisture_orig_flag'])
        assert data['soil_moisture'].values[8] == 0.2135
        assert data['soil_moisture_flag'].values[8] == 'U'
        assert data['soil_moisture_orig_flag'].values[347] == 'M'

    def test_reader_get_format(self):
        """Each handler must report the file type of the file it was created for."""
        assert self.filehandler_ceop_sep.file_type == 'ceop_sep'
        assert self.filehandler_header_values.file_type == 'header_values'

    def test_get_min_max_from_file(self):
        """Check the time range stored in the metadata of both handlers."""
        assert self.filehandler_ceop_sep.metadata['timerange_from'].val == \
            Timestamp(2007, 1, 1, 1)
        assert self.filehandler_ceop_sep.metadata['timerange_to'].val == \
            Timestamp(2007, 1, 31, 23)

        assert self.filehandler_header_values.metadata['timerange_from'].val == \
            Timestamp(2007, 1, 1, 1)
        # Check the header_values handler here; the ceop_sep handler is
        # already covered by the assertion above.
        assert self.filehandler_header_values.metadata['timerange_to'].val == \
            Timestamp(2007, 1, 31, 23)
def _read_station_dir(
    root: Union[IsmnRoot, Path, str],
    stat_dir: Union[Path, str],
    temp_root: Path,
) -> "tuple[list, list]":
    """
    Parallelizable function to read metadata for files in station dir

    Parameters
    ----------
    root : IsmnRoot or Path or str
        Data root. Anything that is not an IsmnRoot is wrapped in a new
        IsmnRoot, which is closed again before this function returns.
    stat_dir : Path or str
        Station directory that is searched for "*.csv" and "*.stm" files.
    temp_root : Path
        Passed on to StaticMetaFile and DataFile.

    Returns
    -------
    filelist : list
        One (network, station, DataFile) tuple for each data file that
        could be loaded.
    infos : list
        Messages on processed files and on problems that occurred.
    """
    infos = []

    # Only a root that is created here is closed here.
    proc_root = not isinstance(root, IsmnRoot)
    if proc_root:
        root = IsmnRoot(root)

    try:
        csv = root.find_files(stat_dir, "*.csv")

        try:
            if len(csv) == 0:
                raise IsmnFileError(
                    "Expected 1 csv file for station, found 0. "
                    "Use empty static metadata."
                )
            else:
                if len(csv) > 1:
                    infos.append(
                        f"Expected 1 csv file for station, found {len(csv)}. "
                        f"Use first file in dir."
                    )
                static_meta_file = StaticMetaFile(
                    root, csv[0], load_metadata=True, temp_root=temp_root
                )
                station_meta = static_meta_file.metadata
        except IsmnFileError as e:
            # Fall back to the template values if no static metadata is loaded.
            infos.append(f"Error loading static meta for station: {e}")
            station_meta = MetaData(
                [MetaVar(k, v) for k, v in CSV_META_TEMPLATE.items()]
            )

        data_files = root.find_files(stat_dir, "*.stm")

        filelist = []

        for file_path in data_files:
            try:
                f = DataFile(root, file_path, temp_root=temp_root)
            except IOError as e:
                # Files that cannot be loaded are reported and skipped.
                infos.append(f"Error loading ismn file: {e}")
                continue

            f.metadata.merge(station_meta, inplace=True)

            # Reduce the merged metadata to the depth range of the instrument.
            instrument_depth = f.metadata["instrument"].depth
            f.metadata = f.metadata.best_meta_for_depth(
                Depth(instrument_depth.start, instrument_depth.end)
            )

            network = f.metadata["network"].val
            station = f.metadata["station"].val

            filelist.append((network, station, f))

            infos.append(f"Processed file {file_path}")
    finally:
        # Release a root opened above, also if an unexpected error is raised.
        if proc_root:
            root.close()

    return filelist, infos
def from_metadata_csv(
    cls, data_root, meta_csv_file, network=None, temp_root=gettempdir()
):
    """
    Load a previously created and stored filelist from
    :func:`ismn.filecollection.IsmnFileCollection.to_metadata_csv`

    Parameters
    ----------
    data_root : IsmnRoot or str or Path
        Path where the ismn data is stored, can also be a zip file
    meta_csv_file : str or Path
        Csv file where the metadata is stored.
    network : list, optional (default: None)
        List of networks that are considered.
        Files of other networks are skipped.
    temp_root : str or Path, optional (default: gettempdir())
        Temporary folder where extracted data is copied during reading from
        zip archive.

    Returns
    -------
    collection : cls
        New instance, created from the root and the loaded filelist.
    """
    if network is not None:
        network = np.atleast_1d(network)

    root = data_root if isinstance(data_root, IsmnRoot) else IsmnRoot(data_root)

    print(f"Found existing ismn metadata in {meta_csv_file}.")

    metadata_df = _load_metadata_df(meta_csv_file)

    filelist = OrderedDict([])

    all_networks = metadata_df["network"]["val"].values

    # The last two columns hold file path and file type. All columns before
    # are read in groups of three per variable; the names are the same for
    # each row, so they are collected once.
    columns = np.array(list(metadata_df.columns))
    var_names = np.unique(columns[:-2][:, 0])

    # todo: slow!?? parallelise?
    for row_network, row in zip(all_networks, metadata_df.values):
        if (network is not None) and not np.isin([row_network], network)[0]:
            continue

        vals = row[:-2].reshape(-1, 3)

        metadata = MetaData(
            [
                MetaVar.from_tuple((name, vals[j][2], vals[j][0], vals[j][1]))
                for j, name in enumerate(var_names)
            ]
        )

        # The metadata is taken from the csv file, not from the data file.
        f = DataFile(
            root=root,
            file_path=str(PurePosixPath(row[-2])),
            load_metadata=False,
            temp_root=temp_root,
        )

        f.metadata = metadata
        f.file_type = row[-1]

        this_nw = f.metadata["network"].val

        filelist.setdefault(this_nw, []).append(f)

    # NOTE(review): this stores the data frame on the class, not on the
    # instance that is returned -- confirm that this is intended.
    if network is None:
        cls.metadata_df = metadata_df
    else:
        flags = np.isin(metadata_df["network"]["val"].values, network)
        cls.metadata_df = metadata_df.loc[flags]

    return cls(root, filelist=filelist)
class TestReaders(unittest.TestCase):
    """
    Old readers, kept to test backward compatibility
    """

    def setUp(self) -> None:
        """Create one file handler per file format and define the reference metadata."""
        filename_format_header_values_root = os.path.join(
            os.path.dirname(__file__), "test_data", "format_header_values")
        filename_format_header_values_filepath = os.path.join(
            "SMOSMANIA",
            "SMOSMANIA_SMOSMANIA_Narbonne_sm_0.050000_0.050000_ThetaProbe-ML2X_20070101_20070131.stm",
        )
        self.filehandler_header_values = DataFile(
            filename_format_header_values_root,
            filename_format_header_values_filepath,
        )

        filename_format_ceop_sep_root = os.path.join(
            os.path.dirname(__file__), "test_data", "format_ceop_sep")
        filename_format_ceop_sep_filepath = os.path.join(
            "SMOSMANIA",
            "SMOSMANIA_SMOSMANIA_Narbonne_sm_0.050000_0.050000_ThetaProbe-ML2X_20070101_20070131.stm",
        )
        self.filehandler_ceop_sep = DataFile(
            filename_format_ceop_sep_root, filename_format_ceop_sep_filepath)

        # Creating a handler for the malformed file is expected to raise IOError.
        filename_malformed_root = os.path.join(
            os.path.dirname(__file__), "test_data", "malformed")
        filename_malformed_filepath = os.path.join("mal_formed_file.txt")
        with pytest.raises(IOError):
            DataFile(filename_malformed_root, filename_malformed_filepath)

        self.metadata_ref = {
            "network": "SMOSMANIA",
            "station": "Narbonne",
            "latitude": 43.15,
            "longitude": 2.9567,
            "elevation": 112.0,
            "variable": "soil_moisture",
            "timerange_from": Timestamp(2007, 1, 1, 1),
            "timerange_to": Timestamp(2007, 1, 31, 23),
            "instrument": "ThetaProbe-ML2X",
        }
        self.metadata_depth_from = 0.05
        self.metadata_depth_to = 0.05

        # Copy of the reference metadata with some entries added / replaced.
        self.metadata_ref_ceop = dict(self.metadata_ref)
        self.metadata_ref_ceop["depth_from"] = ["multiple"]
        self.metadata_ref_ceop["depth_to"] = ["multiple"]
        self.metadata_ref_ceop["variable"] = ["ts", "sm"]
        self.metadata_ref_ceop["sensor"] = "n.s"

    def test_get_info_from_file(self):
        """Check the header and file name elements extracted from the ceop_sep file."""
        (
            header_elements,
            _,
            _,
            filename_elements,
        ) = self.filehandler_ceop_sep.get_elements_from_file()

        assert sorted(header_elements) == sorted([
            "2007/01/01",
            "01:00",
            "2007/01/01",
            "01:00",
            "SMOSMANIA",
            "SMOSMANIA",
            "Narbonne",
            "43.15000",
            "2.95670",
            "112.00",
            "0.05",
            "0.05",
            "0.2140",
            "U",
            "M",
        ])
        assert sorted(filename_elements) == sorted([
            "SMOSMANIA",
            "SMOSMANIA",
            "Narbonne",
            "sm",
            "0.050000",
            "0.050000",
            "ThetaProbe-ML2X",
            "20070101",
            "20070131.stm",
        ])

    def test_get_metadata_header_values(self):
        """Compare the metadata of the header_values file with the reference."""
        metadata, _ = self.filehandler_header_values.get_metadata_header_values()

        for key in metadata.keys():
            assert metadata[key].val == self.metadata_ref[key]
        assert metadata["variable"].depth.start == self.metadata_depth_from
        assert metadata["variable"].depth.end == self.metadata_depth_to

    def test_reader_format_header_values(self):
        """Check metadata and data read from the header_values file."""
        filehandler = self.filehandler_header_values

        assert filehandler.metadata["network"].val == "SMOSMANIA"
        assert filehandler.metadata["station"].val == "Narbonne"
        assert filehandler.metadata["latitude"].val == 43.15
        assert filehandler.metadata["longitude"].val == 2.9567
        assert filehandler.metadata["elevation"].val == 112.0
        assert filehandler.metadata["variable"].val == "soil_moisture"
        assert filehandler.metadata["instrument"].depth.start == 0.05
        assert filehandler.metadata["instrument"].depth.end == 0.05
        assert filehandler.metadata["instrument"].val == "ThetaProbe-ML2X"

        data = filehandler.read_data()
        assert isinstance(data, pd.DataFrame)
        assert data.index[7] == pd.Timestamp("2007-1-1 8:0:0")
        assert sorted(data.columns) == sorted(
            ["soil_moisture", "soil_moisture_flag", "soil_moisture_orig_flag"])
        assert data["soil_moisture"].values[8] == 0.2135
        assert data["soil_moisture_flag"].values[8] == "U"
        assert data["soil_moisture_orig_flag"].values[8] == "M"

    def test_get_metadata_ceop_sep(self):
        """Compare the metadata of the ceop_sep file with the reference."""
        filehandler = self.filehandler_ceop_sep
        metadata, _ = filehandler.get_metadata_ceop_sep()

        for key in metadata.keys():
            assert metadata[key].val == self.metadata_ref[key]
        assert metadata["variable"].depth.start == self.metadata_depth_from
        assert metadata["variable"].depth.end == self.metadata_depth_to

    def test_reader_format_ceop_sep(self):
        """Check metadata and data read from the ceop_sep file."""
        filehandler = self.filehandler_ceop_sep

        assert filehandler.metadata["network"].val == "SMOSMANIA"
        assert filehandler.metadata["station"].val == "Narbonne"
        assert filehandler.metadata["latitude"].val == 43.15
        assert filehandler.metadata["longitude"].val == 2.9567
        assert filehandler.metadata["elevation"].val == 112.0
        assert filehandler.metadata["variable"].val == "soil_moisture"
        assert filehandler.metadata["instrument"].depth.start == 0.05
        assert filehandler.metadata["instrument"].depth.end == 0.05
        assert filehandler.metadata["instrument"].val == "ThetaProbe-ML2X"

        data = filehandler.read_data()
        assert isinstance(data, pd.DataFrame)
        assert data.index[7] == pd.Timestamp("2007-1-1 8:0:0")
        assert sorted(data.columns) == sorted(
            ["soil_moisture", "soil_moisture_flag", "soil_moisture_orig_flag"])
        assert data["soil_moisture"].values[8] == 0.2135
        assert data["soil_moisture_flag"].values[8] == "U"
        assert data["soil_moisture_orig_flag"].values[347] == "M"

    def test_reader_get_format(self):
        """Each handler must report the file type of the file it was created for."""
        assert self.filehandler_ceop_sep.file_type == "ceop_sep"
        assert self.filehandler_header_values.file_type == "header_values"

    def test_get_min_max_from_file(self):
        """Check the time range stored in the metadata of both handlers."""
        assert self.filehandler_ceop_sep.metadata[
            "timerange_from"].val == Timestamp(2007, 1, 1, 1)
        assert self.filehandler_ceop_sep.metadata[
            "timerange_to"].val == Timestamp(2007, 1, 31, 23)

        assert self.filehandler_header_values.metadata[
            "timerange_from"].val == Timestamp(2007, 1, 1, 1)
        # Check the header_values handler here; the ceop_sep handler is
        # already covered by the assertion above.
        assert self.filehandler_header_values.metadata[
            "timerange_to"].val == Timestamp(2007, 1, 31, 23)
def from_metadata_csv(cls,
                      data_root,
                      meta_csv_file,
                      network=None,
                      temp_root=gettempdir()):
    """
    Load a previously created and stored filelist from a metadata csv file.

    Parameters
    ----------
    data_root : IsmnRoot or str or Path
        Path where the ismn data is stored, can also be a zip file
    meta_csv_file : str or Path
        Csv file where the metadata is stored.
    network : list, optional (default: None)
        List of networks that are considered.
        Files of other networks are skipped.
    temp_root : str or Path, optional (default: gettempdir())
        Temporary folder where extracted data is copied during reading from
        zip archive.

    Returns
    -------
    collection : cls
        New instance, created from the root and the loaded filelist.
    """
    if network is not None:
        network = np.atleast_1d(network)

    root = data_root if isinstance(data_root, IsmnRoot) else IsmnRoot(data_root)

    print(f"Found existing ismn metadata in {meta_csv_file}.")

    metadata_df = pd.read_csv(meta_csv_file,
                              index_col=0,
                              header=[0, 1],
                              low_memory=False,
                              engine='c')

    # parse date cols as datetime
    for col in ['timerange_from', 'timerange_to']:
        metadata_df[col, 'val'] = pd.to_datetime(metadata_df[col, 'val'])

    # First level column names, without duplicates, in order of appearance.
    lvars = list(dict.fromkeys(c[0] for c in metadata_df.columns))

    # we assume triples for all vars except these, so they must be at the end
    assert lvars[-2:] == ['file_path', 'file_type'], \
        "file_type and file_path must be at the end."

    filelist = OrderedDict([])

    all_networks = metadata_df['network']['val'].values

    # The variable names are the same for each row, so they are collected once.
    columns = np.array(list(metadata_df.columns))
    var_names = np.unique(columns[:-2][:, 0])

    # todo: slow!?? parallelise?
    for row_network, row in zip(all_networks, metadata_df.values):
        if (network is not None) and not np.isin([row_network], network)[0]:
            continue

        vals = row[:-2].reshape(-1, 3)

        metadata = MetaData([
            MetaVar.from_tuple((name, vals[j][2], vals[j][0], vals[j][1]))
            for j, name in enumerate(var_names)
        ])

        # The metadata is taken from the csv file, not from the data file.
        f = DataFile(root=root,
                     file_path=str(PurePosixPath(row[-2])),
                     load_metadata=False,
                     temp_root=temp_root)

        f.metadata = metadata
        f.file_type = row[-1]

        this_nw = f.metadata['network'].val

        filelist.setdefault(this_nw, []).append(f)

    return cls(root, filelist=filelist)