def setUp(self) -> None:
    """Build the two MetaData collections shared by all tests.

    ``self.dat`` holds four vars (one without depth, one with a negative
    depth, one duplicate-name candidate); ``self.other`` holds a second
    'dup' entry with a different depth plus one depth-less var, so that
    merge/best-meta behaviour can be exercised.
    """
    # Renamed from `vars` to avoid shadowing the `vars` builtin.
    metavars = [
        MetaVar("first", 1, Depth(0, 1)),
        MetaVar("second", 0),
        MetaVar("neg", -99, Depth(-0.25, -1)),
        MetaVar("dup", "3rd", Depth(1, 3)),
    ]
    self.dat = MetaData(metavars)
    self.other = MetaData(
        [MetaVar("dup", "3rd", Depth(2, 4)), MetaVar("4", 4)])
def metadata(self) -> MetaData:
    """
    Collect the metadata from all sensors at station.

    Returns
    -------
    metadata : MetaData
        Merged metadata of every sensor attached to this station.
    """
    per_sensor = [sensor.metadata for sensor in self.sensors.values()]
    return MetaData().merge(per_sensor, inplace=False)
def get_metadata_ceop_sep(self, elements=None):
    """
    Get metadata in the file format called CEOP in separate files.

    Parameters
    ----------
    elements : dict, optional (default: None)
        Previously loaded elements can be passed here to avoid
        reading the file again.

    Returns
    -------
    metadata : MetaData
        Metadata information.
    depth : Depth
        Sensor Depth, generated from file name
    """
    if elements:
        headr = elements['headr']
        last = elements['last']
        fname = elements['fname']
    else:
        headr, _, last, fname = self.get_elements_from_file()

    # The instrument name may itself contain '_' and therefore span
    # several filename tokens; re-join them in that case.
    if len(fname) > 9:
        instr = '_'.join(fname[6:len(fname) - 2])
    else:
        instr = fname[6]

    # Translate the variable abbreviation where a mapping exists,
    # otherwise keep the raw token.
    variable = const.VARIABLE_LUT.get(fname[3], fname[3])

    timerange_from = pd.to_datetime(' '.join(headr[:2]))
    timerange_to = pd.to_datetime(' '.join(last[:2]))
    depth = Depth(float(fname[4]), float(fname[5]))

    metadata = MetaData([
        MetaVar('network', fname[1]),
        MetaVar('station', fname[2]),
        MetaVar('variable', variable, depth),
        MetaVar('instrument', instr, depth),
        MetaVar('timerange_from', timerange_from),
        MetaVar('timerange_to', timerange_to),
        MetaVar('latitude', float(headr[7])),
        MetaVar('longitude', float(headr[8])),
        MetaVar('elevation', float(headr[9])),
    ])

    return metadata, depth
def test_MetaData(self):
    # Basic container protocol: length and membership (by name and by
    # MetaVar instance).
    assert len(self.dat) == 4
    assert "second" in self.dat
    assert self.dat[1] in self.dat
    # Integer indexing yields objects that unpack to
    # (name, value, depth_from, depth_to) tuples.
    assert tuple(self.dat[0]) == ("first", 1, 0.0, 1.0)
    # Vars created without a Depth report None for both depth bounds.
    assert tuple(self.dat[1]) == ("second", 0, None, None)
    # String indexing looks an element up by its name.
    assert self.dat["dup"] == self.dat[3]
    assert self.dat.keys() == ["first", "second", "neg", "dup"]
    # Indexing with a list of names returns a new MetaData subset.
    assert (MetaData([MetaVar.from_tuple(
        ("first", 1, 0.0, 1.0))]) == self.dat[["first"]])
def metadata(self) -> MetaData:
    """Metadata of the assigned file handler, or an empty collection
    when no file handler is set."""
    if self.filehandler is None:
        return MetaData()
    return self.filehandler.metadata
def read_metadata(self) -> MetaData:
    """
    Read csv file containing static variables into data frame.

    Returns
    -------
    metadata : MetaData
        Static metadata read from csv file.
    """
    # For zipped archives the csv must first be extracted into a
    # temporary directory before it can be parsed.
    if self.root.zip:
        if not self.root.isopen:
            self.root.open()
        with TemporaryDirectory(
                prefix='ismn', dir=self.temp_root) as tempdir:
            extracted = self.root.extract_file(self.file_path, tempdir)
            data = self.__read_csv(extracted)
    else:
        data = self.__read_csv(self.root.path / self.file_path)

    # read landcover classifications
    lc = data.loc[['land cover classification'
                   ]][['value', 'quantity_source_name']]

    # Defaults come from the template; values found in the csv
    # overwrite them below.
    lc_dict = {
        'CCI_landcover_2000': const.CSV_META_TEMPLATE['lc_2000'],
        'CCI_landcover_2005': const.CSV_META_TEMPLATE['lc_2005'],
        'CCI_landcover_2010': const.CSV_META_TEMPLATE['lc_2010'],
        'insitu': const.CSV_META_TEMPLATE['lc_insitu']
    }

    cl_dict = {
        'koeppen_geiger_2007': const.CSV_META_TEMPLATE['climate_KG'],
        'insitu': const.CSV_META_TEMPLATE['climate_insitu']
    }

    for key in lc_dict.keys():
        if key in lc['quantity_source_name'].values:
            if key != 'insitu':
                # FIX: `np.int` was deprecated in numpy 1.20 and removed
                # in 1.24; the builtin `int` is the drop-in replacement.
                lc_dict[key] = int(
                    lc.loc[lc['quantity_source_name'] ==
                           key]['value'].values[0])
            else:
                # insitu classifications are kept as free text.
                lc_dict[key] = lc.loc[lc['quantity_source_name'] ==
                                      key]['value'].values[0]
                logging.info(
                    f'insitu land cover classification available: {self.file_path}'
                )

    # read climate classifications
    cl = data.loc[['climate classification'
                   ]][['value', 'quantity_source_name']]
    for key in cl_dict.keys():
        if key in cl['quantity_source_name'].values:
            cl_dict[key] = cl.loc[cl['quantity_source_name'] ==
                                  key]['value'].values[0]
            if key == 'insitu':
                logging.info(
                    f'insitu climate classification available: {self.file_path}'
                )

    metavars = [
        MetaVar('lc_2000', lc_dict['CCI_landcover_2000']),
        MetaVar('lc_2005', lc_dict['CCI_landcover_2005']),
        MetaVar('lc_2010', lc_dict['CCI_landcover_2010']),
        MetaVar('lc_insitu', lc_dict['insitu']),
        MetaVar('climate_KG', cl_dict['koeppen_geiger_2007']),
        MetaVar('climate_insitu', cl_dict['insitu']),
    ]

    static_meta = {
        'saturation': self.__read_field(data, 'saturation'),
        'clay_fraction': self.__read_field(
            data, 'clay fraction', const.VARIABLE_LUT['cl_h']),
        'sand_fraction': self.__read_field(
            data, 'sand fraction', const.VARIABLE_LUT['sa_h']),
        'silt_fraction': self.__read_field(
            data, 'silt fraction', const.VARIABLE_LUT['si_h']),
        'organic_carbon': self.__read_field(
            data, 'organic carbon', const.VARIABLE_LUT['oc_h']),
    }

    # Use the fields read from the csv where available; otherwise fall
    # back to a single template default per field.
    for name, v in static_meta.items():
        if len(v) > 0:
            metavars += v
        else:
            metavars.append(MetaVar(name, const.CSV_META_TEMPLATE[name]))

    metadata = MetaData(metavars)

    return metadata
def from_metadata_csv(cls,
                      data_root,
                      meta_csv_file,
                      network=None,
                      temp_root=gettempdir()):
    """
    Load a previously created and stored filelist from a metadata csv file.

    Parameters
    ----------
    data_root : IsmnRoot or str or Path
        Path where the ismn data is stored, can also be a zip file
    meta_csv_file : str or Path
        Csv file where the metadata is stored.
    network : list, optional (default: None)
        List of networks that are considered.
        Other filehandlers are set to None.
    temp_root : str or Path, optional (default: gettempdir())
        Temporary folder where extracted data is copied during reading
        from zip archive.
    """
    if network is not None:
        network = np.atleast_1d(network)

    if isinstance(data_root, IsmnRoot):
        root = data_root
    else:
        root = IsmnRoot(data_root)

    print(f"Found existing ismn metadata in {meta_csv_file}.")

    # Two header rows: level 0 is the variable name, level 1 is
    # (depth_from, depth_to, val).
    metadata_df = pd.read_csv(meta_csv_file,
                              index_col=0,
                              header=[0, 1],
                              low_memory=False,
                              engine='c')

    # parse date cols as datetime
    for col in ['timerange_from', 'timerange_to']:
        metadata_df[col, 'val'] = pd.to_datetime(metadata_df[col, 'val'])

    # Collect level-0 column names in order of first appearance.
    lvars = []
    for c in metadata_df.columns:
        if c[0] not in lvars:
            lvars.append(c[0])

    # we assume triples for all vars except these, so they must be at the end
    assert lvars[-2:] == ['file_path', 'file_type'], \
        "file_type and file_path must be at the end."

    filelist = OrderedDict([])

    all_networks = metadata_df['network']['val'].values

    columns = np.array(list(metadata_df.columns))

    for i, row in enumerate(
            metadata_df.values):  # todo: slow!?? parallelise?
        this_nw = all_networks[i]

        if (network is not None) and not np.isin([this_nw], network)[0]:
            # NOTE(review): `f = None` is dead here — `continue` skips
            # the append below, so the assignment is never read.
            f = None
            continue
        else:
            # NOTE(review): np.unique returns SORTED names, while `vals`
            # keeps csv column order; this only lines up if the csv
            # columns are alphabetically sorted per variable — confirm
            # against the writer of meta_csv_file.
            vars = np.unique(columns[:-2][:, 0])
            # Each variable contributes a (depth_from, depth_to, val)
            # triple per row.
            vals = row[:-2].reshape(-1, 3)

            # The comprehension's `i` is scoped to the comprehension in
            # Python 3 and does not clobber the outer loop index.
            metadata = MetaData([
                MetaVar.from_tuple(
                    (vars[i], vals[i][2], vals[i][0], vals[i][1]))
                for i in range(len(vars))
            ])

            # Metadata is attached manually, so skip loading it from file.
            f = DataFile(root=root,
                         file_path=str(PurePosixPath(row[-2])),
                         load_metadata=False,
                         temp_root=temp_root)

            f.metadata = metadata
            f.file_type = row[-1]

            this_nw = f.metadata['network'].val

        # Group filehandlers by network name.
        if this_nw not in filelist.keys():
            filelist[this_nw] = []

        filelist[this_nw].append(f)

    return cls(root, filelist=filelist)
class Test_MetaData(unittest.TestCase):
    """Unit tests for the MetaData container: formatting, container
    protocol, merging and depth-based best-meta selection."""

    def setUp(self) -> None:
        # Four vars: one depth-less, one with negative depth, one that
        # duplicates a name in `self.other` (different depth ranges).
        vars = [
            MetaVar("first", 1, Depth(0, 1)),
            MetaVar("second", 0),
            MetaVar("neg", -99, Depth(-0.25, -1)),
            MetaVar("dup", "3rd", Depth(1, 3)),
        ]
        self.dat = MetaData(vars)
        self.other = MetaData(
            [MetaVar("dup", "3rd", Depth(2, 4)), MetaVar("4", 4)])

    def test_format(self):
        # DataFrame export: one (name, field) column pair per var.
        df = self.dat.to_pd()
        assert df["first", "val"] == 1
        assert df["neg", "depth_from"] == -0.25
        assert df["dup", "depth_to"] == 3

        # Dict export: name -> list of (val, depth_from, depth_to).
        ddict = self.dat.to_dict()
        assert ddict["dup"] == [("3rd", 1, 3)]
        assert ddict["second"] == [(0, None, None)]

    def test_MetaData(self):
        # Container protocol: length, membership, int / name / list
        # indexing and key listing.
        assert len(self.dat) == 4
        assert "second" in self.dat
        assert self.dat[1] in self.dat
        assert tuple(self.dat[0]) == ("first", 1, 0.0, 1.0)
        assert tuple(self.dat[1]) == ("second", 0, None, None)
        assert self.dat["dup"] == self.dat[3]
        assert self.dat.keys() == ["first", "second", "neg", "dup"]
        assert (MetaData([MetaVar.from_tuple(
            ("first", 1, 0.0, 1.0))]) == self.dat[["first"]])

    def test_best_meta(self):
        self.dat.merge(self.other, inplace=True)

        assert len(self.dat) == 6

        # no depths overlap
        best_meta_9_10 = self.dat.best_meta_for_depth(Depth(9, 10))
        assert sorted(best_meta_9_10.keys()) == sorted(["second", "4"])

        # all depths overlap
        best_meta_inf = self.dat.best_meta_for_depth(Depth(-np.inf, np.inf))
        assert len(best_meta_inf) == len(self.dat) - 1  # one duplicate removed
        assert sorted(best_meta_inf.keys()) == sorted(
            ["second", "4", "dup", "first", "neg"])
        # both values for dup were equally good, so the first was kept
        assert best_meta_inf["dup"].depth.start == 1
        assert best_meta_inf["dup"].depth.end == 3

        # all but one dup and neg depth overlaps
        best_meta_015 = self.dat.best_meta_for_depth(Depth(0, 1.5))
        assert len(best_meta_015) == len(self.dat) - 2
        assert best_meta_015["dup"].depth.start == 1
        assert best_meta_015["dup"].depth.end == 3

        # both duplicate depths overlap, but one more --> keep second, drop neg
        best_meta_231 = self.dat.best_meta_for_depth(Depth(2, 3.1))
        assert (len(best_meta_231) == len(self.dat) - 3
                )  # one duplicate and first and neg
        assert best_meta_231["dup"].depth.start == 2
        assert best_meta_231["dup"].depth.end == 4

        # both duplicate depths overlap, equally good -> keep first
        best_meta_23 = self.dat.best_meta_for_depth(Depth(2, 3))
        assert len(best_meta_23) == len(
            self.dat) - 3  # one dup and first and neg
        assert best_meta_23["dup"].depth.start == 1.0
        assert best_meta_23["dup"].depth.end == 3.0

        # one matches perfectly
        best_meta_13 = self.dat.best_meta_for_depth(Depth(1, 3))
        assert len(best_meta_13) == len(self.dat) - 2  # one dup only, no neg
        assert best_meta_13["dup"].depth.start == 1.0
        assert best_meta_13["dup"].depth.end == 3.0

        # check with negative
        best_meta_neg = self.dat.best_meta_for_depth(Depth(-0.5, 2.0))
        # one dup was outside depth and is dropped, rest remains
        assert sorted(best_meta_neg.keys()) == sorted(
            ["first", "second", "dup", "4", "neg"])
        assert best_meta_neg["dup"].depth.start == 1.0
        assert best_meta_neg["dup"].depth.end == 3.0

        # check with negative
        best_meta_only_neg = self.dat.best_meta_for_depth(Depth(-0.5, -1.0))
        # only keep meta without depths and for neg depth
        assert sorted(best_meta_only_neg.keys()) == sorted(
            ["second", "neg", "4"])
        assert best_meta_only_neg["neg"].depth.start == -0.25
        assert best_meta_only_neg["neg"].depth.end == -1