Esempio n. 1
0
    def setUp(self) -> None:
        """Create a 4-variable MetaData fixture plus a second set sharing 'dup'."""
        self.dat = MetaData([
            MetaVar("first", 1, Depth(0, 1)),
            MetaVar("second", 0),
            MetaVar("neg", -99, Depth(-0.25, -1)),
            MetaVar("dup", "3rd", Depth(1, 3)),
        ])

        # same name 'dup' at a different depth, plus one variable not in self.dat
        duplicate = MetaVar("dup", "3rd", Depth(2, 4))
        extra = MetaVar("4", 4)
        self.other = MetaData([duplicate, extra])
Esempio n. 2
0
 def metadata(self) -> MetaData:
     """
     Collect and merge the metadata of all sensors at this station.
     """
     per_sensor = [sensor.metadata for sensor in self.sensors.values()]
     return MetaData().merge(per_sensor, inplace=False)
Esempio n. 3
0
    def get_metadata_ceop_sep(self, elements=None):
        """
        Get metadata in the file format called CEOP in separate files.

        Parameters
        ----------
        elements : dict, optional (default: None)
            Previously loaded elements can be passed here to avoid reading the
            file again.

        Returns
        -------
        metadata : MetaData
            Metadata information.
        depth : Depth
            Sensor Depth, generated from file name
        """
        if elements:
            headr = elements['headr']
            last = elements['last']
            fname = elements['fname']
        else:
            headr, _, last, fname = self.get_elements_from_file()

        # Instrument names may contain '_' and therefore span several
        # filename tokens; rejoin them (everything between token 6 and the
        # two trailing tokens).
        if len(fname) > 9:
            instr = '_'.join(fname[6:-2])
        else:
            instr = fname[6]

        # Translate the short variable code via the lookup table; keep the
        # raw code when it is unknown.
        variable = const.VARIABLE_LUT.get(fname[3], fname[3])

        # First two tokens of the first/last data line are date and time.
        timerange_from = pd.to_datetime(' '.join(headr[:2]))
        timerange_to = pd.to_datetime(' '.join(last[:2]))

        # Sensor depth range is encoded in filename tokens 4 and 5.
        depth = Depth(float(fname[4]), float(fname[5]))

        metadata = MetaData([
            MetaVar('network', fname[1]),
            MetaVar('station', fname[2]),
            MetaVar('variable', variable, depth),
            MetaVar('instrument', instr, depth),
            MetaVar('timerange_from', timerange_from),
            MetaVar('timerange_to', timerange_to),
            MetaVar('latitude', float(headr[7])),
            MetaVar('longitude', float(headr[8])),
            MetaVar('elevation', float(headr[9])),
        ])

        return metadata, depth
Esempio n. 4
0
    def test_MetaData(self):
        """Check container protocol, indexing and key listing of MetaData."""
        # container protocol
        assert len(self.dat) == 4
        assert "second" in self.dat
        assert self.dat[1] in self.dat

        # integer indexing yields (name, val, depth_from, depth_to) tuples
        assert tuple(self.dat[0]) == ("first", 1, 0.0, 1.0)
        assert tuple(self.dat[1]) == ("second", 0, None, None)

        # name indexing resolves to the matching element
        assert self.dat["dup"] == self.dat[3]

        expected_keys = ["first", "second", "neg", "dup"]
        assert self.dat.keys() == expected_keys

        # list-of-names indexing returns a sub-MetaData
        subset = self.dat[["first"]]
        assert MetaData([MetaVar.from_tuple(("first", 1, 0.0, 1.0))]) == subset
Esempio n. 5
0
 def metadata(self) -> MetaData:
     """Return the filehandler's metadata, or an empty MetaData if none is set."""
     if self.filehandler is None:
         return MetaData()
     return self.filehandler.metadata
Esempio n. 6
0
    def read_metadata(self) -> MetaData:
        """
        Read csv file containing static variables into data frame.

        Returns
        -------
        metadata : MetaData
            Static metadata read from csv file.
        """
        if self.root.zip:
            # archive must be open before a member file can be extracted
            if not self.root.isopen:
                self.root.open()
            with TemporaryDirectory(prefix='ismn',
                                    dir=self.temp_root) as tempdir:
                extracted = self.root.extract_file(self.file_path, tempdir)
                data = self.__read_csv(extracted)
        else:
            data = self.__read_csv(self.root.path / self.file_path)

        # read landcover classifications
        lc = data.loc[['land cover classification'
                       ]][['value', 'quantity_source_name']]

        # template defaults, overridden below when the csv provides values
        lc_dict = {
            'CCI_landcover_2000': const.CSV_META_TEMPLATE['lc_2000'],
            'CCI_landcover_2005': const.CSV_META_TEMPLATE['lc_2005'],
            'CCI_landcover_2010': const.CSV_META_TEMPLATE['lc_2010'],
            'insitu': const.CSV_META_TEMPLATE['lc_insitu']
        }

        cl_dict = {
            'koeppen_geiger_2007': const.CSV_META_TEMPLATE['climate_KG'],
            'insitu': const.CSV_META_TEMPLATE['climate_insitu']
        }

        for key in lc_dict.keys():
            if key in lc['quantity_source_name'].values:
                if key != 'insitu':
                    # CCI classes are numeric codes. Use the builtin int():
                    # np.int was deprecated in numpy 1.20 and removed in 1.24.
                    lc_dict[key] = int(lc.loc[lc['quantity_source_name'] ==
                                              key]['value'].values[0])
                else:
                    lc_dict[key] = lc.loc[lc['quantity_source_name'] ==
                                          key]['value'].values[0]
                    logging.info(
                        f'insitu land cover classification available: {self.file_path}'
                    )

        # read climate classifications
        cl = data.loc[['climate classification'
                       ]][['value', 'quantity_source_name']]
        for key in cl_dict.keys():
            if key in cl['quantity_source_name'].values:
                cl_dict[key] = cl.loc[cl['quantity_source_name'] ==
                                      key]['value'].values[0]
                if key == 'insitu':
                    logging.info(
                        f'insitu climate classification available: {self.file_path}'
                    )

        metavars = [
            MetaVar('lc_2000', lc_dict['CCI_landcover_2000']),
            MetaVar('lc_2005', lc_dict['CCI_landcover_2005']),
            MetaVar('lc_2010', lc_dict['CCI_landcover_2010']),
            MetaVar('lc_insitu', lc_dict['insitu']),
            MetaVar('climate_KG', cl_dict['koeppen_geiger_2007']),
            MetaVar('climate_insitu', cl_dict['insitu']),
        ]

        # depth-resolved soil properties; each entry is a (possibly empty)
        # list of MetaVars read from the csv
        static_meta = {
            'saturation':
            self.__read_field(data, 'saturation'),
            'clay_fraction':
            self.__read_field(data, 'clay fraction',
                              const.VARIABLE_LUT['cl_h']),
            'sand_fraction':
            self.__read_field(data, 'sand fraction',
                              const.VARIABLE_LUT['sa_h']),
            'silt_fraction':
            self.__read_field(data, 'silt fraction',
                              const.VARIABLE_LUT['si_h']),
            'organic_carbon':
            self.__read_field(data, 'organic carbon',
                              const.VARIABLE_LUT['oc_h']),
        }

        # fall back to the template default when a field has no csv entries
        for name, v in static_meta.items():
            if len(v) > 0:
                metavars += v
            else:
                metavars.append(MetaVar(name, const.CSV_META_TEMPLATE[name]))

        return MetaData(metavars)
Esempio n. 7
0
    def from_metadata_csv(cls,
                          data_root,
                          meta_csv_file,
                          network=None,
                          temp_root=gettempdir()):
        """
        Load a previously created filelist from a metadata csv file.

        Parameters
        ----------
        data_root : IsmnRoot or str or Path
            Path where the ismn data is stored, can also be a zip file
        meta_csv_file : str or Path
            Csv file where the metadata is stored.
        network : list, optional (default: None)
            List of networks that are considered. Other networks are skipped.
        temp_root : str or Path, optional (default: gettempdir())
            Temporary folder where extracted data is copied during reading from
            zip archive.
        """
        if network is not None:
            network = np.atleast_1d(network)

        root = data_root if isinstance(data_root, IsmnRoot) \
            else IsmnRoot(data_root)

        print(f"Found existing ismn metadata in {meta_csv_file}.")

        metadata_df = pd.read_csv(meta_csv_file,
                                  index_col=0,
                                  header=[0, 1],
                                  low_memory=False,
                                  engine='c')

        # parse date cols as datetime
        for col in ['timerange_from', 'timerange_to']:
            metadata_df[col, 'val'] = pd.to_datetime(metadata_df[col, 'val'])

        # first-level column names, in order of appearance, deduplicated
        lvars = []
        for c in metadata_df.columns:
            if c[0] not in lvars:
                lvars.append(c[0])

        # we assume triples for all vars except these, so they must be at the end
        assert lvars[-2:] == ['file_path', 'file_type'], \
            "file_type and file_path must be at the end."

        filelist = OrderedDict([])

        all_networks = metadata_df['network']['val'].values

        columns = np.array(list(metadata_df.columns))
        # variable names are the same for every row; compute them once
        # instead of once per row inside the loop below
        var_names = np.unique(columns[:-2][:, 0])

        for i, row in enumerate(
                metadata_df.values):  # todo: slow!?? parallelise?
            this_nw = all_networks[i]
            # skip rows belonging to networks that were not requested
            if (network is not None) and not np.isin([this_nw], network)[0]:
                continue

            # each variable is stored as a (depth_from, depth_to, val) triple
            vals = row[:-2].reshape(-1, 3)

            metadata = MetaData([
                MetaVar.from_tuple(
                    (var_names[j], vals[j][2], vals[j][0], vals[j][1]))
                for j in range(len(var_names))
            ])

            f = DataFile(root=root,
                         file_path=str(PurePosixPath(row[-2])),
                         load_metadata=False,
                         temp_root=temp_root)

            f.metadata = metadata
            f.file_type = row[-1]

            this_nw = f.metadata['network'].val

            if this_nw not in filelist.keys():
                filelist[this_nw] = []

            filelist[this_nw].append(f)

        return cls(root, filelist=filelist)
Esempio n. 8
0
class Test_MetaData(unittest.TestCase):
    """Unit tests for MetaData: construction, conversion, indexing and merging."""

    def setUp(self) -> None:
        """Build a 4-variable MetaData fixture and a second set sharing 'dup'."""
        vars = [
            MetaVar("first", 1, Depth(0, 1)),
            MetaVar("second", 0),
            MetaVar("neg", -99, Depth(-0.25, -1)),
            MetaVar("dup", "3rd", Depth(1, 3)),
        ]

        self.dat = MetaData(vars)

        # second set: same name 'dup' at a different depth, plus one new var
        self.other = MetaData(
            [MetaVar("dup", "3rd", Depth(2, 4)),
             MetaVar("4", 4)])

    def test_format(self):
        """Check conversion of MetaData to a pandas structure and to a dict."""
        df = self.dat.to_pd()
        assert df["first", "val"] == 1
        assert df["neg", "depth_from"] == -0.25
        assert df["dup", "depth_to"] == 3
        ddict = self.dat.to_dict()
        assert ddict["dup"] == [("3rd", 1, 3)]
        assert ddict["second"] == [(0, None, None)]

    def test_MetaData(self):
        """Check container protocol, indexing and key listing of MetaData."""
        assert len(self.dat) == 4
        assert "second" in self.dat
        assert self.dat[1] in self.dat
        # integer indexing yields (name, val, depth_from, depth_to) tuples
        assert tuple(self.dat[0]) == ("first", 1, 0.0, 1.0)
        assert tuple(self.dat[1]) == ("second", 0, None, None)

        assert self.dat["dup"] == self.dat[3]

        assert self.dat.keys() == ["first", "second", "neg", "dup"]

        # list-of-names indexing returns a sub-MetaData
        assert (MetaData([MetaVar.from_tuple(
            ("first", 1, 0.0, 1.0))]) == self.dat[["first"]])

    def test_best_meta(self):
        """Check best_meta_for_depth selection after merging both sets."""
        self.dat.merge(self.other, inplace=True)

        assert len(self.dat) == 6

        # no depths overlap
        best_meta_9_10 = self.dat.best_meta_for_depth(Depth(9, 10))
        assert sorted(best_meta_9_10.keys()) == sorted(["second", "4"])

        # all depths overlap
        best_meta_inf = self.dat.best_meta_for_depth(Depth(-np.inf, np.inf))
        assert len(best_meta_inf) == len(self.dat) - 1  # one duplicate removed
        assert sorted(best_meta_inf.keys()) == sorted(
            ["second", "4", "dup", "first", "neg"])
        # both values for dup were equally good, so the first was kept
        assert best_meta_inf["dup"].depth.start == 1
        assert best_meta_inf["dup"].depth.end == 3

        # all but one dup and neg depth overlaps
        best_meta_015 = self.dat.best_meta_for_depth(Depth(0, 1.5))
        assert len(best_meta_015) == len(self.dat) - 2
        assert best_meta_015["dup"].depth.start == 1
        assert best_meta_015["dup"].depth.end == 3

        # both duplicate depths overlap, but one more --> keep second, drop neg
        best_meta_231 = self.dat.best_meta_for_depth(Depth(2, 3.1))
        assert (len(best_meta_231) == len(self.dat) - 3
                )  # one duplicate and first and neg
        assert best_meta_231["dup"].depth.start == 2
        assert best_meta_231["dup"].depth.end == 4

        # both duplicate depths overlap, equally good -> keep first
        best_meta_23 = self.dat.best_meta_for_depth(Depth(2, 3))
        assert len(best_meta_23) == len(
            self.dat) - 3  # one dup and first and neg
        assert best_meta_23["dup"].depth.start == 1.0
        assert best_meta_23["dup"].depth.end == 3.0

        # one matches perfectly
        best_meta_13 = self.dat.best_meta_for_depth(Depth(1, 3))
        assert len(best_meta_13) == len(self.dat) - 2  # one dup only, no neg
        assert best_meta_13["dup"].depth.start == 1.0
        assert best_meta_13["dup"].depth.end == 3.0

        # check with negative
        best_meta_neg = self.dat.best_meta_for_depth(Depth(-0.5, 2.0))
        # one dup was outside depth and is dropped, rest remains
        assert sorted(best_meta_neg.keys()) == sorted(
            ["first", "second", "dup", "4", "neg"])
        assert best_meta_neg["dup"].depth.start == 1.0
        assert best_meta_neg["dup"].depth.end == 3.0

        # check with negative
        # NOTE(review): Depth(-0.5, -1.0) passes start > end — presumably
        # Depth accepts this for downward-negative ranges; confirm in Depth.
        best_meta_only_neg = self.dat.best_meta_for_depth(Depth(-0.5, -1.0))
        # only keep meta without depths and for neg depth
        assert sorted(best_meta_only_neg.keys()) == sorted(
            ["second", "neg", "4"])
        assert best_meta_only_neg["neg"].depth.start == -0.25
        assert best_meta_only_neg["neg"].depth.end == -1