예제 #1
0
    def test_MetaData(self):
        assert len(self.dat) == 4
        assert 'second' in self.dat
        assert self.dat[1] in self.dat
        assert tuple(self.dat[0]) == ('first', 1, 0., 1.)
        assert tuple(self.dat[1]) == ('second', 0, None, None)

        assert self.dat['dup'] == self.dat[3]

        assert self.dat.keys() == ['first', 'second', 'neg', 'dup']

        assert MetaData([MetaVar.from_tuple(
            ('first', 1, 0., 1.))]) == self.dat[['first']]
예제 #2
0
    def test_MetaData(self):
        assert len(self.dat) == 4
        assert "second" in self.dat
        assert self.dat[1] in self.dat
        assert tuple(self.dat[0]) == ("first", 1, 0.0, 1.0)
        assert tuple(self.dat[1]) == ("second", 0, None, None)

        assert self.dat["dup"] == self.dat[3]

        assert self.dat.keys() == ["first", "second", "neg", "dup"]

        assert (MetaData([MetaVar.from_tuple(
            ("first", 1, 0.0, 1.0))]) == self.dat[["first"]])
예제 #3
0
    def from_metadata_csv(
        cls, data_root, meta_csv_file, network=None, temp_root=gettempdir()
    ):
        """
        Load a previously created and stored filelist from
        :func:`ismn.filecollection.IsmnFileCollection.to_metadata_csv`

        Parameters
        ----------
        data_root : IsmnRoot or str or Path
            Path where the ismn data is stored, can also be a zip file
        meta_csv_file : str or Path
            Csv file where the metadata is stored.
        network : list, optional (default: None)
            List of networks that are considered.
            Filehandlers for other networks are set to None.
        temp_root : str or Path, optional (default: gettempdir())
            Temporary folder where extracted data is copied during reading from
            zip archive.
        """
        if network is not None:
            network = np.atleast_1d(network)

        if isinstance(data_root, IsmnRoot):
            root = data_root
        else:
            root = IsmnRoot(data_root)

        print(f"Found existing ismn metadata in {meta_csv_file}.")

        metadata_df = _load_metadata_df(meta_csv_file)

        filelist = OrderedDict([])

        all_networks = metadata_df["network"]["val"].values

        columns = np.array(list(metadata_df.columns))

        for i, row in enumerate(metadata_df.values):  # todo: slow!?? parallelise?
            this_nw = all_networks[i]
            if (network is not None) and not np.isin([this_nw], network)[0]:
                f = None
                continue
            else:
                vars = np.unique(columns[:-2][:, 0])
                vals = row[:-2].reshape(-1, 3)

                metadata = MetaData(
                    [
                        MetaVar.from_tuple(
                            (vars[i], vals[i][2], vals[i][0], vals[i][1])
                        )
                        for i in range(len(vars))
                    ]
                )

                f = DataFile(
                    root=root,
                    file_path=str(PurePosixPath(row[-2])),
                    load_metadata=False,
                    temp_root=temp_root,
                )

                f.metadata = metadata
                f.file_type = row[-1]

                this_nw = f.metadata["network"].val

            if this_nw not in filelist.keys():
                filelist[this_nw] = []

            filelist[this_nw].append(f)

        if network is None:
            cls.metadata_df = metadata_df
        else:
            flags = np.isin(metadata_df["network"]["val"].values, network)
            cls.metadata_df = metadata_df.loc[flags]

        return cls(root, filelist=filelist)
예제 #4
0
    def from_metadata_csv(cls,
                          data_root,
                          meta_csv_file,
                          network=None,
                          temp_root=gettempdir()):
        """
        Load a previously created and stored filelist from pkl.

        Parameters
        ----------
        data_root : IsmnRoot or str or Path
            Path where the ismn data is stored, can also be a zip file
        meta_csv_file : str or Path
            Csv file where the metadata is stored.
        network : list, optional (default: None)
            List of networks that are considered. Other filehandlers are set to None.
        temp_root : str or Path, optional (default: gettempdir())
            Temporary folder where extracted data is copied during reading from
            zip archive.
        """
        if network is not None:
            network = np.atleast_1d(network)

        if isinstance(data_root, IsmnRoot):
            root = data_root
        else:
            root = IsmnRoot(data_root)

        print(f"Found existing ismn metadata in {meta_csv_file}.")

        metadata_df = pd.read_csv(meta_csv_file,
                                  index_col=0,
                                  header=[0, 1],
                                  low_memory=False,
                                  engine='c')

        # parse date cols as datetime
        for col in ['timerange_from', 'timerange_to']:
            metadata_df[col, 'val'] = pd.to_datetime(metadata_df[col, 'val'])

        lvars = []
        for c in metadata_df.columns:
            if c[0] not in lvars:
                lvars.append(c[0])

        # we assume triples for all vars except these, so they must be at the end
        assert lvars[-2:] == ['file_path', 'file_type'], \
            "file_type and file_path must be at the end."

        filelist = OrderedDict([])

        all_networks = metadata_df['network']['val'].values

        columns = np.array(list(metadata_df.columns))

        for i, row in enumerate(
                metadata_df.values):  # todo: slow!?? parallelise?
            this_nw = all_networks[i]
            if (network is not None) and not np.isin([this_nw], network)[0]:
                f = None
                continue
            else:
                vars = np.unique(columns[:-2][:, 0])
                vals = row[:-2].reshape(-1, 3)

                metadata = MetaData([
                    MetaVar.from_tuple(
                        (vars[i], vals[i][2], vals[i][0], vals[i][1]))
                    for i in range(len(vars))
                ])

                f = DataFile(root=root,
                             file_path=str(PurePosixPath(row[-2])),
                             load_metadata=False,
                             temp_root=temp_root)

                f.metadata = metadata
                f.file_type = row[-1]

                this_nw = f.metadata['network'].val

            if this_nw not in filelist.keys():
                filelist[this_nw] = []

            filelist[this_nw].append(f)

        return cls(root, filelist=filelist)