def test_MetaData(self):
    """Check MetaData container: length, membership, item access, keys, equality."""
    dat = self.dat

    # size and containment
    assert len(dat) == 4
    assert 'second' in dat
    assert dat[1] in dat

    # tuple views of individual MetaVars
    first_as_tuple = tuple(dat[0])
    second_as_tuple = tuple(dat[1])
    assert first_as_tuple == ('first', 1, 0., 1.)
    assert second_as_tuple == ('second', 0, None, None)

    # duplicate entry resolves to the same var; key order is preserved
    assert dat['dup'] == dat[3]
    assert dat.keys() == ['first', 'second', 'neg', 'dup']

    # sub-selection by key list compares equal to a freshly built MetaData
    expected = MetaData([MetaVar.from_tuple(('first', 1, 0., 1.))])
    assert expected == dat[['first']]
def test_MetaData(self):
    """Exercise MetaData: size, containment, tuple views, key order and equality."""
    assert len(self.dat) == 4
    assert "second" in self.dat
    assert self.dat[1] in self.dat

    # compare tuple views of the first two vars against their expected contents
    expected_tuples = {
        0: ("first", 1, 0.0, 1.0),
        1: ("second", 0, None, None),
    }
    for idx, exp in expected_tuples.items():
        assert tuple(self.dat[idx]) == exp

    # duplicate key points at the same var; key order is stable
    assert self.dat["dup"] == self.dat[3]
    assert self.dat.keys() == ["first", "second", "neg", "dup"]

    # selecting by key list yields a MetaData equal to a freshly built one
    single = self.dat[["first"]]
    assert MetaData([MetaVar.from_tuple(("first", 1, 0.0, 1.0))]) == single
def from_metadata_csv(
    # NOTE(review): default is evaluated once at import time; all calls share
    # the same gettempdir() result — fine for a temp dir, but worth knowing.
    cls, data_root, meta_csv_file, network=None, temp_root=gettempdir()
):
    """
    Load a previously created and stored filelist from
    :func:`ismn.filecollection.IsmnFileCollection.to_metadata_csv`

    Parameters
    ----------
    data_root : IsmnRoot or str or Path
        Path where the ismn data is stored, can also be a zip file
    meta_csv_file : str or Path
        Csv file where the metadata is stored.
    network : list, optional (default: None)
        List of networks that are considered.
        Filehandlers for other networks are set to None.
    temp_root : str or Path, optional (default: gettempdir())
        Temporary folder where extracted data is copied during reading
        from zip archive.
    """
    # Normalize a scalar network name to a 1-element array for np.isin below.
    if network is not None:
        network = np.atleast_1d(network)

    # Accept either a ready IsmnRoot or a raw path/zip location.
    if isinstance(data_root, IsmnRoot):
        root = data_root
    else:
        root = IsmnRoot(data_root)

    print(f"Found existing ismn metadata in {meta_csv_file}.")

    # Helper (defined elsewhere in this module) reads the CSV into a
    # DataFrame; presumably with a 2-level column index of
    # (variable, depth_from/depth_to/val) triples plus trailing
    # file_path/file_type columns — TODO confirm against _load_metadata_df.
    metadata_df = _load_metadata_df(meta_csv_file)

    filelist = OrderedDict([])
    all_networks = metadata_df["network"]["val"].values
    columns = np.array(list(metadata_df.columns))

    for i, row in enumerate(metadata_df.values):  # todo: slow!?? parallelise?
        this_nw = all_networks[i]
        if (network is not None) and not np.isin([this_nw], network)[0]:
            # NOTE(review): this assignment is dead — `continue` skips the
            # rest of the iteration, so f is never read when set to None here.
            f = None
            continue
        else:
            # Everything except the last two columns (file_path, file_type)
            # is metadata; reshape assumes exactly 3 CSV columns per variable.
            vars = np.unique(columns[:-2][:, 0])
            vals = row[:-2].reshape(-1, 3)
            # NOTE(review): the comprehension index `i` shadows the outer row
            # index `i`; in Python 3 the comprehension scope does not leak,
            # so the outer loop is unaffected.
            # Triple order appears to be (depth_from, depth_to, val), passed
            # to MetaVar.from_tuple as (name, val, depth_from, depth_to) —
            # TODO confirm against to_metadata_csv's column layout.
            metadata = MetaData(
                [
                    MetaVar.from_tuple(
                        (vars[i], vals[i][2], vals[i][0], vals[i][1])
                    )
                    for i in range(len(vars))
                ]
            )
            # Build the file handler without touching the data file itself;
            # metadata is attached from the CSV instead of re-read from disk.
            f = DataFile(
                root=root,
                file_path=str(PurePosixPath(row[-2])),
                load_metadata=False,
                temp_root=temp_root,
            )
            f.metadata = metadata
            f.file_type = row[-1]

        this_nw = f.metadata["network"].val

        # Group file handlers per network name.
        if this_nw not in filelist.keys():
            filelist[this_nw] = []

        filelist[this_nw].append(f)

    # NOTE(review): this sets an attribute on the CLASS, not the instance —
    # the cached metadata_df is shared by all instances; confirm intended.
    if network is None:
        cls.metadata_df = metadata_df
    else:
        # Keep only rows belonging to the selected networks.
        flags = np.isin(metadata_df["network"]["val"].values, network)
        cls.metadata_df = metadata_df.loc[flags]

    return cls(root, filelist=filelist)
def from_metadata_csv(cls, data_root, meta_csv_file, network=None,
                      temp_root=gettempdir()):
    """
    Load a previously created and stored filelist from pkl.

    Parameters
    ----------
    data_root : IsmnRoot or str or Path
        Path where the ismn data is stored, can also be a zip file
    meta_csv_file : str or Path
        Csv file where the metadata is stored.
    network : list, optional (default: None)
        List of networks that are considered. Other filehandlers are
        set to None.
    temp_root : str or Path, optional (default: gettempdir())
        Temporary folder where extracted data is copied during reading
        from zip archive.
    """
    # Normalize a scalar network name to a 1-element array for np.isin below.
    if network is not None:
        network = np.atleast_1d(network)

    # Accept either a ready IsmnRoot or a raw path/zip location.
    if isinstance(data_root, IsmnRoot):
        root = data_root
    else:
        root = IsmnRoot(data_root)

    print(f"Found existing ismn metadata in {meta_csv_file}.")

    # CSV was written with a 2-level column header (variable, statistic);
    # header=[0, 1] restores that MultiIndex.
    metadata_df = pd.read_csv(meta_csv_file, index_col=0, header=[0, 1],
                              low_memory=False, engine='c')

    # parse date cols as datetime
    for col in ['timerange_from', 'timerange_to']:
        metadata_df[col, 'val'] = pd.to_datetime(metadata_df[col, 'val'])

    # Collect top-level column names in their original (first-seen) order.
    lvars = []
    for c in metadata_df.columns:
        if c[0] not in lvars:
            lvars.append(c[0])

    # we assume triples for all vars except these, so they must be at the end
    # NOTE(review): assert is stripped under `python -O`; an explicit raise
    # would be more robust for validating external input.
    assert lvars[-2:] == ['file_path', 'file_type'], \
        "file_type and file_path must be at the end."

    filelist = OrderedDict([])
    all_networks = metadata_df['network']['val'].values
    columns = np.array(list(metadata_df.columns))

    for i, row in enumerate(
            metadata_df.values):  # todo: slow!?? parallelise?
        this_nw = all_networks[i]
        if (network is not None) and not np.isin([this_nw], network)[0]:
            # NOTE(review): dead assignment — `continue` skips the rest of
            # the iteration, so this None is never read.
            f = None
            continue
        else:
            # All but the trailing file_path/file_type columns are metadata;
            # reshape assumes exactly 3 CSV columns per variable.
            vars = np.unique(columns[:-2][:, 0])
            vals = row[:-2].reshape(-1, 3)
            # NOTE(review): the comprehension index `i` shadows the outer
            # row index `i`; in Python 3 it does not leak out, so the outer
            # loop is unaffected.
            # Triple order appears to be (depth_from, depth_to, val), fed to
            # MetaVar.from_tuple as (name, val, depth_from, depth_to) —
            # TODO confirm against the CSV writer's column layout.
            metadata = MetaData([
                MetaVar.from_tuple(
                    (vars[i], vals[i][2], vals[i][0], vals[i][1]))
                for i in range(len(vars))
            ])
            # Build the handler without reading the data file; metadata is
            # attached from the CSV instead.
            f = DataFile(root=root,
                         file_path=str(PurePosixPath(row[-2])),
                         load_metadata=False,
                         temp_root=temp_root)
            f.metadata = metadata
            f.file_type = row[-1]

        this_nw = f.metadata['network'].val

        # Group file handlers per network name.
        if this_nw not in filelist.keys():
            filelist[this_nw] = []

        filelist[this_nw].append(f)

    return cls(root, filelist=filelist)