def _index_from_iterable(self, iterable, update_time):
    """
    Gather events from an iterable and write their index rows to the database.

    Parameters
    ----------
    iterable
        Yields ``(catalog, mtime, path)`` triples. Triples whose catalog is
        None are skipped.
    update_time
        Forwarded unchanged to ``self._write_update``.

    Returns
    -------
    bool
        True when gathering stopped early because the number of buffered
        events reached ``self._max_events_in_memory``; False when the
        iterable was exhausted first.
    """
    # Read the limit once so the attribute is not looked up on every pass.
    limit = self._max_events_in_memory
    collected, mtimes, file_paths = [], [], []
    hit_limit = False

    for cat, mtime, path in iterable:
        if cat is None:
            continue
        new_events = list(cat)
        collected.extend(new_events)
        # Every event of one catalog shares that catalog's mtime and path.
        mtimes.extend([mtime] * len(new_events))
        file_paths.extend([path] * len(new_events))
        # The limit is only checked after a whole catalog has been consumed,
        # so the buffer may overshoot it.
        if len(collected) >= limit:
            hit_limit = True
            break

    # Build the index rows for everything gathered in this pass.
    df = obsplus.events.pd._default_cat_to_df(collected)
    df["updated"] = to_datetime64(mtimes)
    df["path"] = _remove_base_path(pd.Series(file_paths, dtype=object))

    if not len(df):
        # Nothing was gathered, so there is nothing to write.
        return hit_limit

    df = _time_cols_to_ints(df)
    prepared = self._prepare_dataframe(df, EVENT_TYPES_INPUT)
    self._write_update(prepared, update_time)
    return hit_limit
def _prep_write_df(self, df):
    """
    Get an index dataframe ready for storage in the HDF5 store.

    The "path" column of ``df`` is overwritten in place with the result of
    ``_remove_base_path(df["path"], self.bank_path)``. The frame is then
    passed through ``order_columns``, ``cast_dtypes`` and
    ``convert_bytestrings`` in that order, and the result is returned.

    Raises
    ------
    AssertionError
        If ``df`` has no "path" column, or if the processed frame contains
        any null value.
    """
    has_path_column = "path" in set(df.columns)
    assert has_path_column, f"{df} has no path column"

    # Keep the bank path out of the stored path column.
    df["path"] = _remove_base_path(df["path"], self.bank_path)

    dtype = WAVEFORM_DTYPES_INPUT
    ordered = order_columns(df, required_columns=list(dtype))
    typed = cast_dtypes(ordered, dtype=dtype, inplace=True)
    df = convert_bytestrings(typed, columns=self.index_str, inplace=True)

    contains_null = df.isnull().any().any()
    assert not contains_null, "null values found in index"
    return df