def _get_sub_tables(
    self, resource_ids: Union[str, Sequence[str]]
) -> Dict[str, pd.DataFrame]:
    """
    Return a dict of sub-dataframes filtered to the given resource ids.

    Parameters
    ----------
    resource_ids
        A single resource id (str) or a sequence of resource ids.

    Returns
    -------
    A dict mapping each table name in ``self._dfs`` to a dataframe
    containing only the rows whose ``_event_id_`` is in ``resource_ids``.
    """
    # Removed leftover debugging breakpoint (import pdb; pdb.set_trace())
    # which halted execution on every call.
    ids = list(iterate(resource_ids))
    return {name: df[df._event_id_.isin(ids)] for name, df in self._dfs.items()}
def _iter_client(self, clients, method_name, *args, **kwargs):
    """
    Call ``method_name`` on each client, returning the first non-empty result.

    Clients that don't implement the method are silently skipped.
    Returns None implicitly when no client yields usable data.
    """
    for client in iterate(clients):
        try:
            method = getattr(client, method_name)
            result = method(*args, **kwargs)
        except AttributeError:
            # this client doesn't support the requested method; try the next
            continue
        # a non-None, non-empty result is considered a hit
        if result is not None and len(result):
            return result
def get_column(self, name: str) -> pd.Series:
    """
    Return a Series of values from a dataframe column or index values.

    Parameters
    ----------
    name
        The name of the column (or index level) to return.

    Raises
    ------
    KeyError
        If ``name`` is neither a column nor an index level.
    """
    data = self.data
    index = data.index
    # plain column takes precedence over an index level of the same name
    if name in data.columns:
        return data[name]
    # MultiIndex exposes ``names``; a flat index only has ``name``
    level_names = set(iterate(getattr(index, "names", "name")))
    if name in level_names:
        return pd.Series(index.get_level_values(name), index=index)
    raise KeyError(f"{name} is not a column or index level")
def ids_in_bank(self, event_id: Union[str, Sequence[str]]) -> Set[str]:
    """
    Determine if one or more event_ids are used by the bank.

    This function is faster than reading the entire index into memory
    to perform a similar check.

    Parameters
    ----------
    event_id
        A single event id or sequence of event ids.

    Returns
    -------
    A set of event_ids which are also found in the bank.
    """
    # only pull the event_id column from the index, then de-duplicate
    indexed_ids = set(np.unique(self.read_index(columns="event_id").values))
    requested_ids = {str(eid) for eid in iterate(event_id)}
    return indexed_ids & requested_ids
def __call__(self, obj, **kwargs) -> pd.DataFrame:
    """
    Iterate an object tree and create a dataframe.

    Finds all instances of targeted class and returns a row for each.

    Parameters
    ----------
    obj
        The object to recurse.
    """
    df = self._func(obj, **kwargs)
    assert isinstance(df, pd.DataFrame), "must return a DataFrame instance"
    if not df.empty:
        # a non-empty frame should carry all expected columns; convert
        # any UTCDateTime-bearing columns to timestamps
        utc_cols = set(iterate(self.utc_columns)) & set(df.columns)
        for col in utc_cols:
            df[col] = df[col].apply(_timestampit)
    replace = {"nan": "", "None": ""}
    return order_columns(df, self._base_required_columns, self.dtypes, replace)
def check_amp_filter_ids(
    event: Event, filter_ids: Optional[Union[str, Collection[str]]] = None
):
    """
    Check that all amplitudes have codes in filter_ids.
    """
    allowed = {str(fid) for fid in iterate(filter_ids)}
    # There is no amplitude specified
    if not allowed:
        return
    bad = []
    bad_filters = []
    for amp in event.amplitudes:
        if str(amp.filter_id) in allowed:
            continue
        # record the seed id of the offending amplitude
        wid = amp.waveform_id
        bad.append(
            f"{wid.network_code}.{wid.station_code}."
            f"{wid.location_code}.{wid.channel_code}"
        )
        if amp.filter_id.id not in bad_filters:
            bad_filters.append(amp.filter_id.id)
    assert len(bad) == 0, (
        "Unexpected amplitude filter found:\n"
        f"event_id: {str(event.resource_id)}, "
        f"seed_id/s: {bad}, "
        f"filters_used: {set(bad_filters)}"
    )
def _wrap(func):
    # Decorator body: register ``func`` with the currently-active decomposer
    # for each class in ``cls``. Both ``cls`` and ``_VALIDATOR_STATE`` are
    # free variables from the enclosing scope (not visible in this chunk).
    _decomposer = _VALIDATOR_STATE["decomposer"]
    for cls_ in iterate(cls):
        # singledispatch-style registration: register(cls_) returns a
        # registrar that is immediately applied to func
        _decomposer.register(cls_)(func)
    # return func unchanged so the decorated name still refers to it
    return func