def _read_metadata(self):
    """Return the metadata table of the bank as a dataframe."""
    self.ensure_bank_path_exists()
    # build the query up front; the node name is controlled by the class
    query = f'SELECT * FROM "{self._meta_node}";'
    with sql_connection(self.index_path) as con:
        return pd.read_sql(query, con)
def read_index(self, **kwargs) -> pd.DataFrame:
    """
    Read the index and return a dataframe containing the event info.

    Parameters
    ----------
    {get_events_params}
    """
    self.ensure_bank_path_exists()
    # Make sure all times are numpy datetime64
    kwargs = _dict_times_to_npdatetimes(kwargs)
    # a simple switch to prevent infinite recursion
    allow_update = kwargs.pop("_allow_update", True)
    # Circular search requires work to be done on the dataframe - we need
    # to get the whole dataframe then calculate the distances and search in
    # that
    circular_kwargs, kwargs = _sanitize_circular_search(**kwargs)
    with sql_connection(self.index_path) as con:
        try:
            df = _read_table(self._index_node, con, **kwargs)
        except pd.io.sql.DatabaseError:
            # if this database has never been updated, update now
            if allow_update and self.last_updated_timestamp < 1:
                self.update_index()
                # re-enter with the recursion switch off so a second
                # failure falls through to the empty-index branch below
                return self.read_index(_allow_update=False, **kwargs)
            # else return empty index
            df = pd.DataFrame(columns=list(EVENT_TYPES_OUTPUT))
    # convert integer time columns back to datetimes and coerce dtypes
    df = _ints_to_time_columns(df, columns=INT_COLUMNS).pipe(
        self._prepare_dataframe, dtypes=EVENT_TYPES_OUTPUT
    )
    if len(circular_kwargs) >= 3:
        # Requires at least latitude, longitude and min or max radius
        circular_ids = _get_ids(df, circular_kwargs)
        df = df[df.event_id.isin(circular_ids)]
    return df
def _write_update(self, df: pd.DataFrame, update_time=None):
    """
    Append an update dataframe to the index table.

    Rows whose ``event_id`` already exists in the index are dropped from
    the table first, so the new rows replace them. Also creates the
    metadata table on first write and refreshes the update timestamp.

    Parameters
    ----------
    df
        Dataframe of event info to write; must contain an ``event_id``
        column and no duplicate rows.
    update_time
        Timestamp to record as the last-update time; defaults to
        ``time.time()``.
    """
    # read in dataframe and cast to correct types
    assert not df.duplicated().any(), "update index has duplicate entries"
    # set both dfs to use index of event_id
    df = df.set_index("event_id")
    # get current events, but dont allow it to update again
    current = self.read_index(event_id=set(df.index), _allow_update=False)
    indices_to_update = set(current["event_id"]) & set(df.index)
    # populate index store and update metadata
    with sql_connection(self.index_path) as con:
        if indices_to_update:  # delete rows that will be re-entered
            _drop_rows(self._index_node, con, event_id=indices_to_update)
        node = self._index_node
        df.to_sql(node, con, if_exists="append", index_label="event_id")
        tables = _get_tables(con)
        if self._meta_node not in tables:
            meta = self._make_meta_table()
            meta.to_sql(self._meta_node, con, if_exists="replace")
            # Refresh the cached metadata only when the table was just
            # (re)created; previously `meta` was unbound whenever the
            # table already existed, raising NameError on later updates.
            self._metadata = meta
        # update timestamp
        with suppress_warnings():  # ignore pandas collection warning
            if self.allow_update_timestamp:
                timestamp = update_time or time.time()
                dft = pd.DataFrame(timestamp, index=[0], columns=["time"])
                dft.to_sql(self._time_node, con, if_exists="replace", index=False)
    # invalidate the cached in-memory index so it is re-read next access
    self._index = None
def last_updated_timestamp(self):
    """ Return the last modified time stored in the index, else 0.0 """
    with sql_connection(self.index_path) as con:
        # the time table may not exist yet (bank never updated)
        try:
            time_df = _read_table(self._time_node, con)
        except pd.io.sql.DatabaseError:
            return 0.0
        # the table may also exist but contain no rows
        try:
            return time_df.loc[0, "time"]
        except KeyError:
            return 0.0