Example #1
    def _read_metadata(self):
        """ Return the meta table. """
        self.ensure_bank_path_exists()
        with sql_connection(self.index_path) as con:
            sql = f'SELECT * FROM "{self._meta_node}";'
            out = pd.read_sql(sql, con)
        return out
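
All four examples rely on a sql_connection helper whose body is not shown. A minimal sketch of what such a context manager might look like, assuming the index file is an SQLite database (the name and behavior are inferred purely from how it is used above, not taken from the original source):

import contextlib
import sqlite3

@contextlib.contextmanager
def sql_connection(path):
    # Hypothetical helper: yield a DB-API connection that pandas' read_sql
    # and to_sql can use, committing on success and always closing.
    con = sqlite3.connect(str(path))
    try:
        yield con
        con.commit()
    finally:
        con.close()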
Example #2
    def read_index(self, **kwargs) -> pd.DataFrame:
        """
        Read the index and return a dataframe containing the event info.

        Parameters
        ----------
        {get_events_params}
        """
        self.ensure_bank_path_exists()
        # Make sure all times are numpy datetime64
        kwargs = _dict_times_to_npdatetimes(kwargs)
        # a simple switch to prevent infinite recursion
        allow_update = kwargs.pop("_allow_update", True)
        # Circular searches are handled on the dataframe itself: read the
        # full index, compute distances, then filter the matching events.
        circular_kwargs, kwargs = _sanitize_circular_search(**kwargs)
        with sql_connection(self.index_path) as con:
            try:
                df = _read_table(self._index_node, con, **kwargs)
            except pd.io.sql.DatabaseError:
                # if this database has never been updated, update now
                if allow_update and self.last_updated_timestamp < 1:
                    self.update_index()
                    # re-run the query, restoring any circular-search kwargs
                    return self.read_index(
                        _allow_update=False, **circular_kwargs, **kwargs
                    )
                # else return empty index
                df = pd.DataFrame(columns=list(EVENT_TYPES_OUTPUT))
        df = _ints_to_time_columns(df, columns=INT_COLUMNS).pipe(
            self._prepare_dataframe, dtypes=EVENT_TYPES_OUTPUT)
        if len(circular_kwargs) >= 3:
            # Requires at least latitude, longitude and min or max radius
            circular_ids = _get_ids(df, circular_kwargs)
            df = df[df.event_id.isin(circular_ids)]
        return df
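
The _sanitize_circular_search helper is only shown being called here. A plausible sketch, under the assumption that the circular-search keys are latitude, longitude, minradius, and maxradius (the key names come from the in-code comment, not from the original helper):

# Assumed key names based on the "latitude, longitude and min or max
# radius" comment above.
_CIRCULAR_KEYS = {"latitude", "longitude", "minradius", "maxradius"}

def _sanitize_circular_search(**kwargs):
    # Split kwargs into circular-search parameters and everything else so
    # the SQL query only receives filters it can apply directly.
    circular = {k: v for k, v in kwargs.items() if k in _CIRCULAR_KEYS}
    other = {k: v for k, v in kwargs.items() if k not in _CIRCULAR_KEYS}
    return circular, other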
Example #3
    def _write_update(self, df: pd.DataFrame, update_time=None):
        """ convert updates to dataframe, then append to index table """
        # read in dataframe and cast to correct types
        assert not df.duplicated().any(), "update index has duplicate entries"

        # set both dfs to use index of event_id
        df = df.set_index("event_id")
        # get current events, but don't allow the index to update again
        current = self.read_index(event_id=set(df.index), _allow_update=False)
        indices_to_update = set(current["event_id"]) & set(df.index)
        # populate index store and update metadata
        with sql_connection(self.index_path) as con:
            if indices_to_update:  # delete rows that will be re-entered
                _drop_rows(self._index_node, con, event_id=indices_to_update)
            node = self._index_node
            df.to_sql(node, con, if_exists="append", index_label="event_id")
            tables = _get_tables(con)
            if self._meta_node not in tables:
                meta = self._make_meta_table()
                meta.to_sql(self._meta_node, con, if_exists="replace")
                # cache the freshly created metadata table
                self._metadata = meta
            # update timestamp
            with suppress_warnings():  # ignore pandas collection warning
                if self.allow_update_timestamp:
                    timestamp = update_time or time.time()
                    dft = pd.DataFrame(timestamp, index=[0], columns=["time"])
                    dft.to_sql(self._time_node,
                               con,
                               if_exists="replace",
                               index=False)
        # invalidate the cached index so the next read reflects the update
        self._index = None
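
The _drop_rows helper called above is also not included in the snippet. A minimal sketch consistent with that call, assuming a parameterized DELETE over the event_id column (the implementation is an assumption, not the original code):

def _drop_rows(table, con, event_id=()):
    # Hypothetical helper: delete rows whose event_id is about to be
    # re-inserted, using placeholders instead of string formatting.
    ids = list(event_id)
    if not ids:
        return
    placeholders = ", ".join("?" for _ in ids)
    con.execute(f'DELETE FROM "{table}" WHERE event_id IN ({placeholders});', ids)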
Example #4
    @property
    def last_updated_timestamp(self):
        """ Return the last modified time stored in the index, else 0.0 """
        with sql_connection(self.index_path) as con:
            try:
                return _read_table(self._time_node, con).loc[0, "time"]
            except (pd.io.sql.DatabaseError, KeyError):  # table missing or empty
                return 0.0
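
Finally, _read_table appears in Examples #2 and #4 without its body. A sketch of what it might do, assuming keyword arguments map onto simple equality filters in a WHERE clause (the real helper may well support ranges and sets of values):

import pandas as pd

def _read_table(table, con, **kwargs):
    # Hypothetical helper: read a table into a DataFrame, optionally
    # narrowing the query with equality filters built from kwargs.
    sql = f'SELECT * FROM "{table}"'
    params = []
    clauses = []
    for key, value in kwargs.items():
        clauses.append(f'"{key}" = ?')
        params.append(value)
    if clauses:
        sql += " WHERE " + " AND ".join(clauses)
    return pd.read_sql(sql, con, params=params)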