def read_csv(filename, read_csv_options=None, add_group=None, tz=None):
    """Read DataFrame from csv file.

    This will read data from a csv file and then process the result with
    `niimpy.util.df_normalize`.

    Parameters
    ----------
    filename : str
        filename of csv file
    read_csv_options : dict, optional
        Dictionary of options to pandas.read_csv, if this is necessary
        for custom csv files.  Defaults to no extra options.
    add_group : object, optional
        If given, add a 'group' column with all values set to this.
    tz : str, optional
        Timezone name, e.g. 'Europe/Helsinki'.  Omitting it is
        deprecated and emits a DeprecationWarning.

    Returns
    -------
    pandas.DataFrame
    """
    # A mutable default ({}) would be shared across calls; use the
    # None-sentinel idiom instead.
    if read_csv_options is None:
        read_csv_options = {}
    if tz is None:
        warnings.warn(DeprecationWarning("From now on, you should explicitly specify timezone with e.g. tz='Europe/Helsinki'"), stacklevel=2)
    df = pd.read_csv(filename, **read_csv_options)
    # df_normalize converts sets the index to time values and does other
    # time conversions.  Inplace.
    util.df_normalize(df, tz=tz)
    df = _read_preprocess(df, add_group=add_group)
    return df
def timestamps(self, table, user, limit=None, offset=None, start=None, end=None):
    """Fetch the timestamps of rows in ``table`` for ``user``.

    Returns a normalized DataFrame when the result contains a 'user'
    column (multi-user data); otherwise returns just the converted
    time column.
    """
    clauses = self._sql(user=user, limit=limit, offset=offset,
                        start=start, end=end)
    query = """SELECT {select_user} time
               FROM "{table}"
               WHERE 1 {where_user} {where_daterange}
               {order_by}
               {limit}
            """.format(table=table, **clauses)
    df = pd.read_sql(query, self.conn, params={'user': user})
    if 'user' in df:
        # Multi-user data: normalize the whole frame in place.
        util.df_normalize(df, tz=self._tz)
        return df
    # Single user data: only the time column, converted to datetimes.
    return util.to_datetime(df['time'])
def hourly(self, table, user, columns=None, limit=None, offset=None, start=None, end=None):
    """Per-hour row counts (and optional per-column statistics).

    Groups rows of ``table`` by local day and hour and counts them.

    Parameters
    ----------
    table : str
        Table name to query.
    user : object
        User to select (interpreted by ``self._sql``).
    columns : str or list of str, optional
        Column name(s) for which to additionally compute mean, stdev
        and count per hour.
    limit, offset, start, end : optional
        Row-limit and date-range options passed to ``self._sql``.

    Returns
    -------
    pandas.DataFrame
        One row per (day, hour), normalized via ``util.df_normalize``.
    """
    # A mutable default ([]) would be shared across calls; use the
    # None-sentinel idiom instead.
    if columns is None:
        columns = []
    if isinstance(columns, str):
        columns = [columns]
    if columns:
        column_selector = ",\n".join(
            " avg({0}) AS {0}_mean, stdev({0}) AS {0}_std, count({0}) AS {0}_count"
            .format(c) for c in columns)
        column_selector = ',\n' + column_selector
    else:
        column_selector = ""
    # NOTE(review): {limit} is applied both inside the subquery and
    # after GROUP BY — presumably intentional (limit raw rows scanned
    # and grouped rows returned), but worth confirming.
    df = pd.read_sql("""SELECT
                 {select_user}
                 strftime('%Y-%m-%d', time, 'unixepoch', 'localtime') AS day,
                 CAST(strftime('%H', time, 'unixepoch', 'localtime') AS INTEGER) AS hour,
                 count(*) as count
                 {column_selector}
             FROM (SELECT * FROM "{table}" {order_by} {limit})
             WHERE 1 {where_user} {where_daterange}
             GROUP BY day, hour
             {limit}
          """.format(table=table, column_selector=column_selector,
                     **self._sql(user=user, limit=limit, offset=offset,
                                 start=start, end=end)),
          self.conn, params={'user': user})
    util.df_normalize(df, old_tz=util.SYSTEM_TZ, tz=self._tz)
    return df
def raw(self, table, user, limit=None, offset=None, start=None, end=None):
    """Return the raw rows of ``table`` for ``user`` as a DataFrame.

    The result is time-normalized in place when a 'time' column is
    present.
    """
    clauses = self._sql(user=user, limit=limit, offset=offset,
                        start=start, end=end)
    query = """SELECT * FROM "{table}"
               WHERE 1 {where_user} {where_daterange}
               {order_by}
               {limit}
            """.format(table=table, **clauses)
    df = pd.read_sql(query, self.conn, params={'user': user})
    # Some tables may lack a time column; normalize only when present.
    if 'time' in df:
        util.df_normalize(df, tz=self._tz)
    return df
def occurrence(self, table, user, bin_width=720, limit=None, offset=None, start=None, end=None):
    """Per-(day, hour) data-occurrence summary for ``user``.

    Each hour is divided into bins of ``bin_width`` seconds; for every
    day/hour the query reports how many bins contain data
    ('occurrence'), the total row count, and which bin indices had
    data ('withdata').
    """
    # bin_width seconds per bin -> bins per hour -> minutes per bin
    # (the minute-of-hour is divided by this inside the query).
    bins_per_hour = 3600 / bin_width
    minutes_per_bin = 60 / bins_per_hour
    clauses = self._sql(user=user, limit=limit, offset=offset,
                        start=start, end=end)
    query = """SELECT
                 {select_user}
                 day, hour,
                 count(*) as occurrence,
                 sum(bin_count) as count,
                 group_concat(interval) AS withdata
             FROM (
                 SELECT
                     strftime('%Y-%m-%d', time, 'unixepoch', 'localtime') AS day,
                     CAST(strftime('%H', time, 'unixepoch', 'localtime') AS INTEGER) AS hour,
                     CAST(strftime('%M', time, 'unixepoch', 'localtime')/:interval_width AS INTEGER) AS interval,
                     count(*) as bin_count
                 FROM "{table}"
                 WHERE 1 {where_user} {where_daterange}
                 GROUP BY day, hour, interval
                 {limit}
             )
             GROUP BY day, hour
          """.format(table=table, **clauses)
    params = {'user': user, 'interval_width': minutes_per_bin}
    df = pd.read_sql(query, self.conn, params=params)
    util.df_normalize(df, old_tz=util.SYSTEM_TZ, tz=self._tz)
    return df