예제 #1
0
def read_csv(filename, read_csv_options={}, add_group=None,
             tz=None):
    """Read DataFrame from csv file

    This will read data from a csv file and then process the result with
    `niimpy.util.df_normalize`.


    Parameters
    ----------

    filename : str
        filename of csv file

    read_csv_options: dict
        Dictionary of options to pandas.read_csv, if this is necessary for custom
        csv files.

    add_group : object
        If given, add a 'group' column with all values set to this.

    """
    if tz is None:
        warnings.warn(DeprecationWarning("From now on, you should explicitely specify timezone with e.g. tz='Europe/Helsinki'"), stacklevel=2)

    df = pd.read_csv(filename, **read_csv_options)

    # df_normalize converts sets the index to time values and does other time
    # conversions.  Inplace.
    util.df_normalize(df, tz=tz)
    df = _read_preprocess(df, add_group=add_group)
    return df
예제 #2
0
 def timestamps(self,
                table,
                user,
                limit=None,
                offset=None,
                start=None,
                end=None):
     df = pd.read_sql("""SELECT
                             {select_user} time
                         FROM "{table}"
                         WHERE 1 {where_user} {where_daterange}
                         {order_by}
                         {limit}
                     """.format(table=table,
                                **self._sql(user=user,
                                            limit=limit,
                                            offset=offset,
                                            start=start,
                                            end=end)),
                      self.conn,
                      params={'user': user})
     if 'user' not in df:
         # Single user data:
         return util.to_datetime(df['time'])
     else:
         util.df_normalize(df, tz=self._tz)
         return df
예제 #3
0
    def hourly(self,
               table,
               user,
               columns=[],
               limit=None,
               offset=None,
               start=None,
               end=None):
        if isinstance(columns, str):
            columns = [columns]
        if columns:
            column_selector = ",\n".join(
                "    avg({0}) AS {0}_mean, stdev({0}) AS {0}_std, count({0}) AS {0}_count"
                .format(c) for c in columns)
            column_selector = ',\n' + column_selector
        else:
            column_selector = ""

        df = pd.read_sql("""SELECT
                                {select_user}
                                strftime('%Y-%m-%d', time, 'unixepoch', 'localtime') AS day,
                                CAST(strftime('%H', time, 'unixepoch', 'localtime') AS INTEGER) AS hour,
                                count(*) as count {column_selector}
                            FROM (
                                SELECT * FROM "{table}" {order_by} {limit}
                                )
                            WHERE 1 {where_user} {where_daterange}
                            GROUP BY day, hour
                            {limit}
                         """.format(table=table,
                                    column_selector=column_selector,
                                    **self._sql(user=user,
                                                limit=limit,
                                                offset=offset,
                                                start=start,
                                                end=end)),
                         self.conn,
                         params={'user': user})
        util.df_normalize(df, old_tz=util.SYSTEM_TZ, tz=self._tz)
        return df
예제 #4
0
 def raw(self, table, user, limit=None, offset=None, start=None, end=None):
     df = pd.read_sql(
         """SELECT
                             *
                         FROM "{table}"
                         WHERE 1 {where_user} {where_daterange}
                         {order_by}
                         {limit}
                     """.format(
             table=table,
             **self._sql(user=user,
                         limit=limit,
                         offset=offset,
                         start=start,
                         end=end)
             #select_user=self._sql_select_user(user),  where_user=self._sql_where_user(user),
             #limit=self._sql_limit(limit)
         ),
         self.conn,
         params={'user': user})
     if 'time' in df:
         util.df_normalize(df, tz=self._tz)
     return df
예제 #5
0
 def occurrence(self,
                table,
                user,
                bin_width=720,
                limit=None,
                offset=None,
                start=None,
                end=None):
     n_intervals = 3600 / bin_width
     interval_width = 60 / n_intervals
     df = pd.read_sql("""SELECT {select_user} day, hour,
                             count(*) as occurrence, sum(bin_count) as count, group_concat(interval) AS withdata
                         FROM (
                             SELECT
                               strftime('%Y-%m-%d', time, 'unixepoch', 'localtime') AS day,
                               CAST(strftime('%H', time, 'unixepoch', 'localtime') AS INTEGER) AS hour,
                               CAST(strftime('%M', time, 'unixepoch', 'localtime')/:interval_width AS INTEGER) AS interval,
                               count(*) as bin_count
                              FROM "{table}"
                              WHERE 1 {where_user} {where_daterange}
                              GROUP BY day, hour, interval
                              {limit}
                             )
                         GROUP BY day, hour
                     """.format(table=table,
                                **self._sql(user=user,
                                            limit=limit,
                                            offset=offset,
                                            start=start,
                                            end=end)),
                      self.conn,
                      params={
                          'user': user,
                          'interval_width': interval_width
                      })
     util.df_normalize(df, old_tz=util.SYSTEM_TZ, tz=self._tz)
     return df