コード例 #1
0
ファイル: keystroke_tools.py プロジェクト: wuami/PRISM
def get_mouse_features(userid, split_by, c):
    """ Summarise one user's mouse time series, overall or per time chunk.

    Args:
        userid: user whose mouse series is loaded via ``get_mouse_series``.
        split_by: one of 'user', 'insight' or 'hour'.
        c: database connection passed through to the helper calls.

    Returns:
        DataFrame of means whose columns match the mouse series columns:
        built from ``np.mean`` over each whole column for 'user', otherwise
        from ``tt.break_and_get_mean`` over the time breaks.

    Raises:
        ValueError: if ``split_by`` is not one of the supported values.
    """
    if split_by not in ('user', 'insight', 'hour'):
        raise ValueError("could not get split_by %s" % split_by)

    # 'user' and 'insight' both look up the insight breaks; only 'hour'
    # looks up hourly ones.
    break_unit = 'hour' if split_by == 'hour' else 'insight'
    breaks = tt.get_time_breaks(userid, break_unit, c)

    series = get_mouse_series(userid, c)
    columns = series.columns

    if split_by != 'user':
        per_column = [
            tt.break_and_get_mean(series[name], breaks) for name in columns
        ]
        means = pd.concat(per_column, axis=1)
    else:
        # whole-series mean: the breaks fetched above are not used here
        means = pd.DataFrame([np.mean(series[name]) for name in columns]).T

    means.columns = columns
    return means
コード例 #2
0
ファイル: watch_data_tools.py プロジェクト: wuami/PRISM
def get_all_features_for_id(userid, split_by, c):
    """ Get all summary features for watch data for one user.

    Reads the user's rows from the ``user_data`` table, passes each row
    through ``split_blob_series`` to build a timestamp-indexed frame, replaces
    the cumulative counters with their sample-to-sample differences, swaps the
    raw accelerometer axes for the output of ``cart2sph`` and summarises every
    remaining column over the time breaks.

    Args:
        userid: user whose rows are selected (interpolated into the SQL text).
        split_by: forwarded to ``tt.get_time_breaks`` to choose the breaks.
        c: database connection handed to ``pd.read_sql_query``.

    Returns:
        DataFrame with ``mean_*``, ``std_*`` and ``freq_*`` columns for each
        series, joined with the cumulative counters looked up at every break
        after the first.
    """
    features = ['timestamp', 'heart_rate', 'light', 'total_steps', 'walk_steps', 'run_steps', 'walk_freq', 'step_status', 'speed', 'distance', 'calories', 'accelx', 'accely', 'accelz', 'rotata', 'rotatb', 'rotatc']

    # get data from mysql
    # NOTE(review): userid is formatted straight into the query text; this is
    # only safe while callers pass trusted ids.
    breaks = tt.get_time_breaks(userid, split_by, c)
    data = pd.read_sql_query("SELECT %s FROM user_data WHERE userid = %s;" % (", ".join(features), userid), c)

    # split data and ensure correct format and data types
    time_series = pd.concat([split_blob_series(data.iloc[i, :]) for i in range(data.shape[0])])
    time_series.columns = features
    # NOTE(review): DataFrame.convert_objects was deprecated in pandas 0.21 and
    # removed in 1.0. It is kept to preserve the original coercion semantics;
    # newer pandas needs an explicit per-column conversion here.
    time_series = time_series.convert_objects(convert_dates=True, convert_numeric=True)
    time_series['timestamp'] = pd.to_datetime(time_series['timestamp'])
    time_series = time_series.set_index('timestamp')

    # cumulative counters: keep their value at each break (skipping the first
    # break), then replace the raw columns with *_diff columns
    cum_dtypes = ['total_steps', 'walk_steps', 'run_steps', 'distance', 'calories']
    final_values = pd.concat([tt.get_time_ceiling(b, time_series[cum_dtypes]) for b in breaks[1:]], axis=1).T
    time_series = time_series.join(time_series[cum_dtypes].diff(), lsuffix="", rsuffix="_diff")
    # axis must be a keyword: the positional form was removed in pandas 2.0
    time_series = time_series.drop(cum_dtypes, axis=1)

    # replace the cartesian acceleration columns with the cart2sph output
    # (the rotation columns are left untouched)
    accel_axes = ['accelx', 'accely', 'accelz']
    time_series = time_series.join(cart2sph(time_series[accel_axes]))
    time_series = time_series.drop(accel_axes, axis=1)

    # return combined data over all time series
    data = pd.concat([tt.break_and_get_stats(time_series[x], breaks) for x in time_series.columns], axis=0).T
    # assumes break_and_get_stats yields mean, std and freq (in that order)
    # for each column -- TODO confirm against time_tools
    featurelist = []
    for feat in time_series.columns:
        featurelist.extend(['mean_%s' % feat, 'std_%s' % feat, 'freq_%s' % feat])
    data.columns = featurelist
    # align the counter snapshots with the per-break stats before joining
    final_values.index = data.index
    data = data.join(final_values)
    return data
コード例 #3
0
ファイル: train_model.py プロジェクト: wuami/PRISM
def get_var_by_user(var, user, split_by, c):
    """ Fetch the values of column ``var`` from ``user_insights`` for one user.

    Args:
        var: column name (or SQL column expression) to select.
        user: userid value used in the WHERE clause.
        split_by: 'hour' looks the values up at the hourly breaks; any other
            value returns the raw column.
        c: database connection handed to ``pd.read_sql_query``.

    Returns:
        1-D numpy array of values.
    """
    if split_by != 'hour':
        query = "SELECT %s FROM user_insights WHERE userid = %s" % (var, user)
        # first selected column as a flat array
        return pd.read_sql_query(query, c).values.T[0]

    query = "SELECT time, %s FROM user_insights WHERE userid = %s" % (var, user)
    insights = pd.read_sql_query(query, c).set_index('time')
    hour_breaks = tt.get_time_breaks(user, split_by, c)
    at_breaks = [tt.get_time_ceiling(t, insights) for t in hour_breaks]
    # first row of the combined frame, dropping the entry for the first break
    return pd.concat(at_breaks, axis=1).values[0, 1:]
コード例 #4
0
def get_all_features_for_id(userid, split_by, c):
    """ Get all summary features for watch data for one user.

    Reads the user's rows from the ``user_data`` table, passes each row
    through ``split_blob_series`` to build a timestamp-indexed frame, replaces
    the cumulative counters with their sample-to-sample differences, swaps the
    raw accelerometer axes for the output of ``cart2sph`` and summarises every
    remaining column over the time breaks.

    Args:
        userid: user whose rows are selected (interpolated into the SQL text).
        split_by: forwarded to ``tt.get_time_breaks`` to choose the breaks.
        c: database connection handed to ``pd.read_sql_query``.

    Returns:
        DataFrame with ``mean_*``, ``std_*`` and ``freq_*`` columns for each
        series, joined with the cumulative counters looked up at every break
        after the first.
    """
    features = [
        'timestamp', 'heart_rate', 'light', 'total_steps', 'walk_steps',
        'run_steps', 'walk_freq', 'step_status', 'speed', 'distance',
        'calories', 'accelx', 'accely', 'accelz', 'rotata', 'rotatb', 'rotatc'
    ]

    # get data from mysql
    # NOTE(review): userid is formatted straight into the query text; this is
    # only safe while callers pass trusted ids.
    breaks = tt.get_time_breaks(userid, split_by, c)
    data = pd.read_sql_query(
        "SELECT %s FROM user_data WHERE userid = %s;" %
        (", ".join(features), userid), c)

    # split data and ensure correct format and data types
    time_series = pd.concat(
        [split_blob_series(data.iloc[i, :]) for i in range(data.shape[0])])
    time_series.columns = features
    # NOTE(review): DataFrame.convert_objects was deprecated in pandas 0.21 and
    # removed in 1.0. It is kept to preserve the original coercion semantics;
    # newer pandas needs an explicit per-column conversion here.
    time_series = time_series.convert_objects(convert_dates=True,
                                              convert_numeric=True)
    time_series['timestamp'] = pd.to_datetime(time_series['timestamp'])
    time_series = time_series.set_index('timestamp')

    # cumulative counters: keep their value at each break (skipping the first
    # break), then replace the raw columns with *_diff columns
    cum_dtypes = [
        'total_steps', 'walk_steps', 'run_steps', 'distance', 'calories'
    ]
    final_values = pd.concat(
        [tt.get_time_ceiling(b, time_series[cum_dtypes]) for b in breaks[1:]],
        axis=1).T
    time_series = time_series.join(time_series[cum_dtypes].diff(),
                                   lsuffix="",
                                   rsuffix="_diff")
    # axis must be a keyword: the positional form was removed in pandas 2.0
    time_series = time_series.drop(cum_dtypes, axis=1)

    # replace the cartesian acceleration columns with the cart2sph output
    # (the rotation columns are left untouched)
    accel_axes = ['accelx', 'accely', 'accelz']
    time_series = time_series.join(cart2sph(time_series[accel_axes]))
    time_series = time_series.drop(accel_axes, axis=1)

    # return combined data over all time series
    data = pd.concat(
        [
            tt.break_and_get_stats(time_series[x], breaks)
            for x in time_series.columns
        ],
        axis=0,
    ).T
    # assumes break_and_get_stats yields mean, std and freq (in that order)
    # for each column -- TODO confirm against time_tools
    featurelist = []
    for feat in time_series.columns:
        featurelist.extend(
            ['mean_%s' % feat,
             'std_%s' % feat,
             'freq_%s' % feat])
    data.columns = featurelist
    # align the counter snapshots with the per-break stats before joining
    final_values.index = data.index
    data = data.join(final_values)
    return data
コード例 #5
0
ファイル: keystroke_tools.py プロジェクト: maulikkamdar/PRISM
def get_mouse_features(userid, split_by, c):
    """ Break the mouse time series into chunks and average each chunk.

    Args:
        userid: user whose mouse series is loaded via ``get_mouse_series``.
        split_by: 'user' (one mean per column over the whole series),
            'insight' or 'hour' (means from ``tt.break_and_get_mean``).
        c: database connection passed through to the helper calls.

    Returns:
        DataFrame of means with the same columns as the mouse series.

    Raises:
        ValueError: if ``split_by`` is not 'user', 'insight' or 'hour'.
    """
    allowed = ['user', 'insight', 'hour']
    if split_by not in allowed:
        raise ValueError("could not get split_by %s" % split_by)

    # both 'user' and 'insight' request the insight breaks
    granularity = 'insight' if split_by in ('user', 'insight') else 'hour'
    breaks = tt.get_time_breaks(userid, granularity, c)
    mouse = get_mouse_series(userid, c)

    if split_by == 'user':
        overall = []
        for col in mouse.columns:
            overall.append(np.mean(mouse[col]))
        # one row holding the overall mean of every column
        means = pd.DataFrame(overall).T
    else:
        chunked = []
        for col in mouse.columns:
            chunked.append(tt.break_and_get_mean(mouse[col], breaks))
        means = pd.concat(chunked, axis=1)

    means.columns = mouse.columns
    return means
コード例 #6
0
ファイル: train_model.py プロジェクト: maulikkamdar/PRISM
def get_var_by_user(var, user, split_by, c):
    """ Return one user's values for ``var`` from the ``user_insights`` table.

    Args:
        var: column name (or SQL column expression) to select.
        user: userid value used in the WHERE clause.
        split_by: when 'hour', values are looked up at each hourly break;
            otherwise the stored column is returned as-is.
        c: database connection handed to ``pd.read_sql_query``.

    Returns:
        1-D numpy array of values.
    """
    hourly = split_by == 'hour'
    selected = "time, %s" % var if hourly else "%s" % var
    frame = pd.read_sql_query(
        "SELECT %s FROM user_insights WHERE userid = %s" % (selected, user),
        c)

    if not hourly:
        # flatten the first selected column
        return frame.values.T[0]

    frame = frame.set_index('time')
    pieces = []
    for stamp in tt.get_time_breaks(user, split_by, c):
        pieces.append(tt.get_time_ceiling(stamp, frame))
    combined = pd.concat(pieces, axis=1)
    # first row only, without the entry belonging to the first break
    return combined.values[0, 1:]
コード例 #7
0
ファイル: keystroke_tools.py プロジェクト: wuami/PRISM
def get_keyboard_features(id, split_by, c):
    """ Get all keyboard features - both latency and press time - for one user.

    Args:
        id: user id; for 'user' it is passed directly to
            ``combine_keyboard_features``.
        split_by: one of 'user', 'insight' or 'hour'.
        c: database connection passed through to the helper calls.

    Returns:
        The output of ``combine_keyboard_features`` for 'user'; the
        concatenated per-insight features for 'insight'; for 'hour', those
        features looked up at each hourly break and indexed by the breaks.

    Raises:
        ValueError: if ``split_by`` is not one of the supported values.
    """
    if split_by not in ('user', 'insight', 'hour'):
        raise ValueError("could not get split_by %s" % split_by)

    if split_by not in ('insight', 'hour'):
        return combine_keyboard_features(id, split_by, c)

    insight_ids = get_insights_by_user(id, c).T.values[0]
    per_insight = pd.concat(
        [combine_keyboard_features(i, split_by, c) for i in insight_ids])
    if split_by == 'insight':
        return per_insight

    # hourly: the first break is skipped
    hour_breaks = tt.get_time_breaks(id, 'hour', c)[1:]
    insight_times = pd.Series(get_insight_times_by_user(id, c).values[:, 0])
    timed = per_insight.set_index(pd.to_datetime(insight_times))
    at_breaks = [tt.get_time_ceiling(t, timed) for t in hour_breaks]
    return pd.concat(at_breaks, axis=1).T.set_index(hour_breaks)
コード例 #8
0
ファイル: keystroke_tools.py プロジェクト: maulikkamdar/PRISM
def get_keyboard_features(id, split_by, c):
    """ Collect keyboard features (latency and press time) for one user.

    Args:
        id: user id; for 'user' it is passed directly to
            ``combine_keyboard_features``.
        split_by: 'user', 'insight' or 'hour'.
        c: database connection passed through to the helper calls.

    Returns:
        For 'user', whatever ``combine_keyboard_features`` returns for the id.
        For 'insight', the per-insight features concatenated into one frame.
        For 'hour', that frame re-indexed by insight time and then looked up
        at each hourly break (the first break is dropped).

    Raises:
        ValueError: if ``split_by`` is not 'user', 'insight' or 'hour'.
    """
    valid_modes = ['user', 'insight', 'hour']
    if split_by not in valid_modes:
        raise ValueError("could not get split_by %s" % split_by)

    per_insight_mode = split_by == 'insight' or split_by == 'hour'
    if not per_insight_mode:
        return combine_keyboard_features(id, split_by, c)

    frames = []
    for insight in get_insights_by_user(id, c).T.values[0]:
        frames.append(combine_keyboard_features(insight, split_by, c))
    keyboard = pd.concat(frames)

    if split_by != 'insight':
        breaks = tt.get_time_breaks(id, 'hour', c)[1:]
        stamps = get_insight_times_by_user(id, c).values[:, 0]
        keyboard = keyboard.set_index(pd.to_datetime(pd.Series(stamps)))
        rows = []
        for moment in breaks:
            rows.append(tt.get_time_ceiling(moment, keyboard))
        # one row per break, labelled with the break itself
        keyboard = pd.concat(rows, axis=1).T
        keyboard = keyboard.set_index(breaks)
    return keyboard