Example #1
0
def gen_course_user_first_time():
    """Build, per enrollment row, the user's first-activity day in each course.

    For every (username, course_id) pair in the log, the earliest ``time`` is
    converted with ``utils.toordinal`` and expressed relative to the earliest
    ``time`` of the whole log.  The values are pivoted to one row per
    username with one column per course_id, then left-joined onto the
    enrollment table by ``username``.

    Returns:
        dict: ``{'X': ndarray}`` -- a 2-D array with one row per row of the
        merged enrollment table and one column per course_id present in the
        log (in ``pivot_table``'s column order).  Users/courses without any
        log entry are filled with ``-1``.
    """
    enr_df = utils.load_enroll()
    df = utils.load_log()
    # NOTE(review): utils.toordinal presumably maps a timestamp to a day
    # ordinal -- confirm against utils.
    min_date = utils.toordinal(df['time'].min())

    # Earliest time per (user, course).  No global pre-sort is needed: the
    # minimum of a group does not depend on row order (and DataFrame.sort
    # no longer exists in current pandas).
    firsts = (df.groupby(['username', 'course_id'])['time']
                .min()
                .reset_index())
    firsts['first_time'] = [utils.toordinal(t) - min_date
                            for t in firsts['time']]

    featp = firsts.pivot_table(values='first_time',
                               index='username',
                               columns='course_id').reset_index()
    # Derive the feature width from the data instead of hard-coding 39, so a
    # log with a different number of courses does not raise a length
    # mismatch when the columns are renamed.
    col_sz = len(featp.columns) - 1
    feat_cols = list(range(col_sz))
    featp.columns = ['username'] + feat_cols

    enr_df = enr_df.merge(featp, how='left', on='username')

    # Only the feature columns are returned, so only they need the -1 fill.
    return {'X': np.array(enr_df[feat_cols].fillna(-1))}
Example #2
0
def gen_time_by_username():
    """Build per-enrollment features: the user's first and last activity day.

    Same as "time_feat.gen_time_by_username.npz" in initial_analysis.

    For every username in the log, the earliest and latest ``time`` are
    converted with ``utils.toordinal`` and expressed relative to the earliest
    ``time`` of the whole log.  The result is left-joined onto the enrollment
    table by ``username``; users without any log entry get ``-1``.

    Returns:
        dict: ``{'first': ..., 'last': ...}`` -- the ``first_time`` and
        ``last_time`` columns of the merged enrollment table, each passed
        through ``utils.reshape``.
    """
    enr_df = utils.load_enroll()
    df = utils.load_log()
    # NOTE(review): utils.toordinal presumably maps a timestamp to a day
    # ordinal -- confirm against utils.
    min_date = utils.toordinal(df['time'].min())

    # One min/max aggregation replaces the per-user sort loop.  No global
    # pre-sort is needed because min/max do not depend on row order (and
    # DataFrame.sort no longer exists in current pandas).
    span = df.groupby('username')['time'].agg(['min', 'max']).reset_index()
    span['first_time'] = [utils.toordinal(t) - min_date for t in span['min']]
    span['last_time'] = [utils.toordinal(t) - min_date for t in span['max']]
    feat = span[['username', 'first_time', 'last_time']]

    enr_df = enr_df.merge(feat, how='left', on='username')
    enr_df['first_time'] = enr_df['first_time'].fillna(-1)
    enr_df['last_time'] = enr_df['last_time'].fillna(-1)

    return {
        'first': utils.reshape(enr_df['first_time']),
        'last': utils.reshape(enr_df['last_time']),
    }
Example #3
0
def gen_event_last_time():
    """Build, per enrollment row, the user's last-activity day for each event.

    For every (username, event) pair in the log, the latest ``time`` is
    converted with ``utils.toordinal`` and expressed relative to the earliest
    ``time`` of the whole log.  The values are pivoted to one row per
    username with one column per event value, then left-joined onto the
    enrollment table by ``username``.

    Returns:
        dict: ``{'X': ndarray}`` -- a 2-D array with one row per row of the
        merged enrollment table and one column per event value present in
        the log (in ``pivot_table``'s column order).  Missing user/event
        combinations are filled with ``-1``.
    """
    enr_df = utils.load_enroll()
    df = utils.load_log_with_obj_attrib()
    # NOTE(review): utils.toordinal presumably maps a timestamp to a day
    # ordinal -- confirm against utils.
    min_date = utils.toordinal(df['time'].min())

    # Latest time per (user, event).  No global pre-sort is needed: the
    # maximum of a group does not depend on row order (and DataFrame.sort
    # no longer exists in current pandas).
    lasts = (df.groupby(['username', 'event'])['time']
               .max()
               .reset_index())
    lasts['last_time'] = [utils.toordinal(t) - min_date
                          for t in lasts['time']]

    featp = lasts.pivot_table(values='last_time',
                              index='username',
                              columns='event').reset_index()
    # Replace the event labels with positional integer column names.
    col_sz = len(featp.columns) - 1
    feat_cols = list(range(col_sz))
    featp.columns = ['username'] + feat_cols

    enr_df = enr_df.merge(featp, how='left', on='username')

    # Only the feature columns are returned, so only they need the -1 fill.
    return {
        'X': np.array(enr_df[feat_cols].fillna(-1)),
    }
Example #4
0
def gen_time_by_enrollment():
    """Build per-enrollment first/last activity day, relative to course start.

    Same as "time_feat.gen_first_time.npz" in initial_analysis.

    For every enrollment_id in the log, the earliest and latest ``time`` are
    converted with ``utils.toordinal`` and expressed relative to the earliest
    ``time`` logged for that enrollment's course.  The result is left-joined
    onto the enrollment table by ``enrollment_id``; enrollments without any
    log entry get ``-1``.

    Returns:
        dict: ``{'first': ..., 'last': ...}`` -- the ``first_time`` and
        ``last_time`` columns of the merged enrollment table, each passed
        through ``utils.reshape``.
    """
    enr_df = utils.load_enroll()
    df = utils.load_log()

    # Earliest logged time of each course, as an ordinal.
    # NOTE(review): utils.toordinal presumably maps a timestamp to a day
    # ordinal -- confirm against utils.
    course_start = df.groupby('course_id')['time'].min()
    course_min_time = {
        course_id: utils.toordinal(t)
        for course_id, t in zip(course_start.index, course_start)
    }

    # Sort by time so that the course_id picked for an enrollment is the one
    # on its earliest log row.  DataFrame.sort was removed from pandas;
    # sort_values is its replacement.
    df = df.sort_values('time')
    by_enrollment = df.groupby('enrollment_id')
    # Both aggregations come from the same groupby, so they share the same
    # enrollment_id index order and can be zipped positionally.
    feat = by_enrollment['time'].agg(['min', 'max'])
    base = [course_min_time[c] for c in by_enrollment['course_id'].first()]
    feat['first_time'] = [utils.toordinal(t) - b
                          for t, b in zip(feat['min'], base)]
    feat['last_time'] = [utils.toordinal(t) - b
                         for t, b in zip(feat['max'], base)]
    feat = feat.reset_index()[['enrollment_id', 'first_time', 'last_time']]

    enr_df = enr_df.merge(feat, how='left', on='enrollment_id')
    enr_df['first_time'] = enr_df['first_time'].fillna(-1)
    enr_df['last_time'] = enr_df['last_time'].fillna(-1)

    return {
        'first': utils.reshape(enr_df['first_time']),
        'last': utils.reshape(enr_df['last_time']),
    }