Esempio n. 1
0
def main():
    """Build and save train features from the dropped train set and questions.

    Loads the processed train CSV (with ``const.DTYPE``) and the questions
    CSV, passes both frames to ``get_features``, and hands the result to
    ``save_features`` tagged as ``'train'``.
    """
    interactions = pd.read_csv(
        '../data/processed/train_dropped.csv',
        dtype=const.DTYPE,
    )

    questions_path = const.INPUT_DATA_DIR / 'questions.csv'
    questions = pd.read_csv(questions_path)

    features = get_features(interactions, questions)
    save_features(features, data_type='train')
def main():
    """Compute and save features for the feather-format train and test sets.

    Both frames are read from ``../data/input``, passed together to
    ``get_features`` (which returns a train/test pair), and each result is
    saved under its own ``data_type``.
    """
    train_frame = pd.read_feather('../data/input/train_data.feather')
    test_frame = pd.read_feather('../data/input/test_data.feather')

    train_feats, test_feats = get_features(train_frame, test_frame)

    # Save in the same order as before: train first, then test.
    for split_name, feats in (('train', train_feats), ('test', test_feats)):
        save_features(feats, data_type=split_name)
Esempio n. 3
0
def main():
    """Compute and save features for the CSV train (concatenated) and test sets.

    Reads ``train_concated.csv`` and ``test.csv``, passes both to
    ``get_features``, and saves the two returned frames as ``'train'`` and
    ``'test'`` respectively.
    """
    input_paths = (
        ('train', '../data/input/train_concated.csv'),
        ('test', '../data/input/test.csv'),
    )
    # Dict comprehension keeps the original read order (train, then test).
    frames = {split: pd.read_csv(path) for split, path in input_paths}

    train_out, test_out = get_features(frames['train'], frames['test'])

    save_features(train_out, data_type='train')
    save_features(test_out, data_type='test')
Esempio n. 4
0
def main():
    """Attach each row's question ``part`` to the train set and save features.

    Reads ``train.csv`` (with ``const.DTYPE``) and ``questions.csv``, adds a
    ``part`` column to the train frame by looking up ``content_id`` in a
    ``question_id -> part`` mapping, then runs ``get_features`` and saves the
    result as ``'train'``.
    """
    train_path = const.INPUT_DATA_DIR / 'train.csv'
    train_df = pd.read_csv(train_path, dtype=const.DTYPE)
    questions = pd.read_csv('../data/input/questions.csv')

    # Two-column array of (question_id, part) rows -> lookup dict.
    id_part_pairs = questions[['question_id', 'part']].values
    part_by_question = dict(id_part_pairs)

    # content_id values absent from the mapping come out as NaN (Series.map).
    train_df['part'] = train_df['content_id'].map(part_by_question)

    features = get_features(train_df)
    save_features(features, data_type='train')
Esempio n. 5
0
def main():
    """Join question metadata onto the dropped train set and save features.

    The questions table's ``question_id`` column is renamed to ``content_id``
    so it can be left-joined onto the train frame; the merged frame is passed
    to ``get_features`` and the result saved as ``'train'``.
    """
    interactions = pd.read_csv(
        '../data/processed/train_dropped.csv',
        dtype=const.DTYPE,
    )

    questions = pd.read_csv(
        const.INPUT_DATA_DIR / 'questions.csv',
        dtype=const.DTYPE,
    )
    questions = questions.rename(columns={'question_id': 'content_id'})

    # Left join keeps every train row, matched or not.
    merged = interactions.merge(questions, on='content_id', how='left')

    save_features(get_features(merged), data_type='train')
Esempio n. 6
0
def main():
    """Join questions onto the full train set and save features from four columns.

    After left-joining the questions table (keyed on ``content_id``) onto
    ``train.csv``, only ``user_id``, ``content_id``, ``part`` and
    ``prior_question_elapsed_time`` are passed to ``get_features``. The result
    is saved as ``'train'``.
    """
    input_dir = const.INPUT_DATA_DIR
    raw_train = pd.read_csv(input_dir / 'train.csv', dtype=const.DTYPE)

    questions = pd.read_csv(const.INPUT_DATA_DIR / 'questions.csv')
    questions = questions.rename(columns={'question_id': 'content_id'})

    joined = raw_train.merge(questions, on='content_id', how='left')

    # Restrict the frame handed to get_features to the columns it is given here.
    feature_inputs = joined[
        ['user_id', 'content_id', 'part', 'prior_question_elapsed_time']
    ]
    features = get_features(feature_inputs)
    save_features(features, data_type='train')
Esempio n. 7
0
def main():
    """Compute features on train+test jointly, then split and save them.

    Train and test are loaded via ``dh.load``, stacked row-wise with a fresh
    index, and passed to ``get_features`` as one frame. The output is split
    back by row position: the first ``len(train)`` rows are saved as
    ``'train'``, the remainder as ``'test'``.

    NOTE(review): the positional split relies on ``get_features`` returning
    one row per input row in the same order — confirm against its definition.
    """
    train_part = dh.load('../data/input/train_concated.csv')
    test_part = dh.load('../data/input/test.csv')
    n_train = len(train_part)

    stacked = pd.concat(
        [train_part, test_part],
        axis=0,
        sort=False,
        ignore_index=True,
    )

    all_features = get_features(stacked)

    # Take both slices before saving anything.
    train_features = all_features.iloc[:n_train]
    test_features = all_features.iloc[n_train:]

    save_features(train_features, data_type='train')
    save_features(test_features, data_type='test')
def main():
    """Add a capped attempt count and lecture metadata, then save train features.

    Steps:
      1. Read the dropped train set and the lectures table (``lecture_id``
         renamed to ``content_id``).
      2. Read the precomputed ``dropped___attempt_c`` feature and store it on
         the train frame, replacing every value that is not ``<= 3`` with 4.
      3. Left-join the lectures table on ``content_id``.
      4. Run ``get_features`` and save the result as ``'train'``.
    """
    train_df = pd.read_csv(
        '../data/processed/train_dropped.csv',
        dtype=const.DTYPE,
    )
    lectures = pd.read_csv(const.INPUT_DATA_DIR / 'lectures.csv')
    lectures = lectures.rename(columns={'lecture_id': 'content_id'})

    attempt_frame = pd.read_feather(
        '../features/dropped___attempt_c_train.feather'
    )
    attempt_counts = attempt_frame['dropped___attempt_c'].values
    # Keep 1..3 as-is; everything else (including anything failing the
    # comparison) becomes the single bucket 4.
    capped_attempts = np.where(attempt_counts <= 3, attempt_counts, 4)
    # Assigned positionally from a raw array.
    # NOTE(review): assumes the feather rows align with train_df — confirm.
    train_df['dropped___attempt_c'] = capped_attempts

    with_lectures = train_df.merge(lectures, on='content_id', how='left')

    features = get_features(with_lectures)
    save_features(features, data_type='train')
Esempio n. 9
0
def main():
    """Attach image height/width to train and test, then save features.

    Train image sizes come from two files (the 2020 size table and the
    ``image_name``/``height``/``width`` columns of the 2019 table) stacked
    together; test sizes come from a single file. Each size table is
    left-joined onto its data frame by ``image_name`` before calling
    ``get_features`` on the pair.
    """
    train_df = dh.load('../data/input/train_concated.csv')
    test_df = dh.load('../data/input/test.csv')

    sizes_2020 = pd.read_csv('../data/input/train_image_size.csv')
    sizes_2019 = pd.read_csv(
        '../data/input/train_2019.csv',
        usecols=['image_name', 'height', 'width'],
    )
    train_sizes = pd.concat(
        [sizes_2020, sizes_2019],
        axis=0,
        sort=False,
        ignore_index=True,
    )

    test_sizes = pd.read_csv('../data/input/test_image_size.csv')

    # Left joins keep every image row even when no size entry exists.
    train_with_size = pd.merge(train_df, train_sizes, on='image_name', how='left')
    test_with_size = pd.merge(test_df, test_sizes, on='image_name', how='left')

    train_feats, test_feats = get_features(train_with_size, test_with_size)

    save_features(train_feats, data_type='train')
    save_features(test_feats, data_type='test')
def main():
    """Read the full train CSV, compute features, and save them as ``'train'``."""
    source = const.INPUT_DATA_DIR / 'train.csv'
    raw_train = pd.read_csv(source, dtype=const.DTYPE)

    save_features(get_features(raw_train), data_type='train')
Esempio n. 11
0
def main():
    """Read the dropped train CSV, compute features, and save them as ``'train'``."""
    dropped_train = pd.read_csv(
        '../data/processed/train_dropped.csv',
        dtype=const.DTYPE,
    )

    features = get_features(dropped_train)
    save_features(features, data_type='train')