def generate_reference_and_validation_datasets(n=1000, validation_ratio=0.5):
    """Produce a reference feature set and an incrementally built validation one.

    Two independent models are built from freshly loaded data:

    * the reference model is fitted on the output of ``merge_test(train, test)``
      and saved as ``model_ref.zip``;
    * the compare model is fitted on the training part returned by
      ``merge_test(..., validation_ratio=validation_ratio)``, trained through
      ``fit_lgbm``, saved as ``model_test.zip`` and reloaded from that file.
      The reloaded model is then fed the validation batches one by one
      (``update`` followed by ``predict``).

    Args:
        n: Forwarded to ``get_data`` for both data loads.
        validation_ratio: Forwarded to ``merge_test`` for the compare data.

    Returns:
        The tuple ``(train_reference, X_reference, validation, X_validation)``,
        where ``validation`` is the concatenation of the validation batches
        and ``X_validation`` the concatenation of the non-empty feature
        frames returned by ``predict``.
    """
    # Reference side: features come from a single fit_transform pass.
    ref_train, ref_questions, ref_lectures, ref_test = get_data(n)
    train_reference = merge_test(ref_train, ref_test)
    reference_model = RiiidModel(ref_questions, ref_lectures, PARAMS)
    X_reference, *_ = reference_model.fit_transform(train_reference)
    reference_model.save(os.path.join(TEST_PATH, 'model_ref.zip'))

    # Compare side: data is loaded again and part of it is held out as validation batches.
    cmp_train, cmp_questions, cmp_lectures, cmp_test = get_data(n)
    train_compare, validation = merge_test(cmp_train, cmp_test, validation_ratio=validation_ratio)
    compare_model = RiiidModel(cmp_questions, cmp_lectures, PARAMS)
    X_compare, y, fit_rows, eval_rows = compare_model.fit_transform(train_compare)
    compare_model.fit_lgbm(X_compare[fit_rows], y[fit_rows], X_compare[eval_rows], y[eval_rows])

    # Round-trip through disk so the batches below run against the loaded model.
    model_path = os.path.join(TEST_PATH, 'model_test.zip')
    compare_model.save(model_path)
    restored_model = RiiidModel.load(model_path)

    feature_batches = []
    for batch in validation:
        # Predictions are not needed here, only the features built for the batch.
        features, _ = restored_model.predict(restored_model.update(batch))
        if len(features) == 0:
            continue
        feature_batches.append(features)

    validation = pd.concat(validation)
    X_validation = pd.concat(feature_batches)

    return train_reference, X_reference, validation, X_validation
Example #2
0
from riiid.validation import merge_test
from riiid.utils import configure_console_logging
from riiid.config import INPUT_PATH, MODELS_PATH, PARAMS

# Set up logging before any work starts.
configure_console_logging()

# --- Data: load the raw tables, then preprocess questions and lectures ---
loader = DataLoader(INPUT_PATH)
train, questions, lectures = loader.load_first_users(30000)
questions, lectures = preprocess_questions(questions), preprocess_lectures(lectures)

# --- Validation: merge the stored test set into the training data ---
test = loader.load_tests('tests_0.pkl')
train = merge_test(train, test)

# --- Features ---
model = RiiidModel(questions, lectures, params=PARAMS)
# NOTE: `train` is rebound here, from the merged data to the third value
# returned by fit_transform (used below to index X and y).
X, y, train, valid = model.fit_transform(train)
features_path = os.path.join(MODELS_PATH, model.get_name('data.pkl'))
save_pkl((X, y, train, valid), path=features_path)

# --- Training: each base model gets the same train/valid split, then the blender ---
for fit_base_model in (model.fit_lgbm, model.fit_catboost, model.fit_neural):
    fit_base_model(X[train], y[train], X[valid], y[valid])
model.fit_blender(X[valid], y[valid])

# --- Persistence ---
model.save(os.path.join(MODELS_PATH, model.get_name()))