def generate_reference_and_validation_datasets(n=1000, validation_ratio=0.5):
    """Produce reference features and replayed validation features for comparison.

    Two independent passes are made over data obtained from ``get_data(n)``:

    1. Reference pass: train and test are merged with ``merge_test``, a
       ``RiiidModel`` is fit-transformed on the result and saved to
       ``TEST_PATH/model_ref.zip``.
    2. Compare pass: the data is fetched again, merged with
       ``validation_ratio`` (which also yields the validation batches), a new
       ``RiiidModel`` is fit-transformed, its LGBM is fitted on the
       train/valid selectors returned by ``fit_transform``, and the model is
       saved to and reloaded from ``TEST_PATH/model_test.zip``. Each
       validation batch is then passed through ``update`` and ``predict`` on
       the reloaded model, and every non-empty feature frame is collected.

    Args:
        n: Forwarded unchanged to ``get_data``.
        validation_ratio: Forwarded to ``merge_test`` for the compare pass.

    Returns:
        A tuple ``(train_reference, X_reference, validation, X_validation)``
        where the last two items are the ``pd.concat`` of the validation
        batches and of the collected per-batch features respectively.
    """
    # ---- Reference pass ----
    ref_train, ref_questions, ref_lectures, ref_test = get_data(n)
    train_reference = merge_test(ref_train, ref_test)
    reference_model = RiiidModel(ref_questions, ref_lectures, PARAMS)
    X_reference, *_ = reference_model.fit_transform(train_reference)
    reference_model.save(os.path.join(TEST_PATH, 'model_ref.zip'))

    # ---- Compare pass (data is fetched again rather than reused) ----
    cmp_train, cmp_questions, cmp_lectures, cmp_test = get_data(n)
    train_compare, validation_batches = merge_test(
        cmp_train, cmp_test, validation_ratio=validation_ratio
    )
    compare_model = RiiidModel(cmp_questions, cmp_lectures, PARAMS)
    X_compare, y, train_rows, valid_rows = compare_model.fit_transform(train_compare)
    compare_model.fit_lgbm(
        X_compare[train_rows], y[train_rows],
        X_compare[valid_rows], y[valid_rows],
    )

    # Round-trip through disk so the replay below runs on the loaded model.
    saved_model_path = os.path.join(TEST_PATH, 'model_test.zip')
    compare_model.save(saved_model_path)
    loaded_model: RiiidModel = RiiidModel.load(saved_model_path)

    # Replay validation batches one at a time; update() must precede predict().
    feature_frames = []
    for batch in validation_batches:
        updated_batch = loaded_model.update(batch)
        batch_features, _ = loaded_model.predict(updated_batch)
        if len(batch_features) > 0:
            feature_frames.append(batch_features)

    validation = pd.concat(validation_batches)
    X_validation = pd.concat(feature_frames)

    return (train_reference, X_reference, validation, X_validation)
Example #2
0
# Create the competition environment and obtain the iterator that yields
# the test batches consumed by the prediction loop further down.
import riiideducation
env = riiideducation.make_env()
iter_test = env.iter_test()

import os
import sys
import logging
# Directory holding the submission files; the saved model is loaded from
# here later in the script.
PATH = '/kaggle/input/riiid-submission'
# Extend the module search path with PATH before the `riiid` imports that
# follow (presumably the package lives in that directory -- confirm).
sys.path.append(PATH)

from riiid.utils import configure_console_logging, check_versions
from riiid.core.model import RiiidModel

# Logging and version check run first, before the model is loaded.
configure_console_logging()
check_versions()

logging.info('Load model')
model_path = os.path.join(PATH, 'model')
model = RiiidModel.load(model_path)

# Each item from iter_test is a pair; only its first element is used here.
# The batch goes through update() and then predict(), and the predictions
# are handed back to the environment.
for batch, _sample in iter_test:
    updated_batch = model.update(batch)
    _features, predictions = model.predict(updated_batch)
    env.predict(predictions)
Example #3
0
import os
import time
import logging

from riiid.core.data import DataLoader
from riiid.utils import configure_console_logging
from riiid.config import MODELS_PATH, INPUT_PATH
from riiid.core.model import RiiidModel


configure_console_logging()

logging.info('Loading model')
MODEL_NAME = 'model_20210123_210542.zip'
model: RiiidModel = RiiidModel.load(os.path.join(MODELS_PATH, MODEL_NAME))

tests = DataLoader(INPUT_PATH).load_tests_examples()

# The timer is started only when the model's batch counter reads 1 at the top
# of an iteration. `start` is pre-initialised so that an empty `tests` (or a
# counter that never equals 1) no longer raises NameError after the loop.
start = None
for test in tests:
    if model.test_batch == 1:
        start = time.perf_counter()

    # update() must run before predict() for each batch.
    test = model.update(test)
    _, predictions = model.predict(test)

end = time.perf_counter()
if start is None:
    # Nothing was timed: report it instead of crashing on an unbound name.
    logging.warning('Timer never started: no batch was seen with model.test_batch == 1')
else:
    total = end - start
    logging.info('Time spent: {:.1f}s ({:.3f}s by batch)'.format(total, total / model.test_batch))