import os

import pandas as pd

from riiid.core.model import RiiidModel

# get_data, merge_test, PARAMS and TEST_PATH are expected to be provided by the
# surrounding test module.


def generate_reference_and_validation_datasets(n=1000, validation_ratio=0.5):
    # Reference data: fit the model on the full merged train/test data
    train, questions, lectures, test = get_data(n)
    train_reference = merge_test(train, test)
    model = RiiidModel(questions, lectures, PARAMS)
    X_reference, *_ = model.fit_transform(train_reference)
    model.save(os.path.join(TEST_PATH, 'model_ref.zip'))

    # Compare data: keep part of the test set aside as validation batches
    train, questions, lectures, test = get_data(n)
    train_compare, validation = merge_test(train, test, validation_ratio=validation_ratio)
    model = RiiidModel(questions, lectures, PARAMS)
    X_compare, y, train, valid = model.fit_transform(train_compare)
    model.fit_lgbm(X_compare[train], y[train], X_compare[valid], y[valid])

    # Save the model and reload it, as done for a submission
    model.save(os.path.join(TEST_PATH, 'model_test.zip'))
    model: RiiidModel = RiiidModel.load(os.path.join(TEST_PATH, 'model_test.zip'))

    # Replay the validation batches through the online update/predict path
    X_validation = []
    for test in validation:
        test = model.update(test)
        X, predictions = model.predict(test)
        if len(X) > 0:
            X_validation.append(X)

    validation = pd.concat(validation)
    X_validation = pd.concat(X_validation)

    data = (train_reference, X_reference, validation, X_validation)
    return data
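# A minimal usage sketch, not part of the project: it compares the features built
# offline by fit_transform with the ones rebuilt through the online update/predict
# path, assuming the two frames share a meaningful index and comparable columns.
# The function name compare_reference_and_validation_features is hypothetical.
def compare_reference_and_validation_features(n=1000, validation_ratio=0.5):
    _, X_reference, _, X_validation = generate_reference_and_validation_datasets(
        n=n, validation_ratio=validation_ratio
    )

    # Restrict the comparison to the rows and columns present in both frames
    common_index = X_reference.index.intersection(X_validation.index)
    common_columns = X_reference.columns.intersection(X_validation.columns)
    reference = X_reference.loc[common_index, common_columns]
    computed = X_validation.loc[common_index, common_columns]

    # List the columns whose values differ between the two pipelines
    mismatches = [c for c in common_columns if not reference[c].equals(computed[c])]
    print('Columns with differences:', mismatches)
    return mismatches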
import riiideducation

# Create the Kaggle time-series API environment before anything else
env = riiideducation.make_env()
iter_test = env.iter_test()

import os
import sys
import logging

# Make the uploaded submission package importable
PATH = '/kaggle/input/riiid-submission'
sys.path.append(PATH)

from riiid.utils import configure_console_logging, check_versions
from riiid.core.model import RiiidModel

configure_console_logging()
check_versions()

logging.info('Load model')
model = RiiidModel.load(os.path.join(PATH, 'model'))

# Submission loop: update the model with each test batch, then submit predictions
for test, _ in iter_test:
    test = model.update(test)
    _, predictions = model.predict(test)
    env.predict(predictions)
import os
import time
import logging

from riiid.core.data import DataLoader
from riiid.utils import configure_console_logging
from riiid.config import MODELS_PATH, INPUT_PATH
from riiid.core.model import RiiidModel

configure_console_logging()

logging.info('Loading model')
MODEL_NAME = 'model_20210123_210542.zip'
model: RiiidModel = RiiidModel.load(os.path.join(MODELS_PATH, MODEL_NAME))

tests = DataLoader(INPUT_PATH).load_tests_examples()

for i, test in enumerate(tests):
    if model.test_batch == 1:
        start = time.perf_counter()
    test = model.update(test)
    _, predictions = model.predict(test)

end = time.perf_counter()
total = end - start
logging.info('Time spent: {:.1f}s ({:.3f}s by batch)'.format(total, total / model.test_batch))