def generate_reference_and_validation_datasets(n=1000, validation_ratio=0.5):
    """Produce features from a one-shot fit and from batch-by-batch prediction.

    Two models are built from data returned by ``get_data(n)``:

    * Reference: ``fit_transform`` is run on the train set merged with the
      test set, and the model is saved as ``model_ref.zip``.
    * Compare: ``merge_test`` is called with ``validation_ratio`` and also
      returns the validation batches. The model is fitted with
      ``fit_lgbm``, saved as ``model_test.zip`` and reloaded from that file;
      each validation batch is then passed through ``update`` and
      ``predict``.

    Args:
        n: Forwarded to ``get_data`` for both models.
        validation_ratio: Forwarded to ``merge_test`` for the compare model.

    Returns:
        Tuple ``(train_reference, X_reference, validation, X_validation)``.
        ``validation`` is the ``pd.concat`` of the validation batches and
        ``X_validation`` the ``pd.concat`` of the non-empty per-batch
        features returned by ``predict``.

    Side effects:
        Writes ``model_ref.zip`` and ``model_test.zip`` under ``TEST_PATH``.
    """
    # Reference data
    ref_train, ref_questions, ref_lectures, ref_test = get_data(n)
    train_reference = merge_test(ref_train, ref_test)
    reference_model = RiiidModel(ref_questions, ref_lectures, PARAMS)
    X_reference, *_ = reference_model.fit_transform(train_reference)
    reference_model.save(os.path.join(TEST_PATH, 'model_ref.zip'))

    # Compare data
    cmp_train, cmp_questions, cmp_lectures, cmp_test = get_data(n)
    train_compare, validation = merge_test(
        cmp_train, cmp_test, validation_ratio=validation_ratio
    )
    compare_model = RiiidModel(cmp_questions, cmp_lectures, PARAMS)
    X_compare, y, train_idx, valid_idx = compare_model.fit_transform(train_compare)
    compare_model.fit_lgbm(
        X_compare[train_idx], y[train_idx], X_compare[valid_idx], y[valid_idx]
    )

    # Round-trip the fitted model through disk, then predict with the copy
    # that was loaded back.
    saved_model_path = os.path.join(TEST_PATH, 'model_test.zip')
    compare_model.save(saved_model_path)
    restored: RiiidModel = RiiidModel.load(os.path.join(TEST_PATH, 'model_test.zip'))

    batch_features = []
    for batch in validation:
        updated_batch = restored.update(batch)
        features, _ = restored.predict(updated_batch)
        if len(features) == 0:
            # Batches without features are left out of X_validation.
            continue
        batch_features.append(features)

    validation = pd.concat(validation)
    X_validation = pd.concat(batch_features)
    return train_reference, X_reference, validation, X_validation
from riiid.validation import merge_test
from riiid.utils import configure_console_logging
from riiid.config import INPUT_PATH, MODELS_PATH, PARAMS

configure_console_logging()

# Load the data via load_first_users(30000), then preprocess the question
# and lecture tables.
loader = DataLoader(INPUT_PATH)
train, questions, lectures = loader.load_first_users(30000)
questions = preprocess_questions(questions)
lectures = preprocess_lectures(lectures)

# Load the stored test set and merge it into the training data.
test = loader.load_tests('tests_0.pkl')
train = merge_test(train, test)

# Compute features. NOTE: `train` is rebound here, from the merged data to
# the selector returned by fit_transform that is used to index X and y.
model = RiiidModel(questions, lectures, params=PARAMS)
X, y, train, valid = model.fit_transform(train)
features_path = os.path.join(MODELS_PATH, model.get_name('data.pkl'))
save_pkl((X, y, train, valid), path=features_path)

# Fit the models in a fixed order, each with the train selection followed
# by the valid selection; the blender only receives the valid selection.
for fit_step in (model.fit_lgbm, model.fit_catboost, model.fit_neural):
    fit_step(X[train], y[train], X[valid], y[valid])
model.fit_blender(X[valid], y[valid])

# Save the fitted model under MODELS_PATH, named by model.get_name().
model_path = os.path.join(MODELS_PATH, model.get_name())
model.save(model_path)