} if len(diff) > 0: diffs = [b - a for _, a, b in diff] analysis['min_difference'] = np.min(diffs) analysis['q01_difference'] = np.quantile(diffs, 0.01) analysis['q10_difference'] = np.quantile(diffs, 0.1) analysis['q20_difference'] = np.quantile(diffs, 0.2) analysis['mean_difference'] = np.mean(diffs) analysis['q80_difference'] = np.quantile(diffs, 0.8) analysis['q90_difference'] = np.quantile(diffs, 0.9) analysis['q99_difference'] = np.quantile(diffs, 0.99) analysis['max_difference'] = np.max(diffs) differences[column] = diff if len(nan_diff) > 0: nan_differences[column] = nan_diff results[column] = analysis results = pd.DataFrame(results).transpose().reset_index() return results, nan_differences, differences configure_console_logging() data = generate_reference_and_validation_datasets(n=10000, validation_ratio=0.1) save_pkl(data, path=os.path.join(TEST_PATH, 'test_ref_val.pkl')) data = load_pkl(os.path.join(TEST_PATH, 'test_ref_val.pkl')) X_ref, X_val = build_ref_and_val_datasets(*data) results, nan_differences, differences = compute_differences(X_ref, X_val) save_pkl((X_ref, X_val, results, nan_differences, differences), path=os.path.join(TEST_PATH, 'test_results.pkl'))
import os

from riiid.config import INPUT_PATH
from riiid.core.data import DataLoader, save_pkl, load_pkl
from riiid.validation import generate_test

# One entry per test set to build, in creation order:
# (output file name, `size` argument, `N` argument) forwarded to generate_test.
test_specs = (
    ('tests_0.pkl', 2_500_000, 10_000),
    ('tests_1.pkl', 5_000_000, 20_000),
)

# Only the first element returned by the loader is needed here.
loader = DataLoader(INPUT_PATH)
train, _, _ = loader.load()

# Every test set is generated from the same training data with the same seed
# and written next to the input data.
for file_name, size, n in test_specs:
    test = generate_test(train, size=size, N=n, seed=0)
    save_pkl(test, os.path.join(INPUT_PATH, file_name))
# `os` is required by the os.path.join call below; it was not imported in this
# script, which would raise NameError only after the features were computed.
import os

from riiid.core.data import DataLoader, save_pkl
from riiid.saint.model import SaintModel
from riiid.utils import configure_console_logging
from riiid.config import INPUT_PATH, MODELS_PATH

configure_console_logging()

# Load data (restricted to the first 30000 users by the loader)
loader = DataLoader(INPUT_PATH)
train, questions, lectures = loader.load_first_users(30000)

# Compute features
model = SaintModel(questions, lectures)
train = model.fit_transform(train)

# Create train and validation datasets
train, test = model.split_train_test(train)
train = model.create_features(train)
test = model.create_features(test)

X_train, y_train = model.create_dataset(train)
X_test, y_test = model.create_dataset(test)

# Persist the datasets under the models directory, using the model's own
# naming scheme for the file name.
save_pkl(
    (X_train, y_train, X_test, y_test),
    os.path.join(MODELS_PATH, model.get_name('data.pkl')),
)

# Fit model
model.fit(X_train, y_train, X_test, y_test)
# NOTE(review): the return value of score() is discarded — presumably it
# logs the result itself; confirm.
model.score(X_test, y_test)

# Save model
model.save(MODELS_PATH)
# NOTE(review): DataLoader, RiiidModel, preprocess_questions,
# preprocess_lectures, save_pkl and os are used below but not imported in the
# visible part of this script — presumably imported earlier; confirm.
from riiid.validation import merge_test
from riiid.utils import configure_console_logging
from riiid.config import INPUT_PATH, MODELS_PATH, PARAMS

configure_console_logging()

# Load and preprocess data
loader = DataLoader(INPUT_PATH)
train, questions, lectures = loader.load_first_users(30000)
questions = preprocess_questions(questions)
lectures = preprocess_lectures(lectures)

# Load and merge validation set
test = loader.load_tests('tests_0.pkl')
train = merge_test(train, test)

# Compute features; `train` and `valid` are rebound to whatever fit_transform
# returns and are used below to index X and y.
model = RiiidModel(questions, lectures, params=PARAMS)
X, y, train, valid = model.fit_transform(train)
data_path = os.path.join(MODELS_PATH, model.get_name('data.pkl'))
save_pkl((X, y, train, valid), path=data_path)

# Fit models: each of the three fitters receives the same train/valid split
# (re-indexed for every call), then the blender is fitted on the validation
# part only.
for fit_step in (model.fit_lgbm, model.fit_catboost, model.fit_neural):
    fit_step(X[train], y[train], X[valid], y[valid])
model.fit_blender(X[valid], y[valid])

# Save model
output_path = os.path.join(MODELS_PATH, model.get_name())
model.save(output_path)
def save(self, data, cache_id):
    """Store ``data`` for ``cache_id`` if this object is activated.

    When ``self.activated`` is falsy nothing is written. Otherwise ``data``
    is handed to ``save_pkl`` together with the path returned by
    ``self._get_path(cache_id)``.
    """
    if not self.activated:
        return
    target_path = self._get_path(cache_id)
    save_pkl(data, target_path)
def save(self, path):
    """Save the underlying model and this wrapper object under ``path``.

    If ``self.model`` is set, it is saved through its own ``save`` method to
    ``<path>/<self.get_name('model')>``. The wrapper is then written with
    ``save_pkl`` to ``<path>/<self.get_name()>`` with ``self.model`` set to
    ``None``, so the model object is not part of that file.

    ``self.model`` is restored afterwards, even when ``save_pkl`` raises, so
    the in-memory object is left exactly as it was before the call.

    Args:
        path: Directory in which both files are written.
    """
    fitted_model = self.model
    if fitted_model is not None:
        fitted_model.save(os.path.join(path, self.get_name('model')))

    # Detach the model only for the duration of the pickling step.
    self.model = None
    try:
        save_pkl(self, os.path.join(path, self.get_name()))
    finally:
        # Previously the model stayed detached after saving, which left the
        # object unusable and lost the model if pickling failed.
        self.model = fitted_model