コード例 #1
0
        }
        if len(diff) > 0:
            diffs = [b - a for _, a, b in diff]
            analysis['min_difference'] = np.min(diffs)
            analysis['q01_difference'] = np.quantile(diffs, 0.01)
            analysis['q10_difference'] = np.quantile(diffs, 0.1)
            analysis['q20_difference'] = np.quantile(diffs, 0.2)
            analysis['mean_difference'] = np.mean(diffs)
            analysis['q80_difference'] = np.quantile(diffs, 0.8)
            analysis['q90_difference'] = np.quantile(diffs, 0.9)
            analysis['q99_difference'] = np.quantile(diffs, 0.99)
            analysis['max_difference'] = np.max(diffs)
            differences[column] = diff
        if len(nan_diff) > 0:
            nan_differences[column] = nan_diff
        results[column] = analysis

    results = pd.DataFrame(results).transpose().reset_index()
    return results, nan_differences, differences


configure_console_logging()

# Generate the datasets and round-trip them through a pickle file: everything
# below works on the copy that was read back from disk.
generated = generate_reference_and_validation_datasets(n=10000, validation_ratio=0.1)
ref_val_path = os.path.join(TEST_PATH, 'test_ref_val.pkl')
save_pkl(generated, path=ref_val_path)
data = load_pkl(ref_val_path)

# Build X_ref / X_val from the reloaded data and compare them.
X_ref, X_val = build_ref_and_val_datasets(*data)
results, nan_differences, differences = compute_differences(X_ref, X_val)

# Store the inputs together with the comparison outputs in a single pickle.
results_path = os.path.join(TEST_PATH, 'test_results.pkl')
save_pkl((X_ref, X_val, results, nan_differences, differences), path=results_path)
コード例 #2
0
import os
from riiid.config import INPUT_PATH
from riiid.core.data import DataLoader, save_pkl, load_pkl
from riiid.validation import generate_test


loader = DataLoader(INPUT_PATH)
train, _, _ = loader.load()

# One (output file name, size, N) triple per test set to generate; both sets
# are generated from the same `train` data with seed 0.
TEST_SETS = (
    ('tests_0.pkl', 2_500_000, 10_000),
    ('tests_1.pkl', 5_000_000, 20_000),
)

for filename, size, n in TEST_SETS:
    test = generate_test(train, size=size, N=n, seed=0)
    save_pkl(test, os.path.join(INPUT_PATH, filename))
コード例 #3
0
import os

from riiid.core.data import DataLoader, save_pkl
from riiid.saint.model import SaintModel
from riiid.utils import configure_console_logging
from riiid.config import INPUT_PATH, MODELS_PATH


configure_console_logging()

# Load data
# NOTE(review): the argument to load_first_users is presumably a user count - confirm.
loader = DataLoader(INPUT_PATH)
train, questions, lectures = loader.load_first_users(30000)

# Compute features
model = SaintModel(questions, lectures)
train = model.fit_transform(train)

# Create train and validation datasets.
# The call order is kept exactly as written: `model` may carry state from one
# call to the next.
train, test = model.split_train_test(train)
train = model.create_features(train)
test = model.create_features(test)
X_train, y_train = model.create_dataset(train)
X_test, y_test = model.create_dataset(test)

# Persist the prepared datasets before fitting.
data_path = os.path.join(MODELS_PATH, model.get_name('data.pkl'))
save_pkl((X_train, y_train, X_test, y_test), data_path)

# Fit model, then score it on the held-out split
model.fit(X_train, y_train, X_test, y_test)
model.score(X_test, y_test)

# Save model
model.save(MODELS_PATH)
コード例 #4
0
ファイル: train.py プロジェクト: fabien-vavrand/kaggle-riiid
from riiid.validation import merge_test
from riiid.utils import configure_console_logging
from riiid.config import INPUT_PATH, MODELS_PATH, PARAMS

configure_console_logging()

# Load the raw tables and preprocess the question / lecture metadata.
loader = DataLoader(INPUT_PATH)
train, questions, lectures = loader.load_first_users(30000)
questions = preprocess_questions(questions)
lectures = preprocess_lectures(lectures)

# Merge the pre-generated test set stored in 'tests_0.pkl' into the training data.
test = loader.load_tests('tests_0.pkl')
train = merge_test(train, test)

# Compute features. Note that `train` is rebound here: from this point on,
# `train` and `valid` are used to index X and y.
model = RiiidModel(questions, lectures, params=PARAMS)
X, y, train, valid = model.fit_transform(train)
data_path = os.path.join(MODELS_PATH, model.get_name('data.pkl'))
save_pkl((X, y, train, valid), path=data_path)

# Fit the base models one after the other; each call gets freshly indexed
# train / valid selections, exactly as separate calls would.
for fit_base_model in (model.fit_lgbm, model.fit_catboost, model.fit_neural):
    fit_base_model(X[train], y[train], X[valid], y[valid])

# The blender is fitted on the validation selection only.
model.fit_blender(X[valid], y[valid])

# Save model
model_path = os.path.join(MODELS_PATH, model.get_name())
model.save(model_path)
コード例 #5
0
ファイル: cache.py プロジェクト: fabien-vavrand/kaggle-riiid
 def save(self, data, cache_id):
     """Pickle ``data`` to the path resolved for ``cache_id``.

     Does nothing when ``self.activated`` is falsy.
     """
     if not self.activated:
         return
     target = self._get_path(cache_id)
     save_pkl(data, target)
コード例 #6
0
 def save(self, path):
     """Write this object, and its ``model`` if set, under ``path``.

     When ``self.model`` is not None it is saved through its own ``save``
     method to ``<path>/<get_name('model')>`` and the attribute is then reset
     to None, so the instance no longer holds the model after this call.
     The instance itself is finally pickled to ``<path>/<get_name()>``.
     """
     inner_model = self.model
     if inner_model is not None:
         model_path = os.path.join(path, self.get_name('model'))
         inner_model.save(model_path)
         # NOTE(review): presumably cleared because the model object should not
         # end up in the pickle written below - confirm.
         self.model = None
     pickle_path = os.path.join(path, self.get_name())
     save_pkl(self, pickle_path)