Exemple #1
0
def score_params(params, n_users=30000):
    """Fit a LightGBM model with ``params`` and report its best score.

    The first ``n_users`` users are loaded from ``CONTEXT.data_path()``,
    merged with the ``tests_0.pkl`` test set, transformed by
    ``RiiidModel.fit_transform`` and finally fitted with ``fit_lgbm``.

    Args:
        params: Parameters handed to the ``RiiidModel`` constructor.
        n_users: Number of users requested from ``load_first_users``.

    Returns:
        The tuple ``(model.best_score, model.best_iteration)`` read from the
        model once ``fit_lgbm`` has returned.
    """
    data_loader = DataLoader(CONTEXT.data_path())
    interactions, questions, lectures = data_loader.load_first_users(n_users)
    questions, lectures = preprocess_questions(questions), preprocess_lectures(lectures)

    # The test set is consumed inline so that no extra name keeps it alive
    # after the merge.
    interactions = merge_test(interactions, data_loader.load_tests('tests_0.pkl'))

    riiid = RiiidModel(questions, lectures, params)
    features, target, train_sel, valid_sel = riiid.fit_transform(interactions)
    # The merged frame is not needed by this function any more.
    del interactions

    riiid.fit_lgbm(
        features[train_sel], target[train_sel],
        features[valid_sel], target[valid_sel],
    )
    return riiid.best_score, riiid.best_iteration
Exemple #2
0
    def predict(self, X):
        """Transform one test batch and predict ``answered_correctly`` for it.

        Args:
            X: Batch to score. It first goes through
                ``self.lectures_pipeline.transform`` and
                ``RiiidModel.remove_lectures``.

        Returns:
            Tuple ``(X, predictions)``. ``X`` is the batch after
            ``self.pipeline.transform`` when rows remain, otherwise the empty
            batch left after ``remove_lectures``. ``predictions`` is a
            DataFrame with the columns ``row_id`` and ``answered_correctly``.
        """
        # Any integer modulo 1 is 0, so with an integer counter this message
        # is emitted for every batch.
        if self.test_batch % 1 == 0:
            logging.info('Running test batch {}'.format(self.test_batch))

        X = RiiidModel.remove_lectures(self.lectures_pipeline.transform(X))

        if len(X) == 0:
            # Nothing left to score: hand back an empty frame with the same columns.
            predictions = pd.DataFrame(columns=['row_id', 'answered_correctly'])
        else:
            # Row ids are captured before the feature pipeline replaces X.
            predictions = X[['row_id']].copy()
            X = self.pipeline.transform(X)

            # The model inputs are built between the two prior-related context
            # steps; this call order is kept exactly as it was.
            self._update_context_with_priors(X)
            model_inputs = self._create_prediction_data(X)
            self._roll_context_on_priors(X)
            self._update_context(X, self.independent_features)

            raw_output = self.model.predict(model_inputs)
            # Keep the last entry along each of the two trailing axes.
            predictions['answered_correctly'] = raw_output[:, -1, -1]

        self.test_batch += 1
        return X, predictions
Exemple #3
0
    def fit_transform(self, X):
        """Fit both pipelines on ``X``, create the context and return the result.

        Args:
            X: Training data. It is given to ``self._init_fit`` and then to
                the lectures pipeline.

        Returns:
            ``X`` after ``self.lectures_pipeline``,
            ``RiiidModel.remove_lectures`` and ``self.pipeline`` have been
            applied in that order.
        """
        logging.info('- Fit')
        self._init_fit(X)

        # Stage 1: lectures pipeline, followed by remove_lectures.
        lectures_step = LecturesTransformer(self.lectures)
        self.lectures_pipeline = make_pipeline(lectures_step)
        X = RiiidModel.remove_lectures(self.lectures_pipeline.fit_transform(X))

        # Stage 2: the CV object is built from the filtered data and used by
        # the content_id score encoder.
        folds = self._build_cv(X)
        content_encoder = ScoreEncoder(
            'content_id', cv=folds, smoothing_min=5, smoothing_value=1, noise=0.005
        )
        questions_step = QuestionsTransformer(
            self.questions, time_bins=self.time_bins, lag_bins=self.lag_bins
        )
        self.pipeline = make_pipeline(content_encoder, questions_step)
        X = self.pipeline.fit_transform(X)

        self._create_context(X)
        return X
Exemple #4
0
    def update(self, test):
        """Feed back the answers of the previous batch and remember this one.

        The first row of ``prior_group_responses`` and
        ``prior_group_answers_correct`` holds the outcome of the previously
        seen batch. Those values are attached to ``self.previous_test``,
        which is then used to update ``self.pipeline`` and the context.

        Args:
            test: Current batch; must contain the two ``prior_group_*``
                columns.

        Returns:
            ``test`` without the two ``prior_group_*`` columns. A copy of it
            is stored in ``self.previous_test`` for the next call.
        """
        # NOTE(review): eval() executes whatever expression the string holds.
        # Confirm these columns only ever come from a trusted source, or
        # consider ast.literal_eval if they are always plain literals.
        responses = eval(test['prior_group_responses'].values[0])
        correctness = eval(test['prior_group_answers_correct'].values[0])
        test = test.drop(columns=['prior_group_answers_correct', 'prior_group_responses'])

        previous = self.previous_test
        if previous is not None:
            previous['user_answer'] = responses
            previous['answered_correctly'] = correctness

            # The lectures pipeline is not re-applied here (not required).
            answered = RiiidModel.remove_lectures(previous)
            if len(answered) > 0:
                answered = update_pipeline(
                    self.pipeline, answered, answered['answered_correctly']
                )
                self._update_context(answered, self.dependent_features)

        # Stored as a copy so that the columns assigned on the next call do
        # not appear on the frame returned to the caller.
        self.previous_test = test.copy()
        return test
def generate_reference_and_validation_datasets(n=1000, validation_ratio=0.5):
    """Build reference features and batch-by-batch validation features.

    Two models are trained on data from ``get_data(n)``. The first is fitted
    on the fully merged data and saved as ``model_ref.zip``. The second is
    fitted with part of the test data held out, saved as ``model_test.zip``,
    reloaded, and then replayed over the held-out batches through
    ``update`` and ``predict``.

    Args:
        n: Value forwarded to ``get_data``.
        validation_ratio: Value forwarded to ``merge_test`` for the second
            model.

    Returns:
        Tuple ``(train_reference, X_reference, validation, X_validation)``,
        where the last two items are the concatenated held-out batches and
        the concatenated non-empty ``X`` frames returned by ``predict``.
    """
    # --- Reference model: everything merged, nothing held out.
    train, questions, lectures, test = get_data(n)
    train_reference = merge_test(train, test)
    reference_model = RiiidModel(questions, lectures, PARAMS)
    X_reference, *_ = reference_model.fit_transform(train_reference)
    reference_model.save(os.path.join(TEST_PATH, 'model_ref.zip'))

    # --- Comparison model: data is fetched again and part of it is held out.
    train, questions, lectures, test = get_data(n)
    train_compare, validation = merge_test(train, test, validation_ratio=validation_ratio)
    compare_model = RiiidModel(questions, lectures, PARAMS)
    X_compare, y, train_sel, valid_sel = compare_model.fit_transform(train_compare)
    compare_model.fit_lgbm(
        X_compare[train_sel], y[train_sel],
        X_compare[valid_sel], y[valid_sel],
    )

    # --- Round trip through disk so the replay runs on the loaded model.
    compare_model.save(os.path.join(TEST_PATH, 'model_test.zip'))
    restored: RiiidModel = RiiidModel.load(os.path.join(TEST_PATH, 'model_test.zip'))

    transformed_batches = []
    for batch in validation:
        features, _ = restored.predict(restored.update(batch))
        if len(features) > 0:
            transformed_batches.append(features)

    validation = pd.concat(validation)
    X_validation = pd.concat(transformed_batches)

    return train_reference, X_reference, validation, X_validation
Exemple #6
0
# Submission script: loads a saved RiiidModel and answers every batch yielded
# by the riiideducation test iterator.
import riiideducation
# The environment and its test iterator are created before anything else runs.
env = riiideducation.make_env()
iter_test = env.iter_test()

import os
import sys
import logging
# PATH is appended to sys.path before the `riiid` imports below, and it is
# also the directory the model is loaded from.
PATH = '/kaggle/input/riiid-submission'
sys.path.append(PATH)

from riiid.utils import configure_console_logging, check_versions
from riiid.core.model import RiiidModel

configure_console_logging()
check_versions()

logging.info('Load model')
model = RiiidModel.load(os.path.join(PATH, 'model'))

# For each batch: update the model with the batch first, then predict on the
# frame returned by update() and submit the predictions to the environment.
# The second item yielded by the iterator is not used.
for test, _ in iter_test:
    test = model.update(test)
    _, predictions = model.predict(test)
    env.predict(predictions)
Exemple #7
0
from riiid.validation import merge_test
from riiid.utils import configure_console_logging
from riiid.config import INPUT_PATH, MODELS_PATH, PARAMS

configure_console_logging()

# --- Data: first 30000 users plus the preprocessed question/lecture tables.
loader = DataLoader(INPUT_PATH)
train, questions, lectures = loader.load_first_users(30000)
questions, lectures = preprocess_questions(questions), preprocess_lectures(lectures)

# --- Validation: the tests_0.pkl set is merged into the training data.
test = loader.load_tests('tests_0.pkl')
train = merge_test(train, test)

# --- Features: computed once and pickled next to the models.
model = RiiidModel(questions, lectures, params=PARAMS)
X, y, train, valid = model.fit_transform(train)
data_path = os.path.join(MODELS_PATH, model.get_name('data.pkl'))
save_pkl((X, y, train, valid), path=data_path)

# --- Models: each base model gets the same train/valid split; the blender
# is then fitted on the validation part only.
for fit_base_model in (model.fit_lgbm, model.fit_catboost, model.fit_neural):
    fit_base_model(X[train], y[train], X[valid], y[valid])
model.fit_blender(X[valid], y[valid])

# --- Persist the fitted model under its generated name.
model.save(os.path.join(MODELS_PATH, model.get_name()))
Exemple #8
0
# Full-data training run: features, data splits and the fitted model are all
# written to an S3 bucket.
try:
    # Replace the module-level cache manager with an S3-backed one.
    cache.CACHE_MANAGER = S3CacheManager('kaggle-riiid-cache')

    loader = DataLoader(CONTEXT.data_path())
    train, questions, lectures = loader.load()
    questions = preprocess_questions(questions)
    lectures = preprocess_lectures(lectures)

    test = loader.load_tests('tests_0.pkl')
    train = merge_test(train, test)
    # The test set is not referenced again after the merge.
    del test

    # Override the `workers` setting of both embedding configurations.
    PARAMS['question_embedding']['workers'] = 32
    PARAMS['answers_embedding']['workers'] = 32
    model = RiiidModel(questions, lectures, params=PARAMS)
    X, y, train, valid = model.fit_transform(train)

    # The bucket is named after the model's normalized name.
    bucket = S3Bucket(model.get_normalized_name())

    logging.info('Saving data')
    # Each of the four objects is uploaded as its own pickle (<name>.pkl).
    for data, name in [(X, 'X'), (y, 'y'), (train, 'train'), (valid, 'valid')]:
        bucket.save_pickle_multiparts(data, name + '.pkl')

    model.fit_lgbm(X[train], y[train], X[valid], y[valid])
    model.fit_catboost(X[train], y[train], X[valid], y[valid])

    logging.info('Saving model')
    bucket.save_multiparts(model.save_with_source(), model.get_name())

# NOTE(review): the body of this handler is not visible in this excerpt.
except Exception as e:
Exemple #9
0
import os
import time
import logging

from riiid.core.data import DataLoader
from riiid.utils import configure_console_logging
from riiid.config import MODELS_PATH, INPUT_PATH
from riiid.core.model import RiiidModel


configure_console_logging()

logging.info('Loading model')
MODEL_NAME = 'model_20210123_210542.zip'
model: RiiidModel = RiiidModel.load(os.path.join(MODELS_PATH, MODEL_NAME))

tests = DataLoader(INPUT_PATH).load_tests_examples()

# The clock starts when the model reports exactly one processed batch at the
# top of an iteration, presumably so the first (warm-up) batch is excluded.
# `start` stays None if that never happens (for example with fewer than two
# batches); it used to be unbound in that case and the script ended with a
# NameError.
start = None
timed_batches = 0

for test in tests:
    if start is None and model.test_batch == 1:
        start = time.perf_counter()

    test = model.update(test)
    _, predictions = model.predict(test)

    # Count only batches processed inside the timed window, so the per-batch
    # average is not diluted by batches that ran before the clock started.
    if start is not None:
        timed_batches += 1

if start is None:
    logging.warning(
        'No timing available: model.test_batch was never 1 at the start of a batch '
        '(final test_batch={})'.format(model.test_batch)
    )
else:
    end = time.perf_counter()
    total = end - start
    logging.info('Time spent: {:.1f}s ({:.3f}s by batch)'.format(total, total / timed_batches))