Exemple #1
0
import os
from riiid.config import INPUT_PATH
from riiid.core.data import DataLoader, save_pkl, load_pkl
from riiid.validation import generate_test


# Load the dataset; only the first of the three objects returned by
# DataLoader.load() is needed to build the test sets.
loader = DataLoader(INPUT_PATH)
train, _, _ = loader.load()

# One row per test set to generate: (output file name, size=, N=).
# Both sets are generated with seed=0 and pickled under INPUT_PATH.
TEST_SPECS = (
    ('tests_0.pkl', 2_500_000, 10_000),
    ('tests_1.pkl', 5_000_000, 20_000),
)

for file_name, size, n in TEST_SPECS:
    test = generate_test(train, size=size, N=n, seed=0)
    save_pkl(test, os.path.join(INPUT_PATH, file_name))
Exemple #2
0
from riiid.core.data import DataLoader, preprocess_questions, preprocess_lectures
from riiid.core.model import RiiidModel
from riiid.validation import merge_test
from riiid.config import PARAMS
from riiid import cache
from riiid.aws.cache import S3CacheManager
from riiid.aws.config import CONTEXT

CONTEXT.get_logger()

try:
    cache.CACHE_MANAGER = S3CacheManager('kaggle-riiid-cache')

    loader = DataLoader(CONTEXT.data_path())
    train, questions, lectures = loader.load()
    questions = preprocess_questions(questions)
    lectures = preprocess_lectures(lectures)

    test = loader.load_tests('tests_0.pkl')
    train = merge_test(train, test)
    del test

    PARAMS['question_embedding']['workers'] = 32
    PARAMS['answers_embedding']['workers'] = 32
    model = RiiidModel(questions, lectures, params=PARAMS)
    X, y, train, valid = model.fit_transform(train)

    bucket = S3Bucket(model.get_normalized_name())

    logging.info('Saving data')