예제 #1
0
def get_data(n=1000):
    """Load training data, preprocessed metadata and the ``tests_1.pkl`` set.

    Everything is read through a ``DataLoader`` rooted at ``INPUT_PATH``.
    The questions and lectures returned by the loader are passed through
    ``preprocess_questions`` and ``preprocess_lectures`` before being
    handed back.

    Args:
        n: Forwarded to ``DataLoader.load_first_users`` (presumably the
            number of users to load -- confirm against the loader).

    Returns:
        A 4-tuple ``(train, questions, lectures, test)``.
    """
    data_loader = DataLoader(INPUT_PATH)
    train_part, question_meta, lecture_meta = data_loader.load_first_users(n)

    # Rebind in place so the unprocessed objects are not kept around.
    question_meta = preprocess_questions(question_meta)
    lecture_meta = preprocess_lectures(lecture_meta)

    test_part = data_loader.load_tests('tests_1.pkl')
    return train_part, question_meta, lecture_meta, test_part
예제 #2
0
def score_params(params, n_users=30000):
    """Fit a ``RiiidModel`` with ``params`` and report how well it did.

    The function loads data through ``DataLoader(CONTEXT.data_path())``,
    preprocesses questions and lectures, merges the ``tests_0.pkl`` set
    into the training data with ``merge_test``, then runs the model's
    ``fit_transform`` and ``fit_lgbm`` steps.

    Args:
        params: Passed as the third argument to ``RiiidModel``.
        n_users: Forwarded to ``DataLoader.load_first_users``.

    Returns:
        The pair ``(model.best_score, model.best_iteration)``.
    """
    data_loader = DataLoader(CONTEXT.data_path())
    history, question_meta, lecture_meta = data_loader.load_first_users(n_users)
    question_meta = preprocess_questions(question_meta)
    lecture_meta = preprocess_lectures(lecture_meta)

    # The loaded test set is only needed as input to the merge, so it is
    # passed straight through instead of being bound to a name.
    history = merge_test(history, data_loader.load_tests('tests_0.pkl'))

    model = RiiidModel(question_meta, lecture_meta, params)
    X, y, train_idx, valid_idx = model.fit_transform(history)
    # Nothing below reads the merged data; drop this reference before training.
    del history

    # train_idx / valid_idx are whatever fit_transform returned for indexing
    # X and y into the fitting and validation subsets.
    model.fit_lgbm(X[train_idx], y[train_idx], X[valid_idx], y[valid_idx])

    return model.best_score, model.best_iteration
예제 #3
0
import os
from riiid.core.data import DataLoader, save_pkl
from riiid.saint.model import SaintModel
from riiid.utils import configure_console_logging
from riiid.config import INPUT_PATH, MODELS_PATH


configure_console_logging()

# Read the input tables for the first 30000 users.
data_loader = DataLoader(INPUT_PATH)
train, questions, lectures = data_loader.load_first_users(30000)

# Build the SAINT model wrapper and run its fit_transform step.
saint = SaintModel(questions, lectures)
train = saint.fit_transform(train)

# Split first, then derive features and datasets (train before test each time).
train, test = saint.split_train_test(train)
train = saint.create_features(train)
test = saint.create_features(test)
X_train, y_train = saint.create_dataset(train)
X_test, y_test = saint.create_dataset(test)

# Persist the prepared arrays next to the models, under the model's own name.
dataset = (X_train, y_train, X_test, y_test)
dataset_path = os.path.join(MODELS_PATH, saint.get_name('data.pkl'))
save_pkl(dataset, dataset_path)

# Train, then evaluate on the held-out part.
saint.fit(*dataset)
saint.score(X_test, y_test)

# Write the fitted model to disk.
saint.save(MODELS_PATH)