nb_hidden = round(HIDDEN_RATE * nb_questions) u0 = 0 a = 4685763 b = 47831 n = 47564875 u = [u0] for i in range(nb_hidden * nb_students): u.append(((a * u[-1] + b) % n) % nb_questions) return u chrono = Chrono() files = IO() for dataset_name in ['fraction']: dataset = Dataset(dataset_name, files) q = QMatrix() q.load('qmatrix-%s' % dataset_name) models = [Zero(), IRT(), MIRT(dim=2), MIRT(dim=3), MIRT(q=q)] # [IRT()]#, MIRT(dim=2), MIRT(dim=3), MIRT(q=q)] # , MHRM(dim=2), , MHRM(dim=2) dataset.load_subset() print(dataset) for i_exp in range(STUDENT_FOLD): # train_subset = dataset.train_subsets[i_exp] # test_subset = dataset.test_subsets[i_exp] for j_exp in range(QUESTION_FOLD): # validation_index = set(dataset.validation_question_sets[j_exp]) files.update(i_exp, j_exp) for model in models: print(model.name) begin = datetime.now() print(begin) data = np.array(dataset.data)
# Export the dataset as CSV rows "student,question,outcome,0,0", split at
# random between a train file and a test/validation pair, then dump the
# q-matrix as a sparse NPZ file and write a small YAML configuration.
dataset = Dataset(DATASET_NAME)
nb_train = 0
with open(CSV_TRAIN, 'w') as train, \
        open(CSV_TEST, 'w') as test, \
        open(CSV_VAL, 'w') as val:
    for i in range(dataset.nb_students):
        for j in range(dataset.nb_questions):
            outcome = '1' if dataset.data[i][j] else '0'
            line = ','.join([str(i), str(j), outcome, '0', '0']) + '\n'
            # One random draw per (student, question) pair: draws below 0.8
            # go to the train file, the others to the held-out files.
            if random.random() >= 0.8:
                # NOTE(review): the original indentation was lost; the
                # validation file is assumed to receive exactly the same
                # rows as the test file -- confirm.
                test.write(line)
                val.write(line)
            else:
                nb_train += 1
                train.write(line)

# Store the q-matrix of the same dataset in sparse CSR form.
qm = QMatrix()
qm.load('qmatrix-%s' % DATASET_NAME)
save_npz(Q_NPZ, csr_matrix(qm.Q))

# The batch size is set to the number of rows written to the train file.
config = {
    'USER_NUM': dataset.nb_students,
    'ITEM_NUM': dataset.nb_questions,
    'NB_CLASSES': 2,
    'BATCH_SIZE': nb_train
}
with open(CONFIG, 'w') as f:
    f.write(yaml.dump(config, default_flow_style=False))
def simulate(train_data, test_data):
    """Compare batch question-selection strategies with adaptive testing.

    A MIRT model built from the q-matrix stored under
    ``'qmatrix-%s' % dataset_name`` is trained on ``train_data``.  For a test
    student, every strategy in the module-level ``strategies`` list selects
    1, 2, ..., ``nb_questions`` questions; the model is bootstrapped with the
    student's answers to them and its predictions are scored against the
    full answer vector.  The same scores are then recorded for the 'cat'
    entry, where questions are asked one at a time via ``model.next_item``.

    Args:
        train_data: data passed as-is to ``model.training_step``.
        test_data: sequence of per-student answer sequences; the number of
            questions is taken from ``len(test_data[0])``.

    Returns:
        A dict keyed by strategy name (each entry of ``strategies`` plus
        'cat').  Each value holds 'delta', 'mean_error' and 'nb_mistakes'
        (lists with one sub-list per visited student), 'model_name' and
        'dim'.
    """
    q = QMatrix()
    q.load('qmatrix-%s' % dataset_name)
    model = MIRT(q=q)
    # model = MIRT(dim=2)
    model.training_step(train_data)

    nb_students = len(test_data)
    nb_questions = len(test_data[0])

    report = {}
    for name in strategies + ['cat']:
        report[name] = {
            'delta': [],
            'mean_error': [],
            'nb_mistakes': [],
            'model_name': name,
            'dim': model.get_dim()
        }

    for student_id in range(nb_students):
        # Open one fresh series per metric and per strategy for this student.
        for name in strategies + ['cat']:
            for metric in ['delta', 'mean_error', 'nb_mistakes']:
                report[name][metric].append([])

        # Data for new student
        truth = np.array(test_data[student_id], dtype=np.float64)
        if student_id % 20 == 0:
            print(student_id)
        # NOTE(review): the original indentation was lost; this call is
        # assumed to run for every student, not only every 20th -- confirm.
        say('Étudiant', student_id, test_data[student_id])

        # Reference theta: ask all questions, get all answers, and keep the
        # resulting estimate for the delta metric.
        model.init_test()
        model.bootstrap(range(nb_questions), truth)
        true_theta = model.theta

        for nb_questions_asked in range(1, nb_questions + 1):
            for strategy in strategies:
                model.init_test()
                if strategy == 'random':
                    chosen = random.sample(range(nb_questions),
                                           nb_questions_asked)
                elif strategy == 'dpp':
                    chosen = model.select_batch(nb_questions_asked)
                elif strategy == 'uncertainty':
                    # Keep the questions whose predicted success probability
                    # is closest to 0.5 (stable sort: ties keep index order).
                    predicted = list(model.predict_performance())
                    ranked = sorted(
                        range(nb_questions),
                        key=lambda idx: abs(predicted[idx] - 0.5))
                    chosen = ranked[:nb_questions_asked]
                answers = truth[chosen]
                model.bootstrap(chosen, answers)
                performance = model.predict_performance()
                _record_metrics(report[strategy], student_id, model,
                                performance, truth, true_theta, len(chosen))
        # report[strategy]['mean_error'].reverse()
        # report[strategy]['delta'].reverse()

        # CAT: questions are chosen one by one from the answers so far.
        model.init_test()
        replied_so_far = []
        results_so_far = []
        for t in range(1, nb_questions + 1):
            question_id = model.next_item(replied_so_far, results_so_far)
            say('\nRound', t, '-> We ask question', question_id + 1,
                'to the examinee.')
            outcome = test_data[student_id][question_id]
            say('Correct!' if outcome else 'Incorrect.')
            replied_so_far.append(question_id)
            results_so_far.append(outcome)
            model.estimate_parameters(replied_so_far, results_so_far)
            performance = model.predict_performance()
            _record_metrics(report['cat'], student_id, model,
                            performance, truth, true_theta, nb_questions)
        # NOTE(review): the original indentation was lost; this break is
        # reconstructed at student-loop level, which means only the first
        # test student is simulated -- confirm against the original file.
        break
    return report


def _record_metrics(entry, student_id, model, performance, truth,
                    true_theta, out_of):
    """Append the three metrics to ``entry`` for this student and echo them."""
    entry['mean_error'][student_id].append(full_logloss(performance, truth))
    entry['nb_mistakes'][student_id].append(nb_mistakes(performance, truth))
    entry['delta'][student_id].append(get_delta(model.theta, true_theta))
    say('mean_error', full_logloss(performance, truth))
    say(nb_mistakes(performance, truth), 'correct out of', out_of)
files.backup( 'log-%s-%s-%s' % (dataset_name, filename, datetime.now().strftime('%d%m%Y%H%M%S')), report) print(datetime.now()) if __name__ == '__main__': if sys.argv[1] == 'baseline': from baseline import Baseline models = [Baseline()] elif sys.argv[1] == 'qm': from qmatrix import QMatrix models = [] for nb_competences in nb_competences_values: models.append(QMatrix(nb_competences=nb_competences)) elif sys.argv[1] == 'dina': from qmatrix import QMatrix q = QMatrix() q.load('qmatrix-%s' % sys.argv[2]) # print('test', q.model_error()) models = [q] elif sys.argv[1] == 'qmspe': from qmatrix import QMatrix q = QMatrix() print('Toujours', q.prior) q.load('qmatrix-%s' % dataset_name) print('Toujours2', q.prior) models = [q] elif sys.argv[1] == 'irt': from irt import IRT
# coding=utf8
# Rebuild the R matrix `Q` used by the CDM package from a q-matrix stored on
# the Python side, printing intermediate values along the way.
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
from calc import logloss, compute_mean_entropy
import random

r = robjects.r
cdm = importr('CDM')

# Flatten the q-matrix available on the R side row by row (20 x 8 entries).
r('Q <- as.matrix(fraction.subtraction.qmatrix)')
# print(r("Q"))
r('entries <- c()')
r('for(i in 1:20) { for(j in 1:8) { entries <- c(entries, Q[i, j]) } }')
print(r("entries"))

from qmatrix import QMatrix

# Load the stored q-matrix and flatten it the same way, as integers.
q = QMatrix(nb_competences=8)
q.load('qmatrix-cdm')
entries = [int(cell) for row in q.Q for cell in row]
print('entries', entries)

# Hand the flattened entries to R and rebuild Q with 8 named columns.
robjects.globalenv['entries'] = robjects.IntVector(entries)
print(r("entries"))
r("Q <- matrix(c(entries), ncol=8, dimnames=list(NULL, c('F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'F7', 'F8')), byrow=TRUE)")
print(r("Q"))
from qmatrix import QMatrix

# Three-row, two-column q-matrix: the first two rows each have a single 1,
# the last row has a 1 in both columns.
Q = [
    [1, 0],
    [0, 1],
    [1, 1],
]

# Slip and guess are set to 0 for each of the three rows of Q.
model = QMatrix(
    nb_competences=2,
    Q=Q,
    slip=[0] * len(Q),
    guess=[0] * len(Q),
)
model.generate_student_data(100, [0.6, 0.8])
from qmatrix import QMatrix

# Scratch session: load a fake q-matrix, start a test, feed a growing
# answer history and print what next_item returns after each update.
q = QMatrix()
q.load('qmatrix-fake')
q.init_test(set())
print(q.p_test)
q.next_item([], [])

# NOTE(review): next_item is always called with ([0], []) whatever the
# history just passed to estimate_parameters -- kept as in the original.
for replied, results in (
        ([0], [0]),
        ([0, 3], [0, 1]),
        ([0, 3, 1], [0, 1, 1])):
    q.estimate_parameters(replied, results)
    print(q.next_item([0], []))

print(q.p_test)
q.generate_student_data(20, [0.7] * 3)