def test_dump_and_load(self):
    """ Test dumping and loading the SimpleRnn and make sure that all of
    its properties remain in shape. """
    data, _, item_ids, _, _ = load_data(TESTDATA_FILENAME, 'assistments')
    nn_data = build_nn_data(data, len(item_ids))
    # Train on the first half of the processed interactions only.
    train_data = nn_data[:len(nn_data) // 2]
    opt_params = {
        'max_compress_dim': 10,
        'hidden_dim': 20,
        'recurrent': False,
        'grad_norm_limit': 1.0,
        'first_learning_rate': 20.0,
        'decay_rate': 0.5,
        'largest_grad': 4.0,
        'batch_threshold': 0.8,
    }
    original = simple_rnn.SimpleRnn(train_data,
                                    simple_rnn.RnnOpts(**opt_params))
    # Serialize to an in-memory buffer, then reload from a fresh reader
    # over the same bytes.
    buf = StringIO.StringIO()
    original.dump(buf)
    recalled = simple_rnn.SimpleRnn.load(StringIO.StringIO(buf.getvalue()))
    # Every option must survive the dump/load round trip.
    for attr in ('max_compress_dim', 'recurrent', 'grad_norm_limit',
                 'first_learning_rate', 'decay_rate', 'largest_grad',
                 'batch_threshold'):
        self.assertEqual(getattr(original.opts, attr),
                         getattr(recalled.opts, attr),
                         "%s was changed" % attr)
def test_initialization(self):
    """ Just make sure initialize doesn't cause the interpreter to crash """
    data, _, item_ids, _, _ = load_data(TESTDATA_FILENAME, 'assistments')
    nn_data = build_nn_data(data, len(item_ids))
    # Split the processed data in half: first half trains, second half tests.
    midpoint = len(nn_data) // 2
    opts = simple_rnn.RnnOpts(hidden_dim=20)
    simple_rnn.SimpleRnn(nn_data[:midpoint], opts,
                         test_data=nn_data[midpoint:])
def test_proportion_students_retained(self):
    """ Check that loading with ``proportion_students_retained`` keeps
    roughly that fraction of the unique users in the data set. """
    data_opts = undertest.DEFAULT_DATA_OPTS
    raw_output = assistments.load_data(
        TESTDATA_FILENAME,
        template_id_col=data_opts.template_id_col,
        concept_id_col=data_opts.concept_id_col,
        remove_nan_skill_ids=data_opts.remove_skill_nans,
        max_interactions_per_user=data_opts.max_interactions_per_user,
        min_interactions_per_user=data_opts.min_interactions_per_user,
        drop_duplicates=data_opts.drop_duplicates)
    output = undertest.load_data(TESTDATA_FILENAME, 'assistments',
                                 data_opts=data_opts)
    # With default options no rows should be dropped by the wrapper.
    self.assertEqual(len(raw_output[0]), len(output[0]))
    # BUG FIX: use a float literal; under Python 2 (this module uses the
    # py2 StringIO idiom elsewhere) ``2 / 3`` is integer division and
    # yields 0, which made the assertion below vacuous.
    test_proportion_students_retained = 2.0 / 3
    data_opts = undertest.DataOpts(
        num_folds=2, item_id_col=None, template_id_col=None,
        concept_id_col=None, remove_skill_nans=False, seed=0,
        use_correct=True, use_hints=False, drop_duplicates=False,
        max_interactions_per_user=None, min_interactions_per_user=2,
        proportion_students_retained=test_proportion_students_retained)
    output = undertest.load_data(TESTDATA_FILENAME, 'assistments',
                                 data_opts=data_opts)
    total_users = raw_output[0][USER_IDX_KEY].nunique()
    retained_users = output[0][USER_IDX_KEY].nunique()
    # BUG FIX: the third positional argument of assertAlmostEqual is
    # ``places`` (an int — round() rejects a float there); the intended
    # tolerance must be passed as ``delta``. Also use float division so
    # the ratio is meaningful under Python 2, and the non-deprecated
    # assertAlmostEqual spelling.
    self.assertAlmostEqual(float(retained_users) / total_users,
                           test_proportion_students_retained,
                           delta=1e-5)
def test_run(self):
    """ Make sure RNN can run on assistments data and outputs results."""
    for data_file, data_source in [(ASSISTMENTS_TESTDATA_FILENAME,
                                    ASSISTMENTS)]:
        data, _, item_ids, _, _ = load_data(data_file, data_source)
        folds = split_data(data, num_folds=TEST_NUM_FOLDS)
        undertest.run(folds, TEST_NUM_FOLDS, len(item_ids), TEST_NUM_ITERS,
                      DEFAULT_DATA_OPTS, output=self.output_prefix)
        # A non-empty pickled result file should exist for every fold.
        for fold_num in range(1, TEST_NUM_FOLDS + 1):
            with open(self.output_prefix + str(fold_num), 'rb') as outfile:
                self.assertTrue(len(pickle.load(outfile)))
def test_irt(self):
    """ Make sure IRT can run on Assistments data and outputs results."""
    for data_file, data_source in [(ASSISTMENTS_TESTDATA_FILENAME,
                                    ASSISTMENTS)]:
        data, _, _, _, _ = load_data(data_file, data_source)
        # Exercise both the 2PO and 1PO variants of the model.
        for is_two_po in (True, False):
            folds = split_data(data, num_folds=TEST_NUM_FOLDS)
            undertest.irt(folds, TEST_NUM_FOLDS, output=self.filename,
                          is_two_po=is_two_po)
            with open(self.filename, 'rb') as result_file:
                result = pickle.load(result_file)
            # The dumped frame must carry every expected column and
            # record which model variant produced it.
            for col in EXPECTED_COLS:
                self.assertTrue(col in result)
            self.assertTrue(np.all(result['is_two_po'].values == is_two_po))