def test_trivial_crossvalidation(self):
    """Check ridge regression with a one-element ``param_range``.

    For random input/output matrices with 2 to 10 rows and 4 columns,
    the test verifies that:

    * training never modifies the input matrices;
    * ``param_range=[3]`` yields exactly the same solution as ``param=3``;
    * ``param_range=[0]`` matches the plain least-squares solution to
      3 decimal places.
    """
    for i in range(1, 10):
        # np.asmatrix replaces np.mat, which was removed in NumPy 2.0;
        # np.mat was an alias of np.asmatrix, so behaviour is unchanged.
        m_a = DenseMatrix(np.asmatrix(np.random.random((i + 1, 4))))
        m_b = DenseMatrix(np.asmatrix(np.random.random((i + 1, 4))))
        # Snapshots used to detect in-place modification by train().
        tmp_a = m_a.mat.copy()
        tmp_b = m_b.mat.copy()

        learner = RidgeRegressionLearner(param_range=[3], intercept=False)
        solution = learner.train(m_a, m_b)

        learner2 = RidgeRegressionLearner(param=3, intercept=False)
        solution2 = learner2.train(m_a, m_b)

        np.testing.assert_array_equal(tmp_a, m_a.mat)
        np.testing.assert_array_equal(tmp_b, m_b.mat)
        np.testing.assert_array_equal(solution.mat, solution2.mat)

        # Same check again with a freshly constructed learner.
        learner = RidgeRegressionLearner(param_range=[3], intercept=False)
        solution = learner.train(m_a, m_b)

        np.testing.assert_array_equal(tmp_a, m_a.mat)
        np.testing.assert_array_equal(tmp_b, m_b.mat)
        np.testing.assert_array_equal(solution.mat, solution2.mat)

        # With the only candidate parameter being 0, the ridge solution is
        # compared against unregularised least squares.
        learner = RidgeRegressionLearner(param_range=[0], intercept=False)
        solution = learner.train(m_a, m_b)

        learner2 = LstsqRegressionLearner(intercept=False)
        solution2 = learner2.train(m_a, m_b)

        np.testing.assert_array_almost_equal(solution.mat, solution2.mat, 3)
def test_crossvalidation(self):
    """Ridge regression with an intercept must recover a known solution.

    The target matrix is built so that ``[inputs | 1] * expected`` equals
    ``targets`` exactly (e.g. ``[1, 1, 1] * expected == [12, 15, 18]``).
    The ridge learner (intercept enabled, ``param_range=[0]``) has to
    reproduce ``expected`` to 6 decimals, and its residual norm must be
    strictly smaller than that of least squares without an intercept.
    """
    inputs = DenseMatrix(np.matrix([[1, 1], [2, 3], [4, 6]]))
    targets = DenseMatrix(
        np.matrix([[12, 15, 18], [21, 27, 33], [35, 46, 57]]))
    expected = DenseMatrix(np.matrix([[1, 2, 3], [4, 5, 6], [7, 8, 9]]))

    ridge = RidgeRegressionLearner(intercept=True, param_range=[0])
    lstsq = LstsqRegressionLearner(intercept=False)

    lstsq_solution = lstsq.train(inputs, targets)
    ridge_solution = ridge.train(inputs, targets)

    # All rows but the last are compared with the first two expected rows;
    # the last row is compared with the third expected row.
    np.testing.assert_array_almost_equal(
        ridge_solution.mat[:-1, :], expected[0:2, :].mat, decimal=6)
    np.testing.assert_array_almost_equal(
        ridge_solution.mat[-1, :], expected[2:3, :].mat, decimal=6)

    # presumably padd_matrix appends the constant column that pairs with
    # the intercept row - confirm against its definition.
    padded_inputs = padd_matrix(inputs, 1)
    error_without_intercept = ((inputs * lstsq_solution) - targets).norm()
    error_with_intercept = (
        (padded_inputs * ridge_solution) - targets).norm()
    self.assertGreater(error_without_intercept, error_with_intercept)
def test_crossvalidation(self):
    """Compare intercept ridge regression against a hand-built solution.

    ``y`` is constructed so that each row of ``x`` extended with a
    trailing 1, multiplied by ``truth``, gives the matching row of ``y``.
    The ridge learner with an intercept and ``param_range=[0]`` must
    match ``truth`` to 6 decimal places and fit the data strictly better
    than an intercept-free least-squares learner.
    """
    x = DenseMatrix(np.matrix([[1, 1], [2, 3], [4, 6]]))
    y = DenseMatrix(np.matrix([[12, 15, 18],
                               [21, 27, 33],
                               [35, 46, 57]]))
    truth = DenseMatrix(np.matrix([[1, 2, 3],
                                   [4, 5, 6],
                                   [7, 8, 9]]))

    with_intercept = RidgeRegressionLearner(intercept=True, param_range=[0])
    without_intercept = LstsqRegressionLearner(intercept=False)

    fit_plain = without_intercept.train(x, y)
    fit_ridge = with_intercept.train(x, y)

    # Leading rows of the ridge solution vs. the first two rows of truth.
    leading_rows = fit_ridge.mat[:-1, :]
    np.testing.assert_array_almost_equal(leading_rows, truth[0:2, :].mat, 6)

    # Final row of the ridge solution vs. the third row of truth.
    final_row = fit_ridge.mat[-1, :]
    np.testing.assert_array_almost_equal(final_row, truth[2:3, :].mat, 6)

    # NOTE(review): padd_matrix(x, 1) is assumed to add the constant column
    # needed to multiply by the intercept-bearing solution - verify.
    x_padded = padd_matrix(x, 1)
    plain_residual = ((x * fit_plain) - y).norm()
    ridge_residual = ((x_padded * fit_ridge) - y).norm()
    self.assertGreater(plain_residual, ridge_residual)
def test_space_train_dense(self):
    """Train FullAdditive on dense spaces and check it reproduces them.

    Every (training data, argument space, phrase space) scenario is run
    with every learner.  After training, composing the same data must
    give a space whose matrix matches the phrase space to 10 decimals
    and whose row/column indexes are identical.  The model's
    ``_has_intercept`` flag must equal the learner's ``_intercept``.
    """
    scenarios = (
        ([("a", "b", "a_b")], self.space4, self.space5),
        ([("a", "b", "a_b")], self.space4, self.space6),
        ([("a", "b", "a_b"), ("a", "b", "a_a")], self.space4, self.space7),
    )
    # The same learner instances are reused across all scenarios.
    regressors = (
        RidgeRegressionLearner(intercept=False, crossvalidation=False,
                               param=0),
        LstsqRegressionLearner(intercept=False),
        LstsqRegressionLearner(intercept=True),
    )

    for phrases, arguments, expected in scenarios:
        for regressor in regressors:
            model = FullAdditive(learner=regressor)
            model.train(phrases, arguments, expected)
            composed = model.compose(phrases, arguments)

            actual_mat = composed.cooccurrence_matrix.mat
            expected_mat = expected.cooccurrence_matrix.mat
            np.testing.assert_array_almost_equal(actual_mat, expected_mat,
                                                 10)

            # Column and row bookkeeping must match the phrase space.
            self.assertListEqual(composed.id2column, expected.id2column)
            self.assertDictEqual(composed.column2id, expected.column2id)
            self.assertListEqual(composed.id2row, expected.id2row)
            self.assertDictEqual(composed.row2id, expected.row2id)

            self.assertEqual(model._has_intercept, regressor._intercept)
#load argument and phrase space arg_space = io_utils.load("./data/out/ex10.pkl") phrase_space = io_utils.load("data/out/PHRASE_SS.ex10.pkl") print("\nDefault regression:") my_comp = LexicalFunction() print(type(my_comp.regression_learner).__name__) my_comp.train(train_data, arg_space, phrase_space) #print its parameters print("Lexical function space:") print(my_comp.function_space.id2row) cooc_mat = my_comp.function_space.cooccurrence_matrix cooc_mat.reshape(my_comp.function_space.element_shape) print(cooc_mat) print("\nRidge Regression with lambda = 2") rr_learner = RidgeRegressionLearner(param=2, intercept=False, crossvalidation=False) my_comp = LexicalFunction(learner=rr_learner) my_comp.train(train_data, arg_space, phrase_space) #print its parameters print("Lexical function space:") print(my_comp.function_space.id2row) cooc_mat = my_comp.function_space.cooccurrence_matrix cooc_mat.reshape(my_comp.function_space.element_shape) print(cooc_mat)
# Pipeline fragment: transform the space, build a peripheral space, train a
# LexicalFunction model, compose the test phrases and score test pairs.
# NOTE(review): `space` and `data_path` are not defined in this fragment;
# they have to be set up earlier in the script.
#
# Progress messages use the single-argument print(...) call form, which
# prints the same text on Python 2 and Python 3 (the bare print statement
# is a SyntaxError on Python 3).
print("Applying SVD...")
space = space.apply(Svd(100))

print("Creating peripheral space..")
per_space = PeripheralSpace.build(space,
                                  data=data_path + "per.raw.SV.sm",
                                  cols=data_path + "per.raw.SV.cols",
                                  format="sm")

# Read the training data, using fields 0, 1 and 2 of each line.
train_data_file = data_path + "ML08_SV_train.txt"
train_data = io_utils.read_tuple_list(train_data_file, fields=[0, 1, 2])

print("Training Lexical Function composition model...")
comp_model = LexicalFunction(learner=RidgeRegressionLearner(param=2))
comp_model.train(train_data, space, per_space)

print("Composing phrases...")
test_phrases_file = data_path + "ML08nvs_test.txt"
test_phrases = io_utils.read_tuple_list(test_phrases_file, fields=[0, 1, 2])
composed_space = comp_model.compose(test_phrases, space)

# The same file supplies both the pairs to compare (fields 0 and 1) and
# the gold values (field 2).
print("Reading similarity test data...")
test_similarity_file = data_path + "ML08data_new.txt"
test_pairs = io_utils.read_tuple_list(test_similarity_file, fields=[0, 1])
gold = io_utils.read_list(test_similarity_file, field=2)

print("Computing similarity with lexical function...")
pred = composed_space.get_sims(test_pairs, CosSimilarity())
def test_trivial_crossvalidation(self):
    """Check ridge regression when ``param_range`` holds a single value.

    Runs over random 4-column matrix pairs with 2 to 10 rows and asserts
    that training leaves the inputs untouched, that ``param_range=[3]``
    gives exactly the solution of ``param=3``, and that
    ``param_range=[0]`` agrees with least squares to 3 decimal places.
    """
    for i in range(1, 10):
        # np.mat was removed in NumPy 2.0; np.asmatrix is the function it
        # aliased, so the matrices built here are the same as before.
        m_a = DenseMatrix(np.asmatrix(np.random.random((i + 1, 4))))
        m_b = DenseMatrix(np.asmatrix(np.random.random((i + 1, 4))))
        # Copies kept to verify that train() does not mutate its inputs.
        tmp_a = m_a.mat.copy()
        tmp_b = m_b.mat.copy()

        learner = RidgeRegressionLearner(param_range=[3], intercept=False)
        solution = learner.train(m_a, m_b)

        learner2 = RidgeRegressionLearner(param=3, intercept=False)
        solution2 = learner2.train(m_a, m_b)

        np.testing.assert_array_equal(tmp_a, m_a.mat)
        np.testing.assert_array_equal(tmp_b, m_b.mat)
        np.testing.assert_array_equal(solution.mat, solution2.mat)

        # Repeat with a new learner instance built the same way.
        learner = RidgeRegressionLearner(param_range=[3], intercept=False)
        solution = learner.train(m_a, m_b)

        np.testing.assert_array_equal(tmp_a, m_a.mat)
        np.testing.assert_array_equal(tmp_b, m_b.mat)
        np.testing.assert_array_equal(solution.mat, solution2.mat)

        # Ridge restricted to parameter 0 vs. ordinary least squares.
        learner = RidgeRegressionLearner(param_range=[0], intercept=False)
        solution = learner.train(m_a, m_b)

        learner2 = LstsqRegressionLearner(intercept=False)
        solution2 = learner2.train(m_a, m_b)

        np.testing.assert_array_almost_equal(solution.mat, solution2.mat, 3)