def test_trivial_crossvalidation(self):
        """Check ridge regression when ``param_range`` holds a single value.

        For random 4-column matrices with 2 to 10 rows this verifies that:

        * training with ``param_range=[3]`` gives exactly the same solution
          as training with ``param=3``, also when a second, freshly built
          learner is trained on the same data;
        * training leaves both input matrices unchanged;
        * training with ``param_range=[0]`` agrees with
          ``LstsqRegressionLearner`` to 3 decimals.
        """
        for i in range(1, 10):
            # np.asmatrix replaces the np.mat alias, which NumPy 2.0 removed.
            m_a = DenseMatrix(np.asmatrix(np.random.random((i + 1, 4))))
            m_b = DenseMatrix(np.asmatrix(np.random.random((i + 1, 4))))
            # Snapshots used to detect in-place modification by train().
            tmp_a = m_a.mat.copy()
            tmp_b = m_b.mat.copy()

            learner = RidgeRegressionLearner(param_range=[3], intercept=False)
            solution = learner.train(m_a, m_b)

            learner2 = RidgeRegressionLearner(param=3, intercept=False)
            solution2 = learner2.train(m_a, m_b)

            np.testing.assert_array_equal(tmp_a, m_a.mat)
            np.testing.assert_array_equal(tmp_b, m_b.mat)
            np.testing.assert_array_equal(solution.mat, solution2.mat)

            # Same check again with a newly constructed learner.
            learner = RidgeRegressionLearner(param_range=[3], intercept=False)
            solution = learner.train(m_a, m_b)

            np.testing.assert_array_equal(tmp_a, m_a.mat)
            np.testing.assert_array_equal(tmp_b, m_b.mat)
            np.testing.assert_array_equal(solution.mat, solution2.mat)

            # A ridge parameter of 0 is compared against plain least squares.
            learner = RidgeRegressionLearner(param_range=[0], intercept=False)
            solution = learner.train(m_a, m_b)

            learner2 = LstsqRegressionLearner(intercept=False)
            solution2 = learner2.train(m_a, m_b)

            np.testing.assert_array_almost_equal(solution.mat, solution2.mat, 3)
    def test_crossvalidation(self):
        """Compare ridge regression with intercept against plain least squares.

        The targets are built so that ``inputs`` times the first two rows of
        ``expected`` plus its third row reproduces them exactly. The ridge
        learner (``intercept=True``, ``param_range=[0]``) must therefore
        recover ``expected`` to 6 decimals, and its residual norm must be
        smaller than that of the intercept-free least-squares fit.
        """
        inputs = DenseMatrix(np.matrix([[1, 1], [2, 3], [4, 6]]))
        targets = DenseMatrix(
            np.matrix([[12, 15, 18], [21, 27, 33], [35, 46, 57]]))
        expected = DenseMatrix(np.matrix([[1, 2, 3], [4, 5, 6], [7, 8, 9]]))

        ridge = RidgeRegressionLearner(intercept=True, param_range=[0])
        lstsq = LstsqRegressionLearner(intercept=False)

        lstsq_solution = lstsq.train(inputs, targets)
        ridge_solution = ridge.train(inputs, targets)

        # All rows but the last, then the last row, are checked separately.
        np.testing.assert_array_almost_equal(
            ridge_solution.mat[:-1, :], expected[0:2, :].mat, decimal=6)
        np.testing.assert_array_almost_equal(
            ridge_solution.mat[-1, :], expected[2:3, :].mat, decimal=6)

        # NOTE(review): padd_matrix(inputs, 1) presumably appends the constant
        # column that pairs with the intercept row -- confirm in its source.
        padded_inputs = padd_matrix(inputs, 1)
        lstsq_error = ((inputs * lstsq_solution) - targets).norm()
        ridge_error = ((padded_inputs * ridge_solution) - targets).norm()
        self.assertGreater(lstsq_error, ridge_error)
예제 #3
0
    def test_crossvalidation(self):
        """Ridge regression with an intercept must beat intercept-free lstsq.

        ``y`` equals ``x`` multiplied by the first two rows of ``truth`` plus
        the third row of ``truth``. The test checks that the ridge learner
        (``intercept=True``, ``param_range=[0]``) returns ``truth`` to
        6 decimals, and that the least-squares learner without intercept
        leaves a strictly larger residual norm.
        """
        x = DenseMatrix(np.matrix([[1, 1], [2, 3], [4, 6]]))
        y = DenseMatrix(np.matrix([[12, 15, 18],
                                   [21, 27, 33],
                                   [35, 46, 57]]))
        truth = DenseMatrix(np.matrix([[1, 2, 3],
                                       [4, 5, 6],
                                       [7, 8, 9]]))

        with_intercept = RidgeRegressionLearner(intercept=True,
                                                param_range=[0])
        without_intercept = LstsqRegressionLearner(intercept=False)

        fit_plain = without_intercept.train(x, y)
        fit_ridge = with_intercept.train(x, y)

        places = 6
        # Leading rows of the ridge solution versus rows 0-1 of `truth`.
        np.testing.assert_array_almost_equal(fit_ridge.mat[:-1, :],
                                             truth[0:2, :].mat,
                                             places)
        # Final row of the ridge solution versus row 2 of `truth`.
        np.testing.assert_array_almost_equal(fit_ridge.mat[-1, :],
                                             truth[2:3, :].mat,
                                             places)

        x_padded = padd_matrix(x, 1)
        residual_plain = ((x * fit_plain) - y).norm()
        residual_ridge = ((x_padded * fit_ridge) - y).norm()
        self.assertGreater(residual_plain, residual_ridge)
예제 #4
0
    def test_space_train_dense(self):
        """Train ``FullAdditive`` with several learners and check the fit.

        For every (training tuples, argument space, phrase space) case and
        every learner, the model is trained and then used to compose the
        same tuples. The composed space must match the phrase space: the
        matrix to 10 decimals, and the row/column indexes exactly. The
        model's ``_has_intercept`` flag must equal the learner's
        ``_intercept``.
        """
        scenarios = [
            ([("a", "b", "a_b")], self.space4, self.space5),
            ([("a", "b", "a_b")], self.space4, self.space6),
            ([("a", "b", "a_b"), ("a", "b", "a_a")], self.space4, self.space7),
        ]

        # The same learner instances are reused across all scenarios.
        ridge_plain = RidgeRegressionLearner(intercept=False,
                                             crossvalidation=False,
                                             param=0)
        lstsq_plain = LstsqRegressionLearner(intercept=False)
        lstsq_intercept = LstsqRegressionLearner(intercept=True)
        regressors = [ridge_plain, lstsq_plain, lstsq_intercept]

        for phrases, arguments, expected in scenarios:
            for regressor in regressors:
                model = FullAdditive(learner=regressor)
                model.train(phrases, arguments, expected)
                composed = model.compose(phrases, arguments)

                np.testing.assert_array_almost_equal(
                    composed.cooccurrence_matrix.mat,
                    expected.cooccurrence_matrix.mat,
                    10)

                # Column index structures.
                self.assertListEqual(composed.id2column, expected.id2column)
                self.assertDictEqual(composed.column2id, expected.column2id)

                # Row index structures.
                self.assertListEqual(composed.id2row, expected.id2row)
                self.assertDictEqual(composed.row2id, expected.row2id)

                self.assertEqual(model._has_intercept, regressor._intercept)
예제 #5
0
def _dump_function_space(model):
    """Print the function space learned by ``model`` and return its matrix.

    Prints the row ids of ``model.function_space``, then reshapes the
    co-occurrence matrix to the space's ``element_shape`` (presumably in
    place, since the return value of ``reshape`` is discarded) and prints it.
    """
    print("Lexical function space:")
    print(model.function_space.id2row)
    matrix = model.function_space.cooccurrence_matrix
    matrix.reshape(model.function_space.element_shape)
    print(matrix)
    return matrix


# Load the argument and phrase spaces.
arg_space = io_utils.load("./data/out/ex10.pkl")
phrase_space = io_utils.load("data/out/PHRASE_SS.ex10.pkl")

# First run: LexicalFunction with whatever learner it picks by default.
print("\nDefault regression:")
my_comp = LexicalFunction()
print(type(my_comp.regression_learner).__name__)
my_comp.train(train_data, arg_space, phrase_space)

# Show the learned parameters.
cooc_mat = _dump_function_space(my_comp)

# Second run: explicit ridge regression learner, no intercept and no
# cross-validation.
print("\nRidge Regression with lambda = 2")
rr_learner = RidgeRegressionLearner(
    param=2,
    intercept=False,
    crossvalidation=False,
)
my_comp = LexicalFunction(learner=rr_learner)
my_comp.train(train_data, arg_space, phrase_space)

# Show the learned parameters.
cooc_mat = _dump_function_space(my_comp)
예제 #6
0
# NOTE: `space` and `data_path` are defined earlier in the script, outside
# this excerpt. The progress messages use the call form print("...") so the
# script parses on Python 3; with a single string argument the output is
# unchanged on Python 2.
print("Applying SVD...")
space = space.apply(Svd(100))

print("Creating peripheral space..")
per_space = PeripheralSpace.build(space,
                                  data=data_path + "per.raw.SV.sm",
                                  cols=data_path + "per.raw.SV.cols",
                                  format="sm")

# Read the training tuples (fields 0, 1 and 2 of each line).
train_data_file = data_path + "ML08_SV_train.txt"
train_data = io_utils.read_tuple_list(train_data_file, fields=[0, 1, 2])

print("Training Lexical Function composition model...")
comp_model = LexicalFunction(learner=RidgeRegressionLearner(param=2))
comp_model.train(train_data, space, per_space)

print("Composing phrases...")
test_phrases_file = data_path + "ML08nvs_test.txt"
test_phrases = io_utils.read_tuple_list(test_phrases_file, fields=[0, 1, 2])
composed_space = comp_model.compose(test_phrases, space)

# The similarity file supplies the pairs (fields 0 and 1) and, from field 2,
# the values stored in `gold`.
print("Reading similarity test data...")
test_similarity_file = data_path + "ML08data_new.txt"
test_pairs = io_utils.read_tuple_list(test_similarity_file, fields=[0, 1])
gold = io_utils.read_list(test_similarity_file, field=2)

print("Computing similarity with lexical function...")
pred = composed_space.get_sims(test_pairs, CosSimilarity())
예제 #7
0
    def test_trivial_crossvalidation(self):
        """Check ridge regression when ``param_range`` holds a single value.

        For random 4-column matrices with 2 to 10 rows this verifies that:

        * training with ``param_range=[3]`` gives exactly the same solution
          as training with ``param=3``, also when a second, freshly built
          learner is trained on the same data;
        * training leaves both input matrices unchanged;
        * training with ``param_range=[0]`` agrees with
          ``LstsqRegressionLearner`` to 3 decimals.
        """
        for i in range(1, 10):
            # np.asmatrix replaces the np.mat alias, which NumPy 2.0 removed.
            m_a = DenseMatrix(np.asmatrix(np.random.random((i + 1, 4))))
            m_b = DenseMatrix(np.asmatrix(np.random.random((i + 1, 4))))
            # Snapshots used to detect in-place modification by train().
            tmp_a = m_a.mat.copy()
            tmp_b = m_b.mat.copy()

            learner = RidgeRegressionLearner(param_range=[3], intercept=False)
            solution = learner.train(m_a, m_b)

            learner2 = RidgeRegressionLearner(param=3, intercept=False)
            solution2 = learner2.train(m_a, m_b)

            np.testing.assert_array_equal(tmp_a, m_a.mat)
            np.testing.assert_array_equal(tmp_b, m_b.mat)
            np.testing.assert_array_equal(solution.mat, solution2.mat)

            # Same check again with a newly constructed learner.
            learner = RidgeRegressionLearner(param_range=[3], intercept=False)
            solution = learner.train(m_a, m_b)

            np.testing.assert_array_equal(tmp_a, m_a.mat)
            np.testing.assert_array_equal(tmp_b, m_b.mat)
            np.testing.assert_array_equal(solution.mat, solution2.mat)

            # A ridge parameter of 0 is compared against plain least squares.
            learner = RidgeRegressionLearner(param_range=[0], intercept=False)
            solution = learner.train(m_a, m_b)

            learner2 = LstsqRegressionLearner(intercept=False)
            solution2 = learner2.train(m_a, m_b)

            np.testing.assert_array_almost_equal(solution.mat, solution2.mat,
                                                 3)