Esempio n. 1
0
    def test_space_compose_dense(self):
        """Compose with explicitly supplied A and B and compare to fixtures.

        Each case holds the phrase triples to compose, the argument space,
        the phrase space the result is compared against, and the two
        matrices passed to ``FullAdditive`` as ``A`` and ``B``. No training
        takes place here.
        """
        # np.asmatrix is used rather than np.mat: the ``mat`` alias was
        # removed from the main NumPy namespace in NumPy 2.0, while
        # ``asmatrix`` is available in old and new releases alike.
        test_cases = [
            ([("a", "b", "a_b")], self.space4, self.space5,
             DenseMatrix.identity(2), DenseMatrix.identity(2)),
            ([("a", "b", "a_b")], self.space4, self.space6,
             np.asmatrix([[0, 0], [0, 0]]), np.asmatrix([[0, 0], [0, 0]])),
            ([("a", "b", "a_b"), ("a", "b", "a_a")], self.space4, self.space7,
             DenseMatrix.identity(2), DenseMatrix.identity(2)),
        ]

        for in_data, arg_space, phrase_space, mat_a, mat_b in test_cases:
            comp_model = FullAdditive(A=mat_a, B=mat_b)
            comp_space = comp_model.compose(in_data, arg_space)

            # Composed vectors must match the fixture to 10 decimals.
            np.testing.assert_array_almost_equal(
                comp_space.cooccurrence_matrix.mat,
                phrase_space.cooccurrence_matrix.mat, 10)

            # The composed space is expected to carry no column labels.
            self.assertListEqual(comp_space.id2column, [])
            self.assertDictEqual(comp_space.column2id, {})

            # Row labels must agree with the expected phrase space.
            self.assertListEqual(comp_space.id2row, phrase_space.id2row)
            self.assertDictEqual(comp_space.row2id, phrase_space.row2id)

            # A model built from explicit A and B reports no intercept.
            self.assertFalse(comp_model._has_intercept)
Esempio n. 2
0
    def test_train1(self):
        """Check the parameters learned by ``_train`` on small fixtures.

        Each case is (first argument matrix, second argument matrix, phrase
        matrix, expected A, expected B). The model is trained with a
        least-squares learner without intercept, and the transposes of the
        stored ``_mat_a_t`` / ``_mat_b_t`` are compared to the expected
        values to 10 decimals.
        """
        # np.asmatrix replaces np.mat, an alias that no longer exists in the
        # main NumPy namespace as of NumPy 2.0.
        test_cases = [(self.m11, self.m21, self.ph1, np.asmatrix([[2]]),
                       np.asmatrix([[3]])),
                      (self.m11, self.m21,
                       DenseMatrix(np.asmatrix([[0], [0]])),
                       np.asmatrix([[0]]), np.asmatrix([[0]]))]

        for m1, m2, ph, expected_a, expected_b in test_cases:
            comp_model = FullAdditive(learner=LstsqRegressionLearner(
                intercept=False))
            comp_model._train(m1, m2, ph)
            np.testing.assert_array_almost_equal(
                comp_model._mat_a_t.transpose().mat, expected_a, 10)
            np.testing.assert_array_almost_equal(
                comp_model._mat_b_t.transpose().mat, expected_b, 10)
Esempio n. 3
0
    def test_train2(self):
        """Recover randomly drawn A and B from synthetic phrase data.

        Phrase data is generated as ``A * m1.T + B * m2.T`` from random
        matrices, so the trained parameters are compared against the very
        ``A`` and ``B`` that produced the data. The first loop trains without
        an intercept, the second with one.

        NOTE: the inputs come from the global, unseeded NumPy random state,
        so every run uses different data.
        """
        # np.asmatrix is used throughout instead of np.mat, which was removed
        # from the main NumPy namespace in NumPy 2.0. Matrix semantics are
        # required here: ``*`` below is a matrix product.
        dim_ = 2     # rows of A and B
        dim_1 = 3    # columns of A / columns of the first argument matrix
        dim_2 = 5    # columns of B / columns of the second argument matrix

        # ``dim`` is the number of rows of the generated argument matrices.
        for dim in [dim_1 + dim_2, dim_1 + dim_2 + 2]:
            expected_a = np.asmatrix(np.random.random((dim_, dim_1)))
            expected_b = np.asmatrix(np.random.random((dim_, dim_2)))
            m1 = np.asmatrix(np.random.random((dim, dim_1)))
            m2 = np.asmatrix(np.random.random((dim, dim_2)))

            ph = np.asmatrix(expected_a * m1.T + expected_b * m2.T)

            comp_model = FullAdditive(learner=LstsqRegressionLearner(
                intercept=False))
            # ``ph`` has one column per generated row, hence the transpose
            # before it is handed to ``_train`` next to m1 and m2.
            comp_model._train(DenseMatrix(m1), DenseMatrix(m2),
                              DenseMatrix(ph).transpose())
            np.testing.assert_array_almost_equal(
                comp_model._mat_a_t.transpose().mat, expected_a, 10)
            np.testing.assert_array_almost_equal(
                comp_model._mat_b_t.transpose().mat, expected_b, 10)

        for dim in [dim_1 + dim_2 + 6, dim_1 + dim_2 + 20]:
            expected_a = np.asmatrix(np.random.random((dim_, dim_1)))
            expected_b = np.asmatrix(np.random.random((dim_, dim_2)))
            m1 = np.asmatrix(np.random.random((dim, dim_1)))
            m2 = np.asmatrix(np.random.random((dim, dim_2)))

            ph = np.asmatrix(expected_a * m1.T + expected_b * m2.T)

            comp_model = FullAdditive(learner=LstsqRegressionLearner(
                intercept=True))
            comp_model._train(DenseMatrix(m1), DenseMatrix(m2),
                              DenseMatrix(ph).transpose())
            np.testing.assert_array_almost_equal(
                comp_model._mat_a_t.transpose().mat, expected_a, 10)
            # The last row of _mat_b_t is dropped before comparing;
            # presumably it holds the learned intercept -- TODO confirm.
            np.testing.assert_array_almost_equal(
                comp_model._mat_b_t[:-1, :].transpose().mat, expected_b, 10)
Esempio n. 4
0
    def test_full_additive(self):
        """Exporting must fail before training and work afterwards.

        Builds small dense fixtures on ``self``, checks that ``export`` on a
        fresh ``FullAdditive`` raises ``IllegalStateError``, trains the model
        and exports it again to ``self.prefix + ".full2"``.
        """
        # np.asmatrix replaces np.mat, which was removed from the main NumPy
        # namespace in NumPy 2.0.
        self.m12 = DenseMatrix(np.asmatrix([[3, 1], [9, 2]]))
        self.m22 = DenseMatrix(np.asmatrix([[4, 3], [2, 1]]))
        self.ph2 = DenseMatrix(np.asmatrix([[18, 11], [24, 7]]))
        self.row = ["a", "b"]
        self.ft = ["f1", "f2"]
        self.space1 = Space(DenseMatrix(self.m12), self.row, self.ft)
        self.space2 = Space(DenseMatrix(self.ph2), ["a_a", "a_b"], self.ft)
        m = FullAdditive()
        # An untrained model has nothing to export.
        self.assertRaises(IllegalStateError, m.export, self.prefix + ".full1")
        m.train([("a", "b", "a_b"), ("a", "a", "a_a")], self.space1,
                self.space2)

        m.export(self.prefix + ".full2")
Esempio n. 5
0
    def test_space_compose_sparse(self):
        """Compose with explicit A and B and compare densified results.

        Each case holds the phrase triples to compose, the argument space,
        the expected phrase space, and the matrices passed as ``A`` and
        ``B``. Both the composed and the expected matrices are converted
        with ``todense()`` before they are compared to 10 decimals.
        """
        # TODO: open question from the original author -- the parameters are
        # given as dense matrices while the input data is sparse; it is
        # unclear what this test should do about that.

        # np.asmatrix replaces np.mat, which was removed from the main NumPy
        # namespace in NumPy 2.0.
        test_cases = [([("a", "b", "a_b")], self.space1, self.space2,
                       DenseMatrix.identity(2), DenseMatrix.identity(2)),
                      ([("a", "b", "a_b")], self.space1, self.space3,
                       np.asmatrix([[0, 0], [0, 0]]),
                       np.asmatrix([[0, 0], [0, 0]]))]

        for in_data, arg_space, phrase_space, mat_a, mat_b in test_cases:
            comp_model = FullAdditive(A=mat_a, B=mat_b)
            comp_space = comp_model.compose(in_data, arg_space)

            np.testing.assert_array_almost_equal(
                comp_space.cooccurrence_matrix.mat.todense(),
                phrase_space.cooccurrence_matrix.mat.todense(), 10)
Esempio n. 6
0
    def test_space_train_dense(self):
        """Train, then compose, for every fixture case and every learner.

        A ``FullAdditive`` model is trained on each (phrase triples, argument
        space, phrase space) case with each learner and then asked to compose
        the same phrase triples. The composed space must match the phrase
        space in matrix values (10 decimals) and in row/column labels, and
        the model must report the same intercept setting as its learner.
        """
        scenarios = [
            ([("a", "b", "a_b")], self.space4, self.space5),
            ([("a", "b", "a_b")], self.space4, self.space6),
            ([("a", "b", "a_b"), ("a", "b", "a_a")], self.space4, self.space7),
        ]

        # The same learner instances are shared by all scenarios.
        ridge = RidgeRegressionLearner(intercept=False,
                                       crossvalidation=False,
                                       param=0)
        lstsq_plain = LstsqRegressionLearner(intercept=False)
        lstsq_intercept = LstsqRegressionLearner(intercept=True)
        regressors = [ridge, lstsq_plain, lstsq_intercept]

        for phrases, args, target in scenarios:
            for regressor in regressors:
                model = FullAdditive(learner=regressor)
                model.train(phrases, args, target)
                composed = model.compose(phrases, args)

                # Values of the composed vectors.
                actual_mat = composed.cooccurrence_matrix.mat
                wanted_mat = target.cooccurrence_matrix.mat
                np.testing.assert_array_almost_equal(actual_mat,
                                                     wanted_mat, 10)

                # Column labels, in both lookup directions.
                self.assertListEqual(composed.id2column, target.id2column)
                self.assertDictEqual(composed.column2id, target.column2id)

                # Row labels, in both lookup directions.
                self.assertListEqual(composed.id2row, target.id2row)
                self.assertDictEqual(composed.row2id, target.row2id)

                # The model mirrors the learner's intercept flag.
                self.assertEqual(model._has_intercept, regressor._intercept)
Esempio n. 7
0
# Excerpt of a Python 2 evaluation script. comp_model, test_phrases, space,
# train_data, per_space and data_path are defined before this excerpt begins.

# Compose the test phrases with whatever model comp_model holds at this point
# (judging by the messages below, a lexical function model -- TODO confirm).
composed_space = comp_model.compose(test_phrases, space)

# Read the evaluation file: fields 0 and 1 of each entry form the pair to be
# compared, field 2 is read separately as the gold score for that pair.
print "Reading similarity test data..."
test_similarity_file = data_path + "ML08data_new.txt"
test_pairs = io_utils.read_tuple_list(test_similarity_file, fields=[0, 1])
gold = io_utils.read_list(test_similarity_file, field=2)

# Cosine similarity of every test pair inside the composed space.
print "Computing similarity with lexical function..."
pred = composed_space.get_sims(test_pairs, CosSimilarity())

# Score the predicted similarities against the gold values ("spearman").
print "Scoring lexical function..."
print scoring_utils.score(gold, pred, "spearman")

# Same train / compose / score cycle with a Full Additive model that uses a
# ridge regression learner constructed with param=2.
print "Training Full Additive composition model..."
comp_model = FullAdditive(learner=RidgeRegressionLearner(param=2))
comp_model.train(train_data, space, per_space)
composed_space = comp_model.compose(test_phrases, space)
pred = composed_space.get_sims(test_pairs, CosSimilarity())
print scoring_utils.score(gold, pred, "spearman")

# Same cycle with a Weighted Additive model; its alpha and beta attributes
# are printed after training.
print "Training Weighted Additive composition model..."
comp_model = WeightedAdditive()
comp_model.train(train_data, space, per_space)
print "alpha, beta:", comp_model.alpha, comp_model.beta
composed_space = comp_model.compose(test_phrases, space)
pred = composed_space.get_sims(test_pairs, CosSimilarity())
print scoring_utils.score(gold, pred, "spearman")

# Dilation model; the excerpt ends right after its construction.
print "Training Dilation composition model..."
comp_model = Dilation()
Esempio n. 8
0
from composes.composition.full_additive import FullAdditive

# Training triples: two words and the label of the phrase they form.
train_data = [
    ("good", "car", "good_car"),
    ("good", "book", "good_book"),
]

# Space holding the vectors of the argument words.
arg_space = io_utils.load("./data/out/ex10.pkl")

# Space holding the phrase vectors used as training targets; show its rows
# and matrix before training.
phrase_space = io_utils.load("data/out/PHRASE_SS.ex10.pkl")
print("Training phrase space")
print(phrase_space.id2row)
print(phrase_space.cooccurrence_matrix)

# Fit a FullAdditive model with its default settings.
my_comp = FullAdditive()
my_comp.train(train_data, arg_space, phrase_space)

# The learned parameters are read from the transposed attributes, so they
# are transposed back before being displayed.
mat_a = my_comp._mat_a_t.transpose()
print("\nA:", mat_a)
mat_b = my_comp._mat_b_t.transpose()
print("B:", mat_b)

# Apply the trained model to a phrase that is not part of train_data.
new_phrases = [("good", "bike", "good_bike")]
composed_space = my_comp.compose(new_phrases, arg_space)
print("\nComposed space:")
print(composed_space.id2row)
print(composed_space.cooccurrence_matrix)