Esempio n. 1
0
    def test_train_intercept(self):
        """Train a lexical function with an intercept term, then compose with it.

        Phrase vectors are generated by applying two 2x2 matrices ("a1" and
        "a2") to three 2-dimensional noun vectors.  A ``LexicalFunction``
        using ``LstsqRegressionLearner(intercept=True)`` is trained on four
        (function, argument, phrase) triples.  The test checks the row labels
        and element shape of the learned function space, then composes the
        same triples with the learned functions and compares the result with
        the generated phrase vectors.
        """
        a1_mat = DenseMatrix(np.mat([[3, 4], [5, 6]]))
        a2_mat = DenseMatrix(np.mat([[1, 2], [3, 4]]))

        train_data = [("a1", "man", "a1_man"),
                      ("a2", "car", "a2_car"),
                      ("a1", "boy", "a1_boy"),
                      ("a2", "boy", "a2_boy")
                      ]

        n_mat = DenseMatrix(np.mat([[13, 21], [3, 4], [5, 6]]))
        n_space = Space(n_mat, ["man", "car", "boy"], self.ft)

        # Row i of an1_mat / an2_mat is the corresponding matrix applied to
        # noun i; the two blocks are stacked as a1_* followed by a2_*.
        an1_mat = (a1_mat * n_mat.transpose()).transpose()
        an2_mat = (a2_mat * n_mat.transpose()).transpose()
        an_mat = an1_mat.vstack(an2_mat)

        an_space = Space(an_mat,
                         ["a1_man", "a1_car", "a1_boy", "a2_man", "a2_car", "a2_boy"],
                         self.ft)

        # test train
        model = LexicalFunction(learner=LstsqRegressionLearner(intercept=True))
        # Each function word has only two training triples here.
        # NOTE(review): _MIN_SAMPLES is presumably the per-function minimum
        # number of training samples -- confirm against LexicalFunction.
        model._MIN_SAMPLES = 1
        model.train(train_data, n_space, an_space)
        a_space = model.function_space

        # Only labels and shape are checked; the learned values are not
        # compared against a1_mat / a2_mat.
        self.assertListEqual(a_space.id2row, ["a1", "a2"])
        self.assertTupleEqual(a_space.element_shape, (2, 3))

        # test compose
        a_mat = a_space.cooccurrence_matrix

        a_space = Space(a_mat, ["a1", "a2"], [], element_shape=(2, 3))
        model = LexicalFunction(function_space=a_space, intercept=True)
        model._MIN_SAMPLES = 1
        comp_space = model.compose(train_data, n_space)

        self.assertListEqual(comp_space.id2row,
                             ["a1_man", "a2_car", "a1_boy", "a2_boy"])
        self.assertListEqual(comp_space.id2column, [])

        self.assertEqual(comp_space.element_shape, (2,))

        # Rows 0, 4, 2, 5 of an_mat are a1_man, a2_car, a1_boy, a2_boy, i.e.
        # the phrases of train_data in order.
        np.testing.assert_array_almost_equal(comp_space.cooccurrence_matrix.mat,
                                             an_mat[[0, 4, 2, 5]].mat, 8)
Esempio n. 2
0
    def test_train_intercept(self):
        """Train a lexical function with an intercept term, then compose with it.

        Two 2x2 matrices ("a1" and "a2") are applied to three 2-dimensional
        noun vectors to generate the phrase space used as training target.
        After training a ``LexicalFunction`` backed by
        ``LstsqRegressionLearner(intercept=True)``, the row labels and element
        shape of the learned function space are checked.  The learned
        functions are then used to compose the training triples, and the
        result is compared with the generated phrase vectors.
        """
        a1_mat = DenseMatrix(np.mat([[3, 4], [5, 6]]))
        a2_mat = DenseMatrix(np.mat([[1, 2], [3, 4]]))

        # (function word, argument word, phrase) triples.
        train_data = [("a1", "man", "a1_man"),
                      ("a2", "car", "a2_car"),
                      ("a1", "boy", "a1_boy"),
                      ("a2", "boy", "a2_boy")
        ]

        n_mat = DenseMatrix(np.mat([[13, 21], [3, 4], [5, 6]]))
        n_space = Space(n_mat, ["man", "car", "boy"], self.ft)

        # Row i of each block is the matrix applied to noun i; a1_* rows come
        # first, followed by a2_* rows.
        an1_mat = (a1_mat * n_mat.transpose()).transpose()
        an2_mat = (a2_mat * n_mat.transpose()).transpose()
        an_mat = an1_mat.vstack(an2_mat)

        an_space = Space(an_mat,
                         ["a1_man", "a1_car", "a1_boy", "a2_man", "a2_car", "a2_boy"],
                         self.ft)

        # test train
        model = LexicalFunction(learner=LstsqRegressionLearner(intercept=True))
        model.train(train_data, n_space, an_space)
        a_space = model.function_space

        # Only labels and shape are verified here; the learned values are not
        # compared against a1_mat / a2_mat.
        self.assertListEqual(a_space.id2row, ["a1", "a2"])
        self.assertTupleEqual(a_space.element_shape, (2, 3))

        # test compose
        a_mat = a_space.cooccurrence_matrix

        a_space = Space(a_mat, ["a1", "a2"], [], element_shape=(2, 3))
        model = LexicalFunction(function_space=a_space, intercept=True)
        comp_space = model.compose(train_data, n_space)

        self.assertListEqual(comp_space.id2row,
                             ["a1_man", "a2_car", "a1_boy", "a2_boy"])
        self.assertListEqual(comp_space.id2column, [])

        self.assertEqual(comp_space.element_shape, (2,))

        # Rows 0, 4, 2, 5 of an_mat are a1_man, a2_car, a1_boy, a2_boy, i.e.
        # the phrases of train_data in order.
        np.testing.assert_array_almost_equal(comp_space.cooccurrence_matrix.mat,
                                             an_mat[[0, 4, 2, 5]].mat, 8)
Esempio n. 3
0
    def test_3d(self):
        """Learn verb+object matrices and verb tensors, then compose the latter.

        Sentence vectors are generated by applying four 2x2 verb+object
        matrices to three noun vectors.  A first model is trained on
        (verb_object, subject, sentence) triples and must recover those
        matrices.  A second model is trained on (verb, object, verb_object)
        triples against the space learned by the first one, and must yield
        the expected flattened verb tensors.  Composing with the second model
        must reproduce the rows of the first learned space.
        """
        # Expected verb tensors, one flattened row per verb.
        expected_verbs = DenseMatrix(np.mat([[0, 0, 1, 1, 2, 2, 3, 3],    # hate
                                             [0, 1, 2, 4, 5, 6, 8, 9]]))  # love

        # Verb+object matrices, in the row order expected from training.
        hate_boy = DenseMatrix(np.mat([[0, 11], [22, 33]]))
        hate_man = DenseMatrix(np.mat([[0, 7], [14, 21]]))
        love_boy = DenseMatrix(np.mat([[6, 34], [61, 94]]))
        love_car = DenseMatrix(np.mat([[2, 10], [17, 26]]))
        verb_object_mats = [hate_boy, hate_man, love_boy, love_car]

        train_vo_data = [
            ("hate_boy", "man", "man_hate_boy"),
            ("hate_man", "man", "man_hate_man"),
            ("hate_boy", "boy", "boy_hate_boy"),
            ("hate_man", "boy", "boy_hate_man"),
            ("love_car", "boy", "boy_love_car"),
            ("love_boy", "man", "man_love_boy"),
            ("love_boy", "boy", "boy_love_boy"),
            ("love_car", "man", "man_love_car"),
        ]

        # Open question left by the author: what should happen when a
        # phrase is not found?
        train_v_data = [
            ("love", "boy", "love_boy"),
            ("hate", "man", "hate_man"),
            ("hate", "boy", "hate_boy"),
            ("love", "car", "love_car"),
        ]

        sentences = [
            "man_hate_boy", "car_hate_boy", "boy_hate_boy",
            "man_hate_man", "car_hate_man", "boy_hate_man",
            "man_love_boy", "car_love_boy", "boy_love_boy",
            "man_love_car", "car_love_car", "boy_love_car",
        ]

        noun_mat = DenseMatrix(np.mat([[3, 4], [1, 2], [5, 6]]))
        n_space = Space(noun_mat, ["man", "car", "boy"], self.ft)

        # One block of three sentence rows (man, car, boy as subject) per
        # verb+object matrix, stacked in the order of `sentences`.
        sentence_blocks = [(vo_mat * noun_mat.transpose()).transpose()
                           for vo_mat in verb_object_mats]
        sentence_mat = hate_boy.nary_vstack(sentence_blocks)
        s_space = Space(sentence_mat, sentences, self.ft)

        # --- train 2d: recover the verb+object matrices ---
        vo_model = LexicalFunction(
            learner=LstsqRegressionLearner(intercept=False))
        vo_model._MIN_SAMPLES = 1
        vo_model.train(train_vo_data, n_space, s_space)
        vo_space = vo_model.function_space

        self.assertListEqual(
            vo_space.id2row,
            ["hate_boy", "hate_man", "love_boy", "love_car"])
        self.assertTupleEqual(vo_space.element_shape, (2, 2))
        for row_index, expected in enumerate(verb_object_mats):
            # Flatten the 2x2 matrix so it lines up with one learned row.
            expected.reshape((1, 4))
            np.testing.assert_array_almost_equal(
                expected.mat, vo_space.cooccurrence_matrix.mat[row_index])

        # --- train 3d: learn the verbs from the learned verb+object space ---
        verb_model = LexicalFunction(
            learner=LstsqRegressionLearner(intercept=False))
        verb_model._MIN_SAMPLES = 1
        verb_model.train(train_v_data, n_space, vo_space)
        v_space = verb_model.function_space
        np.testing.assert_array_almost_equal(
            expected_verbs.mat, v_space.cooccurrence_matrix.mat)
        self.assertListEqual(v_space.id2row, ["hate", "love"])
        self.assertTupleEqual(v_space.element_shape, (2, 2, 2))

        # --- compose 3d: verbs applied to objects give back vo_space ---
        composed_space = verb_model.compose(train_v_data, n_space)
        self.assertListEqual(list(vo_space.id2row),
                             sorted(composed_space.id2row))
        wanted_rows = vo_space.id2row
        learned_rows = vo_space.get_rows(wanted_rows)
        composed_rows = composed_space.get_rows(wanted_rows)
        np.testing.assert_array_almost_equal(
            learned_rows.mat, composed_rows.mat, 7)
        self.assertTupleEqual(vo_space.element_shape,
                              composed_space.element_shape)
Esempio n. 4
0
    def test_3d(self):
        """Check 2-d and 3-d lexical function training plus 3-d composition.

        Four 2x2 verb+object matrices are applied to three noun vectors to
        build a sentence space.  Training on (verb_object, subject, sentence)
        triples must recover the four matrices; training on
        (verb, object, verb_object) triples against that learned space must
        give the expected flattened verb tensors; composing with the verb
        model must reproduce the learned verb+object rows.
        """
        # Flattened verb tensors the 3-d training step is expected to learn.
        verb_rows = DenseMatrix(np.mat([[0, 0, 1, 1, 2, 2, 3, 3],    # hate
                                        [0, 1, 2, 4, 5, 6, 8, 9]]))  # love

        # Generating verb+object matrices, in expected row order:
        # hate boy, hate man, love boy, love car.
        generators = [
            DenseMatrix(np.mat([[0, 11], [22, 33]])),
            DenseMatrix(np.mat([[0, 7], [14, 21]])),
            DenseMatrix(np.mat([[6, 34], [61, 94]])),
            DenseMatrix(np.mat([[2, 10], [17, 26]])),
        ]

        train_vo_data = [
            ("hate_boy", "man", "man_hate_boy"),
            ("hate_man", "man", "man_hate_man"),
            ("hate_boy", "boy", "boy_hate_boy"),
            ("hate_man", "boy", "boy_hate_man"),
            ("love_car", "boy", "boy_love_car"),
            ("love_boy", "man", "man_love_boy"),
            ("love_boy", "boy", "boy_love_boy"),
            ("love_car", "man", "man_love_car"),
        ]

        # Open question left by the author: what should happen when a
        # phrase is not found?
        train_v_data = [
            ("love", "boy", "love_boy"),
            ("hate", "man", "hate_man"),
            ("hate", "boy", "hate_boy"),
            ("love", "car", "love_car"),
        ]

        sentences = [
            "man_hate_boy", "car_hate_boy", "boy_hate_boy",
            "man_hate_man", "car_hate_man", "boy_hate_man",
            "man_love_boy", "car_love_boy", "boy_love_boy",
            "man_love_car", "car_love_car", "boy_love_car",
        ]

        nouns = DenseMatrix(np.mat([[3, 4], [1, 2], [5, 6]]))
        n_space = Space(nouns, ["man", "car", "boy"], self.ft)

        # Each generator contributes three sentence rows (subjects man, car,
        # boy), stacked in the same order as `sentences`.
        stacked = []
        for generator in generators:
            stacked.append((generator * nouns.transpose()).transpose())
        s_space = Space(generators[0].nary_vstack(stacked), sentences, self.ft)

        # test train 2d
        model_2d = LexicalFunction(
            learner=LstsqRegressionLearner(intercept=False))
        model_2d.train(train_vo_data, n_space, s_space)
        vo_space = model_2d.function_space

        self.assertListEqual(
            vo_space.id2row,
            ["hate_boy", "hate_man", "love_boy", "love_car"])
        self.assertTupleEqual(vo_space.element_shape, (2, 2))
        position = 0
        for generator in generators:
            # Flatten to a single row before comparing with the learned row.
            generator.reshape((1, 4))
            np.testing.assert_array_almost_equal(
                generator.mat, vo_space.cooccurrence_matrix.mat[position])
            position += 1

        # test train 3d
        model_3d = LexicalFunction(
            learner=LstsqRegressionLearner(intercept=False))
        model_3d.train(train_v_data, n_space, vo_space)
        v_space = model_3d.function_space
        np.testing.assert_array_almost_equal(
            verb_rows.mat, v_space.cooccurrence_matrix.mat)
        self.assertListEqual(v_space.id2row, ["hate", "love"])
        self.assertTupleEqual(v_space.element_shape, (2, 2, 2))

        # test compose 3d
        recomposed = model_3d.compose(train_v_data, n_space)
        learned_ids = list(vo_space.id2row)
        recomposed_ids = sorted(recomposed.id2row)
        self.assertListEqual(learned_ids, recomposed_ids)

        lookup = vo_space.id2row
        from_training = vo_space.get_rows(lookup)
        from_compose = recomposed.get_rows(lookup)
        np.testing.assert_array_almost_equal(
            from_training.mat, from_compose.mat, 7)
        self.assertTupleEqual(vo_space.element_shape,
                              recomposed.element_shape)
Esempio n. 5
0
    def tracenorm_regression(matrix_a, matrix_b, lmbd, iterations, intercept=False):
        """
        Performs Trace Norm Regression.

        This method uses approximate gradient descent
        to solve the problem:
            :math:`X = argmin(||AX - B||_2 + \\lambda||X||_*)`
        where :math:`||X||_*` is the trace norm of :math:`X`, the sum of its
        singular values.
        It is implemented for dense matrices only.
        The algorithm is the Extended Gradient Algorithm from (Ji and Ye, 2009).

        Args:
            matrix_a: input matrix A, of type Matrix
            matrix_b: input matrix B, of type Matrix. If None, it is defined
                as matrix_a (after the intercept column, if any, is appended)
            lmbd: scalar, lambda parameter. It is multiplied by the number of
                rows of A before being used.
            iterations: int, maximum number of outer iterations
            intercept: bool. If True a column of ones is appended to A.
                Optional, default False.

        Returns:
            a pair (X, costs): X is the solution, of type DenseMatrix; costs
            is a list with one entry [L, L_bound, fitness, cost] for every
            outer iteration except the first.

        """
        #log.print_info(logger, "In Tracenorm regression..", 4)
        #log.print_matrix_info(logger, matrix_a, 5, "Input matrix A:")
        #log.print_matrix_info(logger, matrix_b, 5, "Input matrix B:")

        if intercept:
            # Build the column of ones with the same matrix class as A so that
            # hstack combines two operands of the same type.
            matrix_type = type(matrix_a)
            matrix_a = matrix_a.hstack(matrix_type(np.ones((matrix_a.shape[0],
                                                             1))))
        # Identity test: "== None" could be dispatched to an overloaded
        # comparison on the matrix object.
        if matrix_b is None:
            matrix_b = matrix_a

        # TODO remove this
        matrix_a = DenseMatrix(matrix_a).mat
        matrix_b = DenseMatrix(matrix_b).mat

        # Matrix shapes
        p = matrix_a.shape[0]
        q = matrix_a.shape[1]
        assert_same_shape(matrix_a, matrix_b, 0)

        # Initialization of the algorithm
        W = (1.0/p) * Linalg._kronecker_product(matrix_a)

        # Sub-expressions reused at various places in the code
        matrix_a_t = matrix_a.transpose()
        at_times_a = np.dot(matrix_a_t, matrix_a)

        # Epsilon: to ensure that our bound on the Lipschitz constant is large enough
        epsilon_lbound = 0.05
        # Expression of the bound of the Lipschitz constant of the cost function
        L_bound = (1+epsilon_lbound)*2*Linalg._frobenius_norm_squared(at_times_a)
        # Current "guess" of the local Lipschitz constant
        L = 1.0
        # Factor by which L should be increased when it happens to be too small
        gamma = 1.2
        # Epsilon to ensure that mu is increased when the inequality hold tightly
        epsilon_cost = 0.00001
        # Real lambda: resized according to the number of training samples (?)
        lambda_ = lmbd*p
        # Variables used for the accelerated algorithm (check the original paper)
        Z = W
        alpha = 1.0
        # Halting condition: relative change of the cost between two iterations
        epsilon = 0.00001
        last_cost = 1
        current_cost = -1
        linalg_error_caught = False

        costs = []
        iter_counter = 0
        while (iter_counter < iterations
               and abs((current_cost - last_cost)/last_cost) > epsilon
               and not linalg_error_caught):
            sys.stdout.flush()
            # Cost tracking
            try:
                next_W, tracenorm = Linalg._next_tracenorm_guess(
                    matrix_a, matrix_b, lambda_, L, Z, at_times_a)
            except LinAlgError:
                print("LinAlgError caught in trace norm regression")
                linalg_error_caught = True
                break

            last_cost = current_cost
            current_fitness = Linalg._fitness(matrix_a, matrix_b, next_W)
            current_cost = current_fitness + lambda_ * tracenorm
            if iter_counter > 0: # The first scores are messy
                cost_list = [L, L_bound, current_fitness, current_cost]
                costs.append(cost_list)

            # Line search: grow L until the intermediate cost exceeds the
            # fitness of the candidate (plus a small tolerance).
            while (current_fitness + epsilon_cost >=
                    Linalg._intermediate_cost(matrix_a, matrix_b, next_W, Z, L)):
                if L > L_bound:
                    print("Trace Norm Regression: numerical error detected at iteration "+str(iter_counter))
                    break
                L = gamma * L
                try:
                    next_W, tracenorm = Linalg._next_tracenorm_guess(
                        matrix_a, matrix_b, lambda_, L, Z, at_times_a)
                except LinAlgError:
                    print("LinAlgError caught in trace norm regression")
                    linalg_error_caught = True
                    break

                last_cost = current_cost
                # Fitness is measured against the target B, exactly as in the
                # outer loop above.
                current_fitness = Linalg._fitness(matrix_a, matrix_b, next_W)
                current_cost = current_fitness + lambda_*tracenorm

            if linalg_error_caught:
                # Keep the last W that was computed without error.
                break

            previous_W = W
            W = next_W
            previous_alpha = alpha
            alpha = (1.0 + sqrt(1.0 + 4.0*alpha*alpha))/2.0
            # The momentum step of the accelerated scheme is disabled: Z simply
            # follows W, so alpha / previous_W are currently bookkeeping only.
            Z = W
            # Z = W + ((alpha - 1)/alpha)*(W - previous_W)
            iter_counter += 1

        sys.stdout.flush()
        # Drop any imaginary component before wrapping the result.
        W = np.real(W)
        return DenseMatrix(W), costs