Esempio n. 1
0
    def test_train_intercept(self):
        """Train a lexical function with an intercept, then compose with it.

        Phrase vectors are generated as adjective-matrix times noun-vector.
        A least-squares learner with ``intercept=True`` is trained on four
        (adjective, noun, phrase) triples, and the learned function space is
        then wrapped in a fresh model that recomposes the same phrases.
        """
        adj1_weights = DenseMatrix(np.mat([[3, 4], [5, 6]]))
        adj2_weights = DenseMatrix(np.mat([[1, 2], [3, 4]]))

        phrase_triples = [
            ("a1", "man", "a1_man"),
            ("a2", "car", "a2_car"),
            ("a1", "boy", "a1_boy"),
            ("a2", "boy", "a2_boy"),
        ]

        noun_mat = DenseMatrix(np.mat([[13, 21], [3, 4], [5, 6]]))
        noun_space = Space(noun_mat, ["man", "car", "boy"], self.ft)

        # One row per (adjective, noun) pair: the a1 phrases first, then a2.
        a1_phrases = (adj1_weights * noun_mat.transpose()).transpose()
        a2_phrases = (adj2_weights * noun_mat.transpose()).transpose()
        phrase_mat = a1_phrases.vstack(a2_phrases)

        phrase_ids = ["a1_man", "a1_car", "a1_boy",
                      "a2_man", "a2_car", "a2_boy"]
        phrase_space = Space(phrase_mat, phrase_ids, self.ft)

        # ---- training ----
        trainer = LexicalFunction(
            learner=LstsqRegressionLearner(intercept=True))
        trainer._MIN_SAMPLES = 1
        trainer.train(phrase_triples, noun_space, phrase_space)
        learned_space = trainer.function_space

        # The value checks on the learned weights are disabled; only the
        # reshape calls are still executed.
        adj1_weights.reshape((1, 4))
        # np.testing.assert_array_almost_equal(
        #     adj1_weights.mat, learned_space.cooccurrence_matrix.mat[0])

        adj2_weights.reshape((1, 4))
        # np.testing.assert_array_almost_equal(
        #     adj2_weights.mat, learned_space.cooccurrence_matrix.mat[1])

        self.assertListEqual(learned_space.id2row, ["a1", "a2"])
        self.assertTupleEqual(learned_space.element_shape, (2, 3))

        # ---- composition ----
        # These two matrices are built but not used by any assertion below.
        adj1_weights = DenseMatrix(np.mat([[3, 4, 5, 6]]))
        adj2_weights = DenseMatrix(np.mat([[1, 2, 3, 4]]))
        learned_mat = learned_space.cooccurrence_matrix

        function_space = Space(learned_mat, ["a1", "a2"], [],
                               element_shape=(2, 3))
        composer = LexicalFunction(function_space=function_space,
                                   intercept=True)
        composer._MIN_SAMPLES = 1
        composed = composer.compose(phrase_triples, noun_space)

        self.assertListEqual(
            composed.id2row, ["a1_man", "a2_car", "a1_boy", "a2_boy"])
        self.assertListEqual(composed.id2column, [])

        self.assertEqual(composed.element_shape, (2,))

        # Rows 0, 4, 2, 5 of the phrase matrix are a1_man, a2_car, a1_boy
        # and a2_boy, i.e. the composed phrases in training order.
        actual = composed.cooccurrence_matrix.mat
        expected = phrase_mat[[0, 4, 2, 5]].mat
        np.testing.assert_array_almost_equal(actual, expected, decimal=8)
Esempio n. 2
0
    def test_train_intercept(self):
        """Check training and composition of an intercept-enabled model.

        The phrase space is built by multiplying each adjective matrix with
        the noun vectors. After training, the row ids and element shape of
        the learned function space are checked; the learned matrix is then
        placed in a new space and used to compose the training phrases.
        """
        first_adj = DenseMatrix(np.mat([[3, 4], [5, 6]]))
        second_adj = DenseMatrix(np.mat([[1, 2], [3, 4]]))

        examples = [
            ("a1", "man", "a1_man"),
            ("a2", "car", "a2_car"),
            ("a1", "boy", "a1_boy"),
            ("a2", "boy", "a2_boy"),
        ]

        nouns = DenseMatrix(np.mat([[13, 21], [3, 4], [5, 6]]))
        nouns_space = Space(nouns, ["man", "car", "boy"], self.ft)

        # Stack the a1 phrase vectors on top of the a2 phrase vectors.
        first_block = (first_adj * nouns.transpose()).transpose()
        second_block = (second_adj * nouns.transpose()).transpose()
        stacked = first_block.vstack(second_block)

        stacked_ids = ["a1_man", "a1_car", "a1_boy",
                       "a2_man", "a2_car", "a2_boy"]
        stacked_space = Space(stacked, stacked_ids, self.ft)

        # ---- training ----
        fitted = LexicalFunction(
            learner=LstsqRegressionLearner(intercept=True))
        fitted.train(examples, nouns_space, stacked_space)
        fitted_space = fitted.function_space

        # The comparisons of the learned weights are disabled; the reshape
        # calls are still executed.
        first_adj.reshape((1, 4))
        # np.testing.assert_array_almost_equal(
        #     first_adj.mat, fitted_space.cooccurrence_matrix.mat[0])

        second_adj.reshape((1, 4))
        # np.testing.assert_array_almost_equal(
        #     second_adj.mat, fitted_space.cooccurrence_matrix.mat[1])

        self.assertListEqual(fitted_space.id2row, ["a1", "a2"])
        self.assertTupleEqual(fitted_space.element_shape, (2, 3))

        # ---- composition ----
        # Built but not referenced by the assertions that follow.
        first_adj = DenseMatrix(np.mat([[3, 4, 5, 6]]))
        second_adj = DenseMatrix(np.mat([[1, 2, 3, 4]]))
        fitted_mat = fitted_space.cooccurrence_matrix

        rebuilt_space = Space(fitted_mat, ["a1", "a2"], [],
                              element_shape=(2, 3))
        composer = LexicalFunction(function_space=rebuilt_space,
                                   intercept=True)
        result = composer.compose(examples, nouns_space)

        self.assertListEqual(
            result.id2row, ["a1_man", "a2_car", "a1_boy", "a2_boy"])
        self.assertListEqual(result.id2column, [])

        self.assertEqual(result.element_shape, (2,))

        # Rows 0, 4, 2, 5 of the stacked matrix correspond to a1_man,
        # a2_car, a1_boy and a2_boy.
        got = result.cooccurrence_matrix.mat
        wanted = stacked[[0, 4, 2, 5]].mat
        np.testing.assert_array_almost_equal(got, wanted, decimal=8)
Esempio n. 3
0
    def test_3d(self):
        """Train verb-object matrices, then verb tensors, then recompose.

        Sentence vectors are generated from four known verb-object matrices.
        A first model learns those matrices back from the sentences; a second
        model is trained on the learned verb-object space and must yield
        functions of element shape (2, 2, 2) that recompose that space.
        """
        # Expected verb rows, compared against the second model's output.
        verb_expected = DenseMatrix(np.mat([[0, 0, 1, 1, 2, 2, 3, 3],    # hate
                                            [0, 1, 2, 4, 5, 6, 8, 9]]))  # love

        hate_boy = DenseMatrix(np.mat([[0, 11], [22, 33]]))
        hate_man = DenseMatrix(np.mat([[0, 7], [14, 21]]))
        love_boy = DenseMatrix(np.mat([[6, 34], [61, 94]]))
        love_car = DenseMatrix(np.mat([[2, 10], [17, 26]]))
        # Same order as the row ids expected from the first model.
        vo_expected = [hate_boy, hate_man, love_boy, love_car]

        vo_triples = [
            ("hate_boy", "man", "man_hate_boy"),
            ("hate_man", "man", "man_hate_man"),
            ("hate_boy", "boy", "boy_hate_boy"),
            ("hate_man", "boy", "boy_hate_man"),
            ("love_car", "boy", "boy_love_car"),
            ("love_boy", "man", "man_love_boy"),
            ("love_boy", "boy", "boy_love_boy"),
            ("love_car", "man", "man_love_car"),
        ]

        # Open question: what should happen if a phrase is not found?
        verb_triples = [
            ("love", "boy", "love_boy"),
            ("hate", "man", "hate_man"),
            ("hate", "boy", "hate_boy"),
            ("love", "car", "love_car"),
        ]

        sentence_ids = [
            "man_hate_boy", "car_hate_boy", "boy_hate_boy",
            "man_hate_man", "car_hate_man", "boy_hate_man",
            "man_love_boy", "car_love_boy", "boy_love_boy",
            "man_love_car", "car_love_car", "boy_love_car",
        ]
        nouns = DenseMatrix(np.mat([[3, 4], [1, 2], [5, 6]]))

        noun_space = Space(nouns, ["man", "car", "boy"], self.ft)

        # One block of sentence vectors per verb-object matrix.
        sentence_blocks = []
        for vo_matrix in vo_expected:
            sentence_blocks.append(
                (vo_matrix * nouns.transpose()).transpose())

        sentence_mat = hate_boy.nary_vstack(sentence_blocks)
        sentence_space = Space(sentence_mat, sentence_ids, self.ft)

        # ---- train 2d ----
        vo_model = LexicalFunction(
            learner=LstsqRegressionLearner(intercept=False))
        vo_model._MIN_SAMPLES = 1
        vo_model.train(vo_triples, noun_space, sentence_space)
        vo_space = vo_model.function_space

        self.assertListEqual(
            vo_space.id2row,
            ["hate_boy", "hate_man", "love_boy", "love_car"])
        self.assertTupleEqual(vo_space.element_shape, (2, 2))
        # Each expected matrix is reshaped to one row before the comparison.
        for row_index, vo_matrix in enumerate(vo_expected):
            vo_matrix.reshape((1, 4))
            np.testing.assert_array_almost_equal(
                vo_matrix.mat,
                vo_space.cooccurrence_matrix.mat[row_index])

        # ---- train 3d ----
        verb_model = LexicalFunction(
            learner=LstsqRegressionLearner(intercept=False))
        verb_model._MIN_SAMPLES = 1
        verb_model.train(verb_triples, noun_space, vo_space)
        verb_space = verb_model.function_space
        np.testing.assert_array_almost_equal(
            verb_expected.mat, verb_space.cooccurrence_matrix.mat)
        self.assertListEqual(verb_space.id2row, ["hate", "love"])
        self.assertTupleEqual(verb_space.element_shape, (2, 2, 2))

        # ---- compose 3d ----
        recomposed = verb_model.compose(verb_triples, noun_space)
        # compose() row order follows the training data, hence the sort.
        self.assertListEqual(list(vo_space.id2row),
                             sorted(recomposed.id2row))
        shared_rows = vo_space.id2row
        trained_rows = vo_space.get_rows(shared_rows)
        composed_rows = recomposed.get_rows(shared_rows)
        np.testing.assert_array_almost_equal(
            trained_rows.mat, composed_rows.mat, decimal=7)
        self.assertTupleEqual(vo_space.element_shape,
                              recomposed.element_shape)
    def compute_matreps(self, vecspace, matspace, multiply_matrices=False):
        '''
        Compute the symbolic and numeric matrix representations of a papfunc
        node and store them in ``self._matrep`` and ``self._numrep``.

        Terminal nodes obtain both representations from
        ``insert_terminal_node_representation``. A node with a single child
        inherits that child's representations. A node with two children and
        no symbolic representation yet combines the representations already
        stored on its children: the arity of a child is
        ``len(child._matrep) - 1`` and the child with the larger arity is
        applied as a function to the other one. Children of equal arity are
        added componentwise, and a child marked 'empty' is ignored.

        Parameters:
            vecspace: vector space, passed on to
                ``insert_terminal_node_representation``.
            matspace: matrix space, passed on to
                ``insert_terminal_node_representation``.
            multiply_matrices: if True, matrices found at the same position
                in both daughters are multiplied rather than summed during
                function application.

        Raises:
            ValueError: for a non-terminal node without children, for a
                binary node one of whose daughters has an empty symbolic
                representation, and for nodes with more than two children.
        '''
        # for terminal nodes call insert_terminal_node_representation
        if self.is_terminal():
            matrep, temp_numrep = self.insert_terminal_node_representation(vecspace, matspace)
            self._matrep = matrep
            if temp_numrep[0] == "empty":
                numrep = []
            else:
                numrep = [temp_numrep[0].transpose()]
                dimensionality = temp_numrep[0].shape[1]
                # all matrices are stored flattened, as long vectors. We need
                # to reshape them before we use them in computations
                for x in range(1, len(temp_numrep)):
                    y = DenseMatrix(temp_numrep[x])
                    # Floor division keeps the column count an integer; true
                    # division would hand reshape a float on Python 3.
                    y.reshape((dimensionality, y.shape[1] // dimensionality))
                    numrep.append(y)
            self._numrep = numrep
        # raise an exception for a non-terminal node without children
        elif len(self._children) == 0:
            raise ValueError("Non-terminal non-branching node!")
        # inherit the value of the single daughter in case of unary branching
        if len(self._children) == 1:
            self._matrep = self.get_child(0)._matrep
            self._numrep = self.get_child(0)._numrep
        # apply composition for binary branching nodes
        if len(self._children) == 2 and self._matrep == []:
            matrep1 = self.get_child(0)._matrep
            if not matrep1:
                raise ValueError("Empty matrix representation for node %s!" % self.get_child(0))
            matrep2 = self.get_child(1)._matrep
            if not matrep2:
                raise ValueError("Empty matrix representation for node %s!" % self.get_child(1))
            arity1 = len(matrep1) - 1
            arity2 = len(matrep2) - 1
            # a daughter whose first symbolic element ends in 'empty' is
            # skipped by function application and replaced by its sister
            empty1 = re.search('empty$', matrep1[0])
            empty2 = re.search('empty$', matrep2[0])
            # first, compute symbolic matrix representation
            # default to componentwise addition for daughters of equal arity
            if arity1 == arity2:
                for x in range(0, arity1 + 1):
                    self._matrep.append('(' + matrep1[x] + '+' + matrep2[x] + ')')
            # left function application
            if arity1 < arity2 and not empty2 and not empty1:
                for x in range(0, arity2):
                    if x == 0:  # compute the vector
                        self._matrep.append('(' + matrep2[x] + '+' + matrep2[arity2] + '*' + matrep1[x] + ')')
                    # compute a matrix
                    # If both daughters have matrices in the xth position in
                    # their vector-matrix structures, add or multiply those
                    # matrices according to the multiply_matrices parameter
                    elif x < len(matrep1):
                        if multiply_matrices:
                            self._matrep.append('(' + matrep2[x] + '*' + matrep1[x] + ')')
                        else:
                            self._matrep.append('(' + matrep2[x] + '+' + matrep1[x] + ')')
                    # inherit the function's extra lexical matrix
                    else:
                        self._matrep.append(matrep2[x])
            # right function application
            if arity1 > arity2 and not empty2 and not empty1:
                for x in range(0, arity1):
                    if x == 0:  # compute the vector
                        self._matrep.append('(' + matrep1[x] + '+' + matrep1[arity1] + '*' + matrep2[x] + ')')
                    # compute a matrix (same rule as in left application)
                    elif x < len(matrep2):
                        if multiply_matrices:
                            self._matrep.append('(' + matrep1[x] + '*' + matrep2[x] + ')')
                        else:
                            self._matrep.append('(' + matrep1[x] + '+' + matrep2[x] + ')')
                    # inherit the function's extra lexical matrix
                    else:
                        self._matrep.append(matrep1[x])
            # ignore 'empty' elements: the mother takes over the sister's
            # representation (the same list object, not a copy)
            if empty1:
                self._matrep = matrep2
            if empty2:
                self._matrep = matrep1
            # computing numeric matrix representation of a node from those of
            # its two daughters
            numrep1 = self.get_child(0)._numrep
            numrep2 = self.get_child(1)._numrep
            if arity1 == arity2 and numrep1 and numrep2:
                for x in range(0, arity1 + 1):
                    self._numrep.append(numrep1[x].__add__(numrep2[x]))
            # left function application
            if arity1 < arity2 and not numrep1 == [] and not numrep2 == []:
                for x in range(0, arity2):
                    if x == 0:  # compute the vector
                        self._numrep.append(numrep2[x].__add__(numrep2[arity2] * padd_matrix(numrep1[x], 0)))
                    elif x < len(numrep1):
                        if multiply_matrices:
                            self._numrep.append(numrep2[x] * numrep1[x])
                        else:
                            self._numrep.append(numrep1[x].__add__(numrep2[x]))
                    else:
                        self._numrep.append(numrep2[x])
            # right function application
            if arity1 > arity2 and not numrep1 == [] and not numrep2 == []:
                for x in range(0, arity1):
                    if x == 0:  # compute the vector
                        self._numrep.append(numrep1[x].__add__(numrep1[arity1] * padd_matrix(numrep2[x], 0)))
                    elif x < len(numrep2):
                        if multiply_matrices:
                            # NOTE(review): the symbolic form built above for
                            # this case is matrep1*matrep2, but the product
                            # here is numrep2*numrep1 - confirm which operand
                            # order is intended.
                            self._numrep.append(numrep2[x] * numrep1[x])
                        else:
                            self._numrep.append(numrep1[x].__add__(numrep2[x]))
                    else:
                        self._numrep.append(numrep1[x])
            # ignore 'empty' elements
            if numrep1 == []:
                self._numrep = numrep2
            if numrep2 == []:
                self._numrep = numrep1
        # end of numrep computation
        # Raise an exception for non-binary branching - we don't want to
        # handle those structures
        if len(self._children) > 2:
            raise ValueError("Matrix representations are not defined for trees with more than binary branching")
Esempio n. 5
0
 def compute_matreps(self, vecspace, matspace, multiply_matrices=False):
     '''
     Compute the symbolic and numeric matrix representations of a papfunc
     node and store them in ``self._matrep`` and ``self._numrep``.

     Terminal nodes obtain both representations from
     ``insert_terminal_node_representation``. A node with a single child
     inherits that child's representations. A node with two children and no
     symbolic representation yet combines the representations already stored
     on its children: the arity of a child is ``len(child._matrep) - 1`` and
     the child with the larger arity is applied as a function to the other
     one. Children of equal arity are added componentwise, and a child
     marked 'empty' is ignored.

     Parameters:
         vecspace: vector space, passed on to
             ``insert_terminal_node_representation``.
         matspace: matrix space, passed on to
             ``insert_terminal_node_representation``.
         multiply_matrices: if True, matrices found at the same position in
             both daughters are multiplied rather than summed during
             function application.

     Raises:
         ValueError: for a non-terminal node without children, for a binary
             node one of whose daughters has an empty symbolic
             representation, and for nodes with more than two children.
     '''
     # for terminal nodes do lexical insertions by calling
     # insert_terminal_node_representation
     if self.is_terminal():
         matrep, temp_numrep = self.insert_terminal_node_representation(vecspace, matspace)
         self._matrep = matrep
         if temp_numrep[0] == "empty":
             # default semantic representation for syntactic elements we ignore
             numrep = []
         else:
             numrep = [temp_numrep[0].transpose()]
             dimensionality = temp_numrep[0].shape[1]
             # Matrices are "flattened", stored as vectors.
             # We reshape each matrix to a normal shape (usually square)
             for x in range(1, len(temp_numrep)):
                 y = DenseMatrix(temp_numrep[x])
                 # Floor division keeps the column count an integer; true
                 # division would hand reshape a float on Python 3.
                 y.reshape((dimensionality, y.shape[1] // dimensionality))
                 numrep.append(y)
         self._numrep = numrep
     # raise an exception for a non-terminal node without children
     elif len(self._children) == 0:
         raise ValueError("Non-terminal non-branching node!")
     # inherit the value of the single daughter in case of unary branching
     if len(self._children) == 1:
         self._matrep = self.get_child(0)._matrep
         self._numrep = self.get_child(0)._numrep
     # apply composition for binary branching nodes
     if len(self._children) == 2 and self._matrep == []:
         matrep1 = self.get_child(0)._matrep
         if not matrep1:
             raise ValueError("Empty matrix representation for node %s!" % self.get_child(0))
         matrep2 = self.get_child(1)._matrep
         if not matrep2:
             raise ValueError("Empty matrix representation for node %s!" % self.get_child(1))
         # get the arity of two daughter nodes in order to determine which of
         # them is the function and which is the argument
         arity1 = len(matrep1) - 1
         arity2 = len(matrep2) - 1
         # a daughter whose first symbolic element ends in 'empty' is skipped
         # by function application and replaced by its sister below
         empty1 = re.search('empty$', matrep1[0])
         empty2 = re.search('empty$', matrep2[0])
         # first, compute symbolic matrix representation
         if arity1 == arity2:
             for x in range(0, arity1 + 1):
                 self._matrep.append('(' + matrep1[x] + '+' + matrep2[x] + ')')
         # left application
         if arity1 < arity2 and not empty2 and not empty1:
             for x in range(0, arity2):
                 if x == 0:  # compute vector of the mother node
                     self._matrep.append('(' + matrep2[x] + '+' + matrep2[arity2] + '*' + matrep1[x] + ')')
                 elif x < len(matrep1):  # compute matrices of the mother node
                     if multiply_matrices:
                         self._matrep.append('(' + matrep2[x] + '*' + matrep1[x] + ')')
                     else:
                         self._matrep.append('(' + matrep2[x] + '+' + matrep1[x] + ')')
                 else:
                     self._matrep.append(matrep2[x])
         # right application
         if arity1 > arity2 and not empty2 and not empty1:
             for x in range(0, arity1):
                 if x == 0:  # compute vector of the mother node
                     self._matrep.append('(' + matrep1[x] + '+' + matrep1[arity1] + '*' + matrep2[x] + ')')
                 elif x < len(matrep2):  # compute matrices of the mother node
                     if multiply_matrices:
                         self._matrep.append('(' + matrep1[x] + '*' + matrep2[x] + ')')
                     else:
                         self._matrep.append('(' + matrep1[x] + '+' + matrep2[x] + ')')
                 else:
                     self._matrep.append(matrep1[x])
         # if one of the daughters is 'empty' (marked to be ignored), ignore
         # it: the mother takes over the sister's list object, not a copy
         if empty1:
             self._matrep = matrep2
         if empty2:
             self._matrep = matrep1
         # computing numeric matrix representation of a node from those of
         # its two daughters; the arities computed above establish the
         # directionality of function application
         numrep1 = self.get_child(0)._numrep
         numrep2 = self.get_child(1)._numrep
         if arity1 == arity2 and numrep1 and numrep2:
             for x in range(0, arity1 + 1):
                 self._numrep.append(numrep1[x].__add__(numrep2[x]))
         # left application
         if arity1 < arity2 and not numrep1 == [] and not numrep2 == []:
             for x in range(0, arity2):
                 # compute the vector
                 if x == 0:
                     self._numrep.append(numrep2[x].__add__(numrep2[arity2] * numrep1[x]))
                 # compute a matrix
                 elif x < len(numrep1):
                     if multiply_matrices:
                         self._numrep.append(numrep2[x] * numrep1[x])
                     else:
                         self._numrep.append(numrep1[x].__add__(numrep2[x]))
                 else:
                     self._numrep.append(numrep2[x])
         # right application
         if arity1 > arity2 and not numrep1 == [] and not numrep2 == []:
             for x in range(0, arity1):
                 # compute the vector
                 if x == 0:
                     self._numrep.append(numrep1[x].__add__(numrep1[arity1] * numrep2[x]))
                 # compute a matrix
                 elif x < len(numrep2):
                     if multiply_matrices:
                         # NOTE(review): the symbolic form built above for
                         # this case is matrep1*matrep2, but the product here
                         # is numrep2*numrep1 - confirm which operand order
                         # is intended.
                         self._numrep.append(numrep2[x] * numrep1[x])
                     else:
                         self._numrep.append(numrep1[x].__add__(numrep2[x]))
                 else:
                     self._numrep.append(numrep1[x])
         # ignore 'empty' elements in composition
         if numrep1 == []:
             self._numrep = numrep2
         if numrep2 == []:
             self._numrep = numrep1
     # end of numrep computation
     # Raise an exception for non-binary branching - we don't want to handle
     # those structures
     if len(self._children) > 2:
         raise ValueError("Matrix representations are not defined for trees with more than binary branching")
Esempio n. 6
0
    def test_3d(self):
        """Learn verb-object matrices and verb tensors, then recompose.

        Sentence vectors are produced from four known verb-object matrices.
        The first model is trained on the sentences and checked against those
        matrices; the second model is trained on the resulting verb-object
        space, checked for element shape (2, 2, 2), and used to compose the
        verb-object space again.
        """
        # Expected verb rows, compared against the second model's output.
        expected_verbs = DenseMatrix(np.mat([[0, 0, 1, 1, 2, 2, 3, 3],    # hate
                                             [0, 1, 2, 4, 5, 6, 8, 9]]))  # love

        m_hate_boy = DenseMatrix(np.mat([[0, 11], [22, 33]]))
        m_hate_man = DenseMatrix(np.mat([[0, 7], [14, 21]]))
        m_love_boy = DenseMatrix(np.mat([[6, 34], [61, 94]]))
        m_love_car = DenseMatrix(np.mat([[2, 10], [17, 26]]))
        # Same order as the row ids expected from the first model.
        expected_vo = (m_hate_boy, m_hate_man, m_love_boy, m_love_car)

        subject_data = [
            ("hate_boy", "man", "man_hate_boy"),
            ("hate_man", "man", "man_hate_man"),
            ("hate_boy", "boy", "boy_hate_boy"),
            ("hate_man", "boy", "boy_hate_man"),
            ("love_car", "boy", "boy_love_car"),
            ("love_boy", "man", "man_love_boy"),
            ("love_boy", "boy", "boy_love_boy"),
            ("love_car", "man", "man_love_car"),
        ]

        # Open question: what should happen if a phrase is not found?
        object_data = [
            ("love", "boy", "love_boy"),
            ("hate", "man", "hate_man"),
            ("hate", "boy", "hate_boy"),
            ("love", "car", "love_car"),
        ]

        all_sentences = [
            "man_hate_boy", "car_hate_boy", "boy_hate_boy",
            "man_hate_man", "car_hate_man", "boy_hate_man",
            "man_love_boy", "car_love_boy", "boy_love_boy",
            "man_love_car", "car_love_car", "boy_love_car",
        ]
        noun_vectors = DenseMatrix(np.mat([[3, 4], [1, 2], [5, 6]]))

        noun_space = Space(noun_vectors, ["man", "car", "boy"], self.ft)

        # One block of sentence vectors per verb-object matrix.
        blocks = []
        for vo_mat in expected_vo:
            blocks.append((vo_mat * noun_vectors.transpose()).transpose())

        all_sentence_mat = m_hate_boy.nary_vstack(blocks)
        sentence_space = Space(all_sentence_mat, all_sentences, self.ft)

        # ---- train 2d ----
        first_model = LexicalFunction(
            learner=LstsqRegressionLearner(intercept=False))
        first_model.train(subject_data, noun_space, sentence_space)
        vo_space = first_model.function_space

        self.assertListEqual(
            vo_space.id2row,
            ["hate_boy", "hate_man", "love_boy", "love_car"])
        self.assertTupleEqual(vo_space.element_shape, (2, 2))
        # Each expected matrix is reshaped to one row before the comparison.
        for position, vo_mat in enumerate(expected_vo):
            vo_mat.reshape((1, 4))
            np.testing.assert_array_almost_equal(
                vo_mat.mat,
                vo_space.cooccurrence_matrix.mat[position])

        # ---- train 3d ----
        second_model = LexicalFunction(
            learner=LstsqRegressionLearner(intercept=False))
        second_model.train(object_data, noun_space, vo_space)
        verb_space = second_model.function_space
        np.testing.assert_array_almost_equal(
            expected_verbs.mat, verb_space.cooccurrence_matrix.mat)
        self.assertListEqual(verb_space.id2row, ["hate", "love"])
        self.assertTupleEqual(verb_space.element_shape, (2, 2, 2))

        # ---- compose 3d ----
        vo_space_again = second_model.compose(object_data, noun_space)
        # The composed row ids are sorted before the comparison.
        self.assertListEqual(list(vo_space.id2row),
                             sorted(vo_space_again.id2row))
        wanted_rows = vo_space.id2row
        rows_trained = vo_space.get_rows(wanted_rows)
        rows_composed = vo_space_again.get_rows(wanted_rows)
        np.testing.assert_array_almost_equal(
            rows_trained.mat, rows_composed.mat, decimal=7)
        self.assertTupleEqual(vo_space.element_shape,
                              vo_space_again.element_shape)