def test_train_intercept(self):
    """Train a LexicalFunction with an intercept-fitting learner, then compose.

    Two 2x2 matrices (a1, a2) are applied to three noun vectors to build
    the phrase space. The function space is then learned from the training
    triples with ``LstsqRegressionLearner(intercept=True)`` and used to
    compose the training phrases again.
    """
    a1_mat = DenseMatrix(np.mat([[3, 4], [5, 6]]))
    a2_mat = DenseMatrix(np.mat([[1, 2], [3, 4]]))

    train_data = [("a1", "man", "a1_man"),
                  ("a2", "car", "a2_car"),
                  ("a1", "boy", "a1_boy"),
                  ("a2", "boy", "a2_boy")]

    n_mat = DenseMatrix(np.mat([[13, 21], [3, 4], [5, 6]]))
    n_space = Space(n_mat, ["man", "car", "boy"], self.ft)

    # Phrase vectors: one row per (function, noun) pair, a1 rows first.
    an1_mat = (a1_mat * n_mat.transpose()).transpose()
    an2_mat = (a2_mat * n_mat.transpose()).transpose()
    an_mat = an1_mat.vstack(an2_mat)
    an_space = Space(an_mat,
                     ["a1_man", "a1_car", "a1_boy",
                      "a2_man", "a2_car", "a2_boy"],
                     self.ft)

    # test train
    model = LexicalFunction(learner=LstsqRegressionLearner(intercept=True))
    model._MIN_SAMPLES = 1
    model.train(train_data, n_space, an_space)
    a_space = model.function_space

    # The learned rows are not compared with a1_mat / a2_mat: each learned
    # function has element shape (2, 3) (asserted below), whereas a1_mat and
    # a2_mat are 2x2. The previously disabled comparisons and the in-place
    # reshapes that only served them have been dropped.
    self.assertListEqual(a_space.id2row, ["a1", "a2"])
    self.assertTupleEqual(a_space.element_shape, (2, 3))

    # test compose
    a_mat = a_space.cooccurrence_matrix
    a_space = Space(a_mat, ["a1", "a2"], [], element_shape=(2, 3))
    model = LexicalFunction(function_space=a_space, intercept=True)
    model._MIN_SAMPLES = 1
    comp_space = model.compose(train_data, n_space)

    self.assertListEqual(comp_space.id2row,
                         ["a1_man", "a2_car", "a1_boy", "a2_boy"])
    self.assertListEqual(comp_space.id2column, [])
    self.assertEqual(comp_space.element_shape, (2,))

    # Rows 0, 4, 2, 5 of an_mat are a1_man, a2_car, a1_boy, a2_boy.
    np.testing.assert_array_almost_equal(comp_space.cooccurrence_matrix.mat,
                                         an_mat[[0, 4, 2, 5]].mat, 8)
def test_train_intercept(self):
    """Train a LexicalFunction with an intercept-fitting learner, then compose.

    Two 2x2 matrices (a1, a2) are applied to three noun vectors to build
    the phrase space. The function space is then learned from the training
    triples with ``LstsqRegressionLearner(intercept=True)`` and used to
    compose the training phrases again.
    """
    a1_mat = DenseMatrix(np.mat([[3, 4], [5, 6]]))
    a2_mat = DenseMatrix(np.mat([[1, 2], [3, 4]]))

    train_data = [("a1", "man", "a1_man"),
                  ("a2", "car", "a2_car"),
                  ("a1", "boy", "a1_boy"),
                  ("a2", "boy", "a2_boy")]

    n_mat = DenseMatrix(np.mat([[13, 21], [3, 4], [5, 6]]))
    n_space = Space(n_mat, ["man", "car", "boy"], self.ft)

    # Phrase vectors: one row per (function, noun) pair, a1 rows first.
    an1_mat = (a1_mat * n_mat.transpose()).transpose()
    an2_mat = (a2_mat * n_mat.transpose()).transpose()
    an_mat = an1_mat.vstack(an2_mat)
    an_space = Space(an_mat,
                     ["a1_man", "a1_car", "a1_boy",
                      "a2_man", "a2_car", "a2_boy"],
                     self.ft)

    # test train
    model = LexicalFunction(learner=LstsqRegressionLearner(intercept=True))
    model.train(train_data, n_space, an_space)
    a_space = model.function_space

    # The learned rows are not compared with a1_mat / a2_mat: each learned
    # function has element shape (2, 3) (asserted below), whereas a1_mat and
    # a2_mat are 2x2. The previously disabled comparisons and the in-place
    # reshapes that only served them have been dropped.
    self.assertListEqual(a_space.id2row, ["a1", "a2"])
    self.assertTupleEqual(a_space.element_shape, (2, 3))

    # test compose
    a_mat = a_space.cooccurrence_matrix
    a_space = Space(a_mat, ["a1", "a2"], [], element_shape=(2, 3))
    model = LexicalFunction(function_space=a_space, intercept=True)
    comp_space = model.compose(train_data, n_space)

    self.assertListEqual(comp_space.id2row,
                         ["a1_man", "a2_car", "a1_boy", "a2_boy"])
    self.assertListEqual(comp_space.id2column, [])
    self.assertEqual(comp_space.element_shape, (2,))

    # Rows 0, 4, 2, 5 of an_mat are a1_man, a2_car, a1_boy, a2_boy.
    np.testing.assert_array_almost_equal(comp_space.cooccurrence_matrix.mat,
                                         an_mat[[0, 4, 2, 5]].mat, 8)
def test_3d(self):
    """Train a 2-d lexical function, stack a 3-d one on top, then compose.

    Verb-object matrices are first learned from sentence vectors; verb
    tensors are then learned from those verb-object matrices, and finally
    the verb tensors are composed with nouns to recover the verb-object
    space.
    """
    # setting up
    expected_v = DenseMatrix(np.mat([[0, 0, 1, 1, 2, 2, 3, 3],     # hate
                                     [0, 1, 2, 4, 5, 6, 8, 9]]))    # love

    hate_boy = DenseMatrix(np.mat([[0, 11], [22, 33]]))
    hate_man = DenseMatrix(np.mat([[0, 7], [14, 21]]))
    love_boy = DenseMatrix(np.mat([[6, 34], [61, 94]]))
    love_car = DenseMatrix(np.mat([[2, 10], [17, 26]]))
    # Same order as the expected rows of the learned verb-object space.
    expected_vo = [hate_boy, hate_man, love_boy, love_car]

    train_vo_data = [("hate_boy", "man", "man_hate_boy"),
                     ("hate_man", "man", "man_hate_man"),
                     ("hate_boy", "boy", "boy_hate_boy"),
                     ("hate_man", "boy", "boy_hate_man"),
                     ("love_car", "boy", "boy_love_car"),
                     ("love_boy", "man", "man_love_boy"),
                     ("love_boy", "boy", "boy_love_boy"),
                     ("love_car", "man", "man_love_car")]

    # Open question carried over from the original author:
    # what should happen if a phrase is not found?
    train_v_data = [("love", "boy", "love_boy"),
                    ("hate", "man", "hate_man"),
                    ("hate", "boy", "hate_boy"),
                    ("love", "car", "love_car")]

    sentences = ["man_hate_boy", "car_hate_boy", "boy_hate_boy",
                 "man_hate_man", "car_hate_man", "boy_hate_man",
                 "man_love_boy", "car_love_boy", "boy_love_boy",
                 "man_love_car", "car_love_car", "boy_love_car"]

    n_mat = DenseMatrix(np.mat([[3, 4], [1, 2], [5, 6]]))
    n_space = Space(n_mat, ["man", "car", "boy"], self.ft)

    # One block of sentence vectors per verb-object matrix (built while the
    # matrices are still 2x2; they are reshaped in place further down).
    sentence_blocks = [(vo * n_mat.transpose()).transpose()
                       for vo in expected_vo]
    s_mat = hate_boy.nary_vstack(sentence_blocks)
    s_space = Space(s_mat, sentences, self.ft)

    # test train 2d
    vo_model = LexicalFunction(learner=LstsqRegressionLearner(intercept=False))
    vo_model._MIN_SAMPLES = 1
    vo_model.train(train_vo_data, n_space, s_space)
    vo_space = vo_model.function_space

    self.assertListEqual(vo_space.id2row,
                         ["hate_boy", "hate_man", "love_boy", "love_car"])
    self.assertTupleEqual(vo_space.element_shape, (2, 2))

    # Each learned row must equal the flattened generating matrix.
    for row, vo in enumerate(expected_vo):
        vo.reshape((1, 4))
        np.testing.assert_array_almost_equal(
            vo.mat, vo_space.cooccurrence_matrix.mat[row])

    # test train 3d
    v_model = LexicalFunction(learner=LstsqRegressionLearner(intercept=False))
    v_model._MIN_SAMPLES = 1
    v_model.train(train_v_data, n_space, vo_space)
    v_space = v_model.function_space

    np.testing.assert_array_almost_equal(expected_v.mat,
                                         v_space.cooccurrence_matrix.mat)
    self.assertListEqual(v_space.id2row, ["hate", "love"])
    self.assertTupleEqual(v_space.element_shape, (2, 2, 2))

    # test compose 3d
    composed_vo_space = v_model.compose(train_v_data, n_space)

    self.assertListEqual(list(vo_space.id2row),
                         sorted(composed_vo_space.id2row))

    learned_rows = vo_space.get_rows(vo_space.id2row)
    composed_rows = composed_vo_space.get_rows(vo_space.id2row)
    np.testing.assert_array_almost_equal(learned_rows.mat,
                                         composed_rows.mat, 7)

    self.assertTupleEqual(vo_space.element_shape,
                          composed_vo_space.element_shape)
def compute_matreps(self, vecspace, matspace, multiply_matrices=False):
    '''
    Compute the symbolic and numeric matrix representations of a papfunc
    node and store them in self._matrep and self._numrep.

    Position 0 of each representation holds the vector; any further
    positions hold matrices. The arity of a node is the number of matrices,
    i.e. len(representation) - 1.

    Args:
        vecspace: vector space, passed on to
            insert_terminal_node_representation.
        matspace: matrix space, passed on to
            insert_terminal_node_representation.
        multiply_matrices: if True, matrices found at the same position in
            both daughters are multiplied (function's matrix on the left)
            rather than summed, when one daughter applies to the other.

    Raises:
        ValueError: for a non-terminal node without children, for a binary
            node with a daughter whose symbolic representation is empty,
            and for nodes with more than two children.

    NOTE(review): the binary case appends to self._matrep / self._numrep in
    place, so it assumes both were initialised to empty lists elsewhere.
    '''
    # Terminal nodes: lexical insertion.
    if self.is_terminal():
        matrep, temp_numrep = self.insert_terminal_node_representation(
            vecspace, matspace)
        self._matrep = matrep
        if temp_numrep[0] == "empty":
            numrep = []
        else:
            numrep = [temp_numrep[0].transpose()]
            dimensionality = temp_numrep[0].shape[1]
            # All matrices are stored flattened, as long vectors. We need
            # to reshape them before we use them in computations.
            for x in range(1, len(temp_numrep)):
                y = DenseMatrix(temp_numrep[x])
                # Floor division: a shape entry must be an int, and "/"
                # yields a float under Python 3.
                y.reshape((dimensionality, y.shape[1] // dimensionality))
                numrep.append(y)
        self._numrep = numrep
    # Raise an exception for a non-terminal node without children.
    elif len(self._children) == 0:
        raise ValueError("Non-terminal non-branching node!")

    # Unary branching: inherit the value of the single daughter.
    if len(self._children) == 1:
        self._matrep = self.get_child(0)._matrep
        self._numrep = self.get_child(0)._numrep

    # Binary branching: compose the two daughters.
    if len(self._children) == 2 and self._matrep == []:
        matrep1 = self.get_child(0)._matrep
        if not matrep1:
            raise ValueError("Empty matrix representation for node %s!"
                             % self.get_child(0))
        matrep2 = self.get_child(1)._matrep
        if not matrep2:
            raise ValueError("Empty matrix representation for node %s!"
                             % self.get_child(1))

        arity1 = len(matrep1) - 1
        arity2 = len(matrep2) - 1
        # Daughters whose vector symbol ends in 'empty' are ignored.
        empty1 = re.search('empty$', matrep1[0])
        empty2 = re.search('empty$', matrep2[0])
        symbol_op = '*' if multiply_matrices else '+'

        # --- symbolic representation ---
        # Default to componentwise addition for daughters of equal arity.
        if arity1 == arity2:
            for x in range(arity1 + 1):
                self._matrep.append(
                    '(' + matrep1[x] + '+' + matrep2[x] + ')')

        # Left function application: the second daughter is the function.
        if arity1 < arity2 and not empty2 and not empty1:
            for x in range(arity2):
                if x == 0:
                    # The function's last matrix applies to the
                    # argument's vector.
                    self._matrep.append(
                        '(' + matrep2[x] + '+' + matrep2[arity2]
                        + '*' + matrep1[x] + ')')
                elif x < len(matrep1):
                    # Both daughters have a matrix in this position.
                    self._matrep.append(
                        '(' + matrep2[x] + symbol_op + matrep1[x] + ')')
                else:
                    # Inherit the function's extra lexical matrix.
                    self._matrep.append(matrep2[x])

        # Right function application: the first daughter is the function.
        if arity1 > arity2 and not empty2 and not empty1:
            for x in range(arity1):
                if x == 0:
                    self._matrep.append(
                        '(' + matrep1[x] + '+' + matrep1[arity1]
                        + '*' + matrep2[x] + ')')
                elif x < len(matrep2):
                    self._matrep.append(
                        '(' + matrep1[x] + symbol_op + matrep2[x] + ')')
                else:
                    self._matrep.append(matrep1[x])

        # Ignore 'empty' daughters: the other daughter is passed up as is.
        if empty1:
            self._matrep = matrep2
        if empty2:
            self._matrep = matrep1

        # --- numeric representation (mirrors the symbolic one) ---
        numrep1 = self.get_child(0)._numrep
        numrep2 = self.get_child(1)._numrep

        if arity1 == arity2 and numrep1 and numrep2:
            for x in range(arity1 + 1):
                self._numrep.append(numrep1[x] + numrep2[x])

        # Left function application.
        if arity1 < arity2 and not numrep1 == [] and not numrep2 == []:
            for x in range(arity2):
                if x == 0:
                    # NOTE(review): padd_matrix is defined outside this
                    # block; its effect on the argument vector is not
                    # visible here.
                    self._numrep.append(
                        numrep2[x]
                        + numrep2[arity2] * padd_matrix(numrep1[x], 0))
                elif x < len(numrep1):
                    if multiply_matrices:
                        self._numrep.append(numrep2[x] * numrep1[x])
                    else:
                        self._numrep.append(numrep1[x] + numrep2[x])
                else:
                    self._numrep.append(numrep2[x])

        # Right function application.
        if arity1 > arity2 and not numrep1 == [] and not numrep2 == []:
            for x in range(arity1):
                if x == 0:
                    self._numrep.append(
                        numrep1[x]
                        + numrep1[arity1] * padd_matrix(numrep2[x], 0))
                elif x < len(numrep2):
                    if multiply_matrices:
                        # Function's matrix on the left, matching the
                        # symbolic form '(matrep1*matrep2)'; the operands
                        # used to be swapped here, and matrix products do
                        # not commute.
                        self._numrep.append(numrep1[x] * numrep2[x])
                    else:
                        self._numrep.append(numrep1[x] + numrep2[x])
                else:
                    self._numrep.append(numrep1[x])

        # Ignore 'empty' daughters.
        if numrep1 == []:
            self._numrep = numrep2
        if numrep2 == []:
            self._numrep = numrep1

    # Non-binary branching is deliberately not handled.
    if len(self._children) > 2:
        raise ValueError("Matrix representations are not defined for trees with more than binary branching")
def compute_matreps(self, vecspace, matspace, multiply_matrices=False):
    '''
    Compute the symbolic and numeric matrix representations of a papfunc
    node and store them in self._matrep and self._numrep.

    Position 0 of each representation holds the vector; any further
    positions hold matrices. The arity of a node is the number of matrices,
    i.e. len(representation) - 1; for a binary node the daughter with the
    higher arity acts as the function and the other as its argument.

    Args:
        vecspace: vector space, passed on to
            insert_terminal_node_representation.
        matspace: matrix space, passed on to
            insert_terminal_node_representation.
        multiply_matrices: if True, matrices found at the same position in
            both daughters are multiplied (function's matrix on the left)
            rather than summed, when one daughter applies to the other.

    Raises:
        ValueError: for a non-terminal node without children, for a binary
            node with a daughter whose symbolic representation is empty,
            and for nodes with more than two children.

    NOTE(review): the binary case appends to self._matrep / self._numrep in
    place, so it assumes both were initialised to empty lists elsewhere.
    '''
    # Terminal nodes: lexical insertion.
    if self.is_terminal():
        matrep, temp_numrep = self.insert_terminal_node_representation(
            vecspace, matspace)
        self._matrep = matrep
        if temp_numrep[0] == "empty":
            # Default semantic representation for syntactic elements we
            # ignore.
            numrep = []
        else:
            numrep = [temp_numrep[0].transpose()]
            dimensionality = temp_numrep[0].shape[1]
            # Matrices are "flattened", stored as vectors. We reshape each
            # matrix to a normal shape (usually square).
            for x in range(1, len(temp_numrep)):
                y = DenseMatrix(temp_numrep[x])
                # Floor division: a shape entry must be an int, and "/"
                # yields a float under Python 3.
                y.reshape((dimensionality, y.shape[1] // dimensionality))
                numrep.append(y)
        self._numrep = numrep
    # Raise an exception for a non-terminal node without children.
    elif len(self._children) == 0:
        raise ValueError("Non-terminal non-branching node!")

    # Unary branching: inherit the value of the single daughter.
    if len(self._children) == 1:
        self._matrep = self.get_child(0)._matrep
        self._numrep = self.get_child(0)._numrep

    # Binary branching: compose the two daughters.
    if len(self._children) == 2 and self._matrep == []:
        matrep1 = self.get_child(0)._matrep
        if not matrep1:
            raise ValueError("Empty matrix representation for node %s!"
                             % self.get_child(0))
        matrep2 = self.get_child(1)._matrep
        if not matrep2:
            raise ValueError("Empty matrix representation for node %s!"
                             % self.get_child(1))

        # The arities decide which daughter is the function and which is
        # the argument.
        arity1 = len(matrep1) - 1
        arity2 = len(matrep2) - 1
        # Daughters whose vector symbol ends in 'empty' are marked to be
        # ignored.
        empty1 = re.search('empty$', matrep1[0])
        empty2 = re.search('empty$', matrep2[0])
        symbol_op = '*' if multiply_matrices else '+'

        # --- symbolic representation ---
        # Componentwise addition for daughters of equal arity.
        if arity1 == arity2:
            for x in range(arity1 + 1):
                self._matrep.append(
                    '(' + matrep1[x] + '+' + matrep2[x] + ')')

        # Left application: the second daughter is the function.
        if arity1 < arity2 and not empty2 and not empty1:
            for x in range(arity2):
                if x == 0:
                    # Vector of the mother node: the function's last matrix
                    # applies to the argument's vector.
                    self._matrep.append(
                        '(' + matrep2[x] + '+' + matrep2[arity2]
                        + '*' + matrep1[x] + ')')
                elif x < len(matrep1):
                    # Matrices of the mother node, where both daughters
                    # have one in this position.
                    self._matrep.append(
                        '(' + matrep2[x] + symbol_op + matrep1[x] + ')')
                else:
                    self._matrep.append(matrep2[x])

        # Right application: the first daughter is the function.
        if arity1 > arity2 and not empty2 and not empty1:
            for x in range(arity1):
                if x == 0:
                    self._matrep.append(
                        '(' + matrep1[x] + '+' + matrep1[arity1]
                        + '*' + matrep2[x] + ')')
                elif x < len(matrep2):
                    self._matrep.append(
                        '(' + matrep1[x] + symbol_op + matrep2[x] + ')')
                else:
                    self._matrep.append(matrep1[x])

        # If one of the daughters is 'empty', the other one is passed up
        # as is.
        if empty1:
            self._matrep = matrep2
        if empty2:
            self._matrep = matrep1

        # --- numeric representation (mirrors the symbolic one) ---
        numrep1 = self.get_child(0)._numrep
        numrep2 = self.get_child(1)._numrep

        if arity1 == arity2 and numrep1 and numrep2:
            for x in range(arity1 + 1):
                self._numrep.append(numrep1[x] + numrep2[x])

        # Left application.
        if arity1 < arity2 and not numrep1 == [] and not numrep2 == []:
            for x in range(arity2):
                if x == 0:
                    # Compute the vector.
                    self._numrep.append(
                        numrep2[x] + numrep2[arity2] * numrep1[x])
                elif x < len(numrep1):
                    # Compute a matrix.
                    if multiply_matrices:
                        self._numrep.append(numrep2[x] * numrep1[x])
                    else:
                        self._numrep.append(numrep1[x] + numrep2[x])
                else:
                    self._numrep.append(numrep2[x])

        # Right application.
        if arity1 > arity2 and not numrep1 == [] and not numrep2 == []:
            for x in range(arity1):
                if x == 0:
                    self._numrep.append(
                        numrep1[x] + numrep1[arity1] * numrep2[x])
                elif x < len(numrep2):
                    if multiply_matrices:
                        # Function's matrix on the left, matching the
                        # symbolic form '(matrep1*matrep2)'; the operands
                        # used to be swapped here, and matrix products do
                        # not commute.
                        self._numrep.append(numrep1[x] * numrep2[x])
                    else:
                        self._numrep.append(numrep1[x] + numrep2[x])
                else:
                    self._numrep.append(numrep1[x])

        # Ignore 'empty' elements in composition.
        if numrep1 == []:
            self._numrep = numrep2
        if numrep2 == []:
            self._numrep = numrep1

    # Non-binary branching is deliberately not handled.
    if len(self._children) > 2:
        raise ValueError("Matrix representations are not defined for trees with more than binary branching")
def test_3d(self):
    """Train a 2-d lexical function, stack a 3-d one on top, then compose.

    Verb-object matrices are first learned from sentence vectors; verb
    tensors are then learned from those verb-object matrices, and finally
    the verb tensors are composed with nouns to recover the verb-object
    space.
    """
    # setting up
    expected_v = DenseMatrix(np.mat([[0, 0, 1, 1, 2, 2, 3, 3],     # hate
                                     [0, 1, 2, 4, 5, 6, 8, 9]]))    # love

    hate_boy = DenseMatrix(np.mat([[0, 11], [22, 33]]))
    hate_man = DenseMatrix(np.mat([[0, 7], [14, 21]]))
    love_boy = DenseMatrix(np.mat([[6, 34], [61, 94]]))
    love_car = DenseMatrix(np.mat([[2, 10], [17, 26]]))
    # Same order as the expected rows of the learned verb-object space.
    expected_vo = [hate_boy, hate_man, love_boy, love_car]

    train_vo_data = [("hate_boy", "man", "man_hate_boy"),
                     ("hate_man", "man", "man_hate_man"),
                     ("hate_boy", "boy", "boy_hate_boy"),
                     ("hate_man", "boy", "boy_hate_man"),
                     ("love_car", "boy", "boy_love_car"),
                     ("love_boy", "man", "man_love_boy"),
                     ("love_boy", "boy", "boy_love_boy"),
                     ("love_car", "man", "man_love_car")]

    # Open question carried over from the original author:
    # what should happen if a phrase is not found?
    train_v_data = [("love", "boy", "love_boy"),
                    ("hate", "man", "hate_man"),
                    ("hate", "boy", "hate_boy"),
                    ("love", "car", "love_car")]

    sentences = ["man_hate_boy", "car_hate_boy", "boy_hate_boy",
                 "man_hate_man", "car_hate_man", "boy_hate_man",
                 "man_love_boy", "car_love_boy", "boy_love_boy",
                 "man_love_car", "car_love_car", "boy_love_car"]

    n_mat = DenseMatrix(np.mat([[3, 4], [1, 2], [5, 6]]))
    n_space = Space(n_mat, ["man", "car", "boy"], self.ft)

    # One block of sentence vectors per verb-object matrix (built while the
    # matrices are still 2x2; they are reshaped in place further down).
    sentence_blocks = [(vo * n_mat.transpose()).transpose()
                       for vo in expected_vo]
    s_mat = hate_boy.nary_vstack(sentence_blocks)
    s_space = Space(s_mat, sentences, self.ft)

    # test train 2d
    vo_model = LexicalFunction(learner=LstsqRegressionLearner(intercept=False))
    vo_model.train(train_vo_data, n_space, s_space)
    vo_space = vo_model.function_space

    self.assertListEqual(vo_space.id2row,
                         ["hate_boy", "hate_man", "love_boy", "love_car"])
    self.assertTupleEqual(vo_space.element_shape, (2, 2))

    # Each learned row must equal the flattened generating matrix.
    for row, vo in enumerate(expected_vo):
        vo.reshape((1, 4))
        np.testing.assert_array_almost_equal(
            vo.mat, vo_space.cooccurrence_matrix.mat[row])

    # test train 3d
    v_model = LexicalFunction(learner=LstsqRegressionLearner(intercept=False))
    v_model.train(train_v_data, n_space, vo_space)
    v_space = v_model.function_space

    np.testing.assert_array_almost_equal(expected_v.mat,
                                         v_space.cooccurrence_matrix.mat)
    self.assertListEqual(v_space.id2row, ["hate", "love"])
    self.assertTupleEqual(v_space.element_shape, (2, 2, 2))

    # test compose 3d
    composed_vo_space = v_model.compose(train_v_data, n_space)

    self.assertListEqual(list(vo_space.id2row),
                         sorted(composed_vo_space.id2row))

    learned_rows = vo_space.get_rows(vo_space.id2row)
    composed_rows = composed_vo_space.get_rows(vo_space.id2row)
    np.testing.assert_array_almost_equal(learned_rows.mat,
                                         composed_rows.mat, 7)

    self.assertTupleEqual(vo_space.element_shape,
                          composed_vo_space.element_shape)