def project(self, matrix_):
    """
    Projects a dim. reduction operation.

    Args:
        matrix_: matrix on which the reduction is projected, of type Matrix

    Returns:
        the reduced matrix

    Uses the transformation matrix stored in the operation object to project
    the dimensionality reduction method on a new space, peripheral to the
    original one.
    """
    # Without a stored transformation matrix the reduction cannot be projected.
    if self.__transmat is None:
        self._raise_projection_error(self.__dim_reduction)

    # NMF is only defined for non-negative input matrices.
    if self.__dim_reduction.name == "nmf":
        matrix_.assert_positive()

    if not isinstance(matrix_, type(self.__transmat)):
        warn("WARNING: peripheral matrix type (dense/sparse) should be the same as the core space matrix type!!")

    # Convert both operands to the peripheral matrix's type before multiplying.
    [matrix_, transmat] = resolve_type_conflict([matrix_, self.__transmat], type(matrix_))

    result_mat = matrix_ * transmat

    # NOTE(review): to_non_negative presumably clips negative entries that
    # the projection may introduce - confirm against the Matrix API.
    if self.__dim_reduction.name == "nmf":
        result_mat.to_non_negative()

    return DenseMatrix(result_mat)
def xxx(self, arg1_space, arg2_space, phrase_space, arg1_list, arg2_list, phrase_list):
    """
    Accumulates training statistics over the data in memory-bounded chunks.

    Args:
        arg1_space, arg2_space, phrase_space: Space objects in which the
            respective word lists are interpreted.
        arg1_list, arg2_list, phrase_list: aligned lists of row strings.

    The per-chunk values returned by _train1 (dot products and squared
    norms) are summed up and handed to _train2.
    """
    # we try to achieve at most MAX_MEM_OVERHEAD*phrase_space memory overhead
    # the /3.0 is needed because the train data needs 3 * len(train_data)
    # memory (arg1 vector, arg2 vector, phrase vector)
    chunk_size = int(phrase_space.cooccurrence_matrix.shape[0] * self.MAX_MEM_OVERHEAD / 3.0) + 1

    arg1_arg2_dot, arg1_phrase_dot, arg2_phrase_dot, arg1_norm_sqr, arg2_norm_sqr = (0, 0, 0, 0, 0)

    # Ceiling division: the original floor division dropped the trailing
    # partial chunk, and `/` on Python 3 yields a float, which range()
    # rejects outright.
    chunk_count = (len(arg1_list) + chunk_size - 1) // chunk_size
    for i in range(chunk_count):
        beg, end = i * chunk_size, min((i + 1) * chunk_size, len(arg1_list))

        arg1_mat = arg1_space.get_rows(arg1_list[beg:end])
        arg2_mat = arg2_space.get_rows(arg2_list[beg:end])
        phrase_mat = phrase_space.get_rows(phrase_list[beg:end])

        # Work on dense matrices so the chunk statistics combine uniformly.
        [arg1_mat, arg2_mat, phrase_mat] = resolve_type_conflict([arg1_mat, arg2_mat, phrase_mat], DenseMatrix)

        res = self._train1(arg1_mat, arg2_mat, phrase_mat)
        arg1_arg2_dot += res[0]
        arg1_phrase_dot += res[1]
        arg2_phrase_dot += res[2]
        arg1_norm_sqr += res[3]
        arg2_norm_sqr += res[4]

    self._train2(arg1_arg2_dot, arg1_phrase_dot, arg2_phrase_dot, arg1_norm_sqr, arg2_norm_sqr)
def project(self, matrix_):
    """
    Project a peripheral matrix through a stored dimensionality reduction.

    Args:
        matrix_: matrix on which the reduction is projected, of type Matrix

    Returns:
        the reduced matrix, wrapped as a DenseMatrix

    The transformation matrix recorded when the reduction was originally
    computed is reused here to map a new (peripheral) matrix into the same
    reduced space.
    """
    if self.__transmat is None:
        self._raise_projection_error(self.__dim_reduction)

    if self.__dim_reduction.name == "nmf":
        matrix_.assert_positive()

    if not isinstance(matrix_, type(self.__transmat)):
        warn("WARNING: peripheral matrix type (dense/sparse) should be the same as the core space matrix type!!")

    # Bring both operands to the peripheral matrix's representation.
    matrix_, projection_mat = resolve_type_conflict([matrix_, self.__transmat], type(matrix_))

    reduced_mat = matrix_ * projection_mat

    if self.__dim_reduction.name == "nmf":
        reduced_mat.to_non_negative()

    return DenseMatrix(reduced_mat)
def xxx(self, arg1_space, arg2_space, phrase_space, arg1_list, arg2_list, phrase_list):
    """
    Accumulates training statistics over the data in memory-bounded chunks.

    Args:
        arg1_space, arg2_space, phrase_space: Space objects in which the
            respective word lists are interpreted.
        arg1_list, arg2_list, phrase_list: aligned lists of row strings.

    The per-chunk values returned by _train1 (dot products and squared
    norms) are summed up and handed to _train2.
    """
    # we try to achieve at most MAX_MEM_OVERHEAD*phrase_space memory overhead
    # the /3.0 is needed because the train data needs 3 * len(train_data)
    # memory (arg1 vector, arg2 vector, phrase vector)
    chunk_size = int(phrase_space.cooccurrence_matrix.shape[0] * self.MAX_MEM_OVERHEAD / 3.0) + 1

    arg1_arg2_dot, arg1_phrase_dot, arg2_phrase_dot, arg1_norm_sqr, arg2_norm_sqr = (0, 0, 0, 0, 0)

    # Ceiling division: the original floor division dropped the trailing
    # partial chunk, and `/` on Python 3 yields a float, which range()
    # rejects outright.
    chunk_count = (len(arg1_list) + chunk_size - 1) // chunk_size
    for i in range(chunk_count):
        beg, end = i * chunk_size, min((i + 1) * chunk_size, len(arg1_list))

        arg1_mat = arg1_space.get_rows(arg1_list[beg:end])
        arg2_mat = arg2_space.get_rows(arg2_list[beg:end])
        phrase_mat = phrase_space.get_rows(phrase_list[beg:end])

        # Work on dense matrices so the chunk statistics combine uniformly.
        [arg1_mat, arg2_mat, phrase_mat] = resolve_type_conflict([arg1_mat, arg2_mat, phrase_mat], DenseMatrix)

        res = self._train1(arg1_mat, arg2_mat, phrase_mat)
        arg1_arg2_dot += res[0]
        arg1_phrase_dot += res[1]
        arg2_phrase_dot += res[2]
        arg1_norm_sqr += res[3]
        arg2_norm_sqr += res[4]

    self._train2(arg1_arg2_dot, arg1_phrase_dot, arg2_phrase_dot, arg1_norm_sqr, arg2_norm_sqr)
def compose(self, data, arg_space):
    """
    Uses a composition model to compose elements.

    Args:
        data: data to be composed. List of tuples, each containing 3
            strings: (arg1, arg2, composed_phrase). arg1 and arg2 are the
            elements to be composed and composed_phrase is the string
            associated to their composition.

        arg_space: argument space(s). Space object or a tuple of two Space
            objects (e.g. my_space, or (my_space1, my_space2)). If two
            spaces are provided, arg1 elements of data are interpreted in
            space1, and arg2 in space2.

    Returns:
        composed space: a new object of type Space, containing the phrases
        obtained through composition.
    """
    start = time.time()

    arg1_space, arg2_space = self.extract_arg_spaces(arg_space)
    arg1_list, arg2_list, phrase_list = self.valid_data_to_lists(data, (arg1_space.row2id, arg2_space.row2id, None))

    # we try to achieve at most MAX_MEM_OVERHEAD*phrase_space memory overhead
    # the /3.0 is needed because the composing data needs 3 * len(train_data)
    # memory (arg1 vector, arg2 vector, phrase vector)
    chunk_size = int(max(arg1_space.cooccurrence_matrix.shape[0], arg2_space.cooccurrence_matrix.shape[0], len(phrase_list)) * self.MAX_MEM_OVERHEAD / 3.0) + 1

    composed_mats = []
    # Compose chunk by chunk to respect the memory bound computed above.
    for i in range(int(math.ceil(len(arg1_list) / float(chunk_size)))):
        beg, end = i * chunk_size, min((i + 1) * chunk_size, len(arg1_list))

        arg1_mat = arg1_space.get_rows(arg1_list[beg:end])
        arg2_mat = arg2_space.get_rows(arg2_list[beg:end])
        [arg1_mat, arg2_mat] = resolve_type_conflict([arg1_mat, arg2_mat], DenseMatrix)

        composed_mat = self._compose(arg1_mat, arg2_mat)
        composed_mats.append(composed_mat)

    # NOTE(review): composed_mat / arg1_mat are unbound if data is empty -
    # confirm callers never pass an empty list.
    composed_phrase_mat = composed_mat.nary_vstack(composed_mats)

    # Lazily initialize the column labels of composed spaces.
    if self.composed_id2column is None:
        self.composed_id2column = self._build_id2column(arg1_space, arg2_space)

    log.print_name(logger, self, 1, "\nComposed with composition model:")
    # NOTE(review): this logs the row count of the last chunk only, not the
    # total number of composed data points.
    log.print_info(logger, 3, "Composed total data points:%s" % arg1_mat.shape[0])
    log.print_matrix_info(logger, composed_phrase_mat, 4, "Resulted (composed) semantic space::")
    log.print_time_info(logger, time.time(), start, 2)

    return Space(composed_phrase_mat, phrase_list, self.composed_id2column)
def compose(self, data, arg_space):
    """
    Uses a lexical function composition model to compose elements.

    Args:
        data: data to be composed. List of tuples, each containing 3
            strings: (function_word, arg, composed_phrase). function_word
            and arg are the elements to be composed and composed_phrase is
            the string associated to their composition. function_word
            elements are interpreted in self.function_space.

        arg_space: argument space, of type Space. arg elements of data are
            interpreted in this space.

    Returns:
        composed space: a new object of type Space, containing the phrases
        obtained through composition.
    """
    start = time.time()

    assert_is_instance(arg_space, Space)
    arg1_list, arg2_list, phrase_list = self.valid_data_to_lists(
        data, (self._function_space.row2id, arg_space.row2id, None))

    composed_vec_list = []
    # Compose one phrase at a time: each function word vector is paired
    # with its argument vector.
    for i in range(len(arg1_list)):
        arg1_vec = self._function_space.get_row(arg1_list[i])
        arg2_vec = arg_space.get_row(arg2_list[i])

        # Convert both vectors to the type of the larger one before composing.
        matrix_type = get_type_of_largest([arg1_vec, arg2_vec])
        [arg1_vec, arg2_vec] = resolve_type_conflict([arg1_vec, arg2_vec], matrix_type)

        composed_ph_vec = self._compose(arg1_vec, arg2_vec, self._function_space.element_shape)
        composed_vec_list.append(composed_ph_vec)

    # The composed elements drop the last axis of the function element shape.
    result_element_shape = self._function_space.element_shape[0:-1]

    # NOTE(review): composed_ph_vec is unbound if data is empty - confirm
    # callers never pass an empty list.
    composed_ph_mat = composed_ph_vec.nary_vstack(composed_vec_list)

    log.print_name(logger, self, 1, "\nComposed with composition model:")
    log.print_info(logger, 3, "Composed total data points:%s" % len(arg1_list))
    log.print_info(logger, 3, "Functional shape of the resulted (composed) elements:%s" % (result_element_shape, ))
    log.print_matrix_info(logger, composed_ph_mat, 4, "Resulted (composed) semantic space:")
    log.print_time_info(logger, time.time(), start, 2)

    return Space(composed_ph_mat, phrase_list, self.composed_id2column, element_shape=result_element_shape)
def _compose(self, arg1_mat, arg2_mat):
    """
    Combine the two argument matrices with the model's learned weight
    matrices; when an intercept was trained, arg2 is padded accordingly.
    """
    # NOTE: both argument matrices arrive here with the same type, so only
    # the stored weight matrices may need conversion.
    weights_a, weights_b, arg1_mat = resolve_type_conflict(
        [self._mat_a_t, self._mat_b_t, arg1_mat], type(arg1_mat))

    second_operand = padd_matrix(arg2_mat, 1) if self._has_intercept else arg2_mat
    return arg1_mat * weights_a + second_operand * weights_b
def _compose(self, arg1_mat, arg2_mat):
    """Combine the two argument matrices with the model's weight matrices."""
    # NOTE when we get in this compose arg1 mat and arg2 mat have the same type
    [mat_a_t, mat_b_t, arg1_mat] = resolve_type_conflict([self._mat_a_t, self._mat_b_t, arg1_mat], type(arg1_mat))

    if self._has_intercept:
        # padd_matrix presumably appends a bias column to arg2 - TODO confirm
        return arg1_mat * mat_a_t + padd_matrix(arg2_mat, 1) * mat_b_t
    else:
        return arg1_mat * mat_a_t + arg2_mat * mat_b_t
def compose(self, data, arg_space):
    """
    Uses a lexical function composition model to compose elements.

    Args:
        data: data to be composed. List of tuples, each containing 3
            strings: (function_word, arg, composed_phrase). function_word
            and arg are the elements to be composed and composed_phrase is
            the string associated to their composition. function_word
            elements are interpreted in self.function_space.

        arg_space: argument space, of type Space. arg elements of data are
            interpreted in this space.

    Returns:
        composed space: a new object of type Space, containing the phrases
        obtained through composition.
    """
    start = time.time()

    assert_is_instance(arg_space, Space)
    arg1_list, arg2_list, phrase_list = self.valid_data_to_lists(
        data, (self._function_space.row2id, arg_space.row2id, None))

    composed_vec_list = []
    # range() replaces the Python-2-only xrange(), which raises NameError
    # on Python 3.
    for i in range(len(arg1_list)):
        arg1_vec = self._function_space.get_row(arg1_list[i])
        arg2_vec = arg_space.get_row(arg2_list[i])

        # Convert both vectors to the type of the larger one before composing.
        matrix_type = get_type_of_largest([arg1_vec, arg2_vec])
        [arg1_vec, arg2_vec] = resolve_type_conflict([arg1_vec, arg2_vec], matrix_type)

        composed_ph_vec = self._compose(arg1_vec, arg2_vec, self._function_space.element_shape)
        composed_vec_list.append(composed_ph_vec)

    # The composed elements drop the last axis of the function element shape.
    result_element_shape = self._function_space.element_shape[0:-1]

    composed_ph_mat = composed_ph_vec.nary_vstack(composed_vec_list)

    log.print_name(logger, self, 1, "\nComposed with composition model:")
    log.print_info(logger, 3, "Composed total data points:%s" % len(arg1_list))
    log.print_info(logger, 3, "Functional shape of the resulted (composed) elements:%s" % (result_element_shape,))
    log.print_matrix_info(logger, composed_ph_mat, 4, "Resulted (composed) semantic space:")
    log.print_time_info(logger, time.time(), start, 2)

    return Space(composed_ph_mat, phrase_list, self.composed_id2column, element_shape=result_element_shape)
def _train(self, arg1_space, arg2_space, phrase_space, arg1_list, arg2_list, phrase_list):
    """Fetch the training rows from the three spaces and hand them to _solve."""
    pairs = ((arg1_space, arg1_list), (arg2_space, arg2_list), (phrase_space, phrase_list))
    mats = [space.get_rows(rows) for space, rows in pairs]

    # All three operands must share a dense representation.
    arg1_mat, arg2_mat, phrase_mat = resolve_type_conflict(mats, DenseMatrix)

    self._solve(arg1_mat, arg2_mat, phrase_mat)
def vstack(cls, space1, space2):
    """
    Classmethod. Stacks two semantic spaces vertically.

    The rows of space2 are appended after the rows of space1; both spaces
    must have identical columns.

    Args:
        space1, space2: spaces to be stacked, of type Space

    Returns:
        Stacked space, type Space.

    Raises:
        ValueError: if the spaces have different number of columns
                    or their columns are not identical
    """
    cols1 = space1.cooccurrence_matrix.shape[1]
    cols2 = space2.cooccurrence_matrix.shape[1]
    if cols1 != cols2:
        raise ValueError("Inconsistent shapes: %s, %s" % (cols1, cols2))

    if space1.id2column != space2.id2column:
        raise ValueError("Identical columns required")

    # Row bookkeeping: space2's rows follow space1's.
    stacked_row2id = add_items_to_dict(space1.row2id.copy(), space2.id2row)
    stacked_id2row = space1.id2row + space2.id2row

    # Convert both matrices to the type of the larger one, then stack.
    target_type = get_type_of_largest([space1.cooccurrence_matrix, space2.cooccurrence_matrix])
    upper, lower = resolve_type_conflict([space1.cooccurrence_matrix, space2.cooccurrence_matrix], target_type)
    stacked_mat = upper.vstack(lower)

    log.print_info(logger, 1, "\nVertical stack of two spaces")
    log.print_matrix_info(logger, space1.cooccurrence_matrix, 2, "Semantic space 1:")
    log.print_matrix_info(logger, space2.cooccurrence_matrix, 2, "Semantic space 2:")
    log.print_matrix_info(logger, stacked_mat, 2, "Resulted semantic space:")

    return Space(stacked_mat, stacked_id2row, list(space1.id2column), stacked_row2id, space1.column2id.copy(), operations=[])
def test_resolve_type_conflict(self):
    """resolve_type_conflict converts every input element to the target type."""
    arr = np.mat([1, 2])

    a = DenseMatrix(arr)
    b = SparseMatrix(arr)

    [c, d] = resolve_type_conflict([a, b], DenseMatrix)
    [e, f, g] = resolve_type_conflict([b, a, a], DenseMatrix)
    h = resolve_type_conflict([], DenseMatrix)
    # Raw numpy / scipy inputs are wrapped as well.
    [u, v] = resolve_type_conflict([arr, csr_matrix(arr)], DenseMatrix)

    self.assertIsInstance(c, DenseMatrix)
    self.assertIsInstance(d, DenseMatrix)
    self.assertIsInstance(e, DenseMatrix)
    self.assertIsInstance(f, DenseMatrix)
    # Duplicate assertion on g removed - it was checked twice in the original.
    self.assertIsInstance(g, DenseMatrix)
    # An empty input list yields an empty result.
    self.assertListEqual([], h)
    self.assertIsInstance(u, DenseMatrix)
    self.assertIsInstance(v, DenseMatrix)
def vstack(cls, space1, space2):
    """
    Classmethod. Stacks two semantic spaces.

    The rows in the two spaces are concatenated.

    Args:
        space1, space2: spaces to be stacked, of type Space

    Returns:
        Stacked space, type Space.

    Raises:
        ValueError: if the spaces have different number of columns
                    or their columns are not identical
    """
    if space1.cooccurrence_matrix.shape[1] != space2.cooccurrence_matrix.shape[1]:
        raise ValueError("Inconsistent shapes: %s, %s"
                         % (space1.cooccurrence_matrix.shape[1], space2.cooccurrence_matrix.shape[1]))

    if space1.id2column != space2.id2column:
        raise ValueError("Identical columns required")

    # Rows of space2 are appended after those of space1.
    new_row2id = add_items_to_dict(space1.row2id.copy(), space2.id2row)
    new_id2row = space1.id2row + space2.id2row

    # Convert both matrices to the type of the larger one before stacking.
    matrix_type = get_type_of_largest([space1.cooccurrence_matrix, space2.cooccurrence_matrix])
    [new_mat1, new_mat2] = resolve_type_conflict([space1.cooccurrence_matrix, space2.cooccurrence_matrix], matrix_type)

    new_mat = new_mat1.vstack(new_mat2)

    log.print_info(logger, 1, "\nVertical stack of two spaces")
    log.print_matrix_info(logger, space1.cooccurrence_matrix, 2, "Semantic space 1:")
    log.print_matrix_info(logger, space2.cooccurrence_matrix, 2, "Semantic space 2:")
    log.print_matrix_info(logger, new_mat, 2, "Resulted semantic space:")

    return Space(new_mat, new_id2row, list(space1.id2column), new_row2id, space1.column2id.copy(), operations=[])
def compose(self, data, arg_space):
    """
    Uses a composition model to compose elements.

    Args:
        data: data to be composed. List of tuples, each containing 3
            strings: (arg1, arg2, composed_phrase). arg1 and arg2 are the
            elements to be composed and composed_phrase is the string
            associated to their composition.

        arg_space: argument space(s). Space object or a tuple of two Space
            objects (e.g. my_space, or (my_space1, my_space2)). If two
            spaces are provided, arg1 elements of data are interpreted in
            space1, and arg2 in space2.

    Returns:
        composed space: a new object of type Space, containing the phrases
        obtained through composition.
    """
    start = time.time()

    arg1_space, arg2_space = self.extract_arg_spaces(arg_space)
    arg1_list, arg2_list, phrase_list = self.valid_data_to_lists(
        data, (arg1_space.row2id, arg2_space.row2id, None))

    arg1_mat = arg1_space.get_rows(arg1_list)
    arg2_mat = arg2_space.get_rows(arg2_list)

    # Composition is carried out on dense representations.
    [arg1_mat, arg2_mat] = resolve_type_conflict([arg1_mat, arg2_mat], DenseMatrix)

    composed_phrase_mat = self._compose(arg1_mat, arg2_mat)

    # Lazily initialize the column labels of composed spaces.
    if self.composed_id2column is None:
        self.composed_id2column = self._build_id2column(arg1_space, arg2_space)

    log.print_name(logger, self, 1, "\nComposed with composition model:")
    log.print_info(logger, 3, "Composed total data points:%s" % arg1_mat.shape[0])
    log.print_matrix_info(logger, composed_phrase_mat, 4, "Resulted (composed) semantic space::")
    log.print_time_info(logger, time.time(), start, 2)

    return Space(composed_phrase_mat, phrase_list, self.composed_id2column)
def compose(self, data, arg_space):
    """
    Compose phrase representations from pairs of argument vectors.

    Args:
        data: list of (arg1, arg2, composed_phrase) string tuples; arg1
            and arg2 are composed, composed_phrase labels the result.
        arg_space: a Space, or a tuple of two Spaces; with two spaces,
            arg1 is interpreted in the first and arg2 in the second.

    Returns:
        a new Space holding the composed phrases.
    """
    start = time.time()

    space_a, space_b = self.extract_arg_spaces(arg_space)
    words_a, words_b, phrase_list = self.valid_data_to_lists(data, (space_a.row2id, space_b.row2id, None))

    mat_a = space_a.get_rows(words_a)
    mat_b = space_b.get_rows(words_b)
    mat_a, mat_b = resolve_type_conflict([mat_a, mat_b], DenseMatrix)

    composed_phrase_mat = self._compose(mat_a, mat_b)

    if self.composed_id2column is None:
        self.composed_id2column = self._build_id2column(space_a, space_b)

    log.print_name(logger, self, 1, "\nComposed with composition model:")
    log.print_info(logger, 3, "Composed total data points:%s" % mat_a.shape[0])
    log.print_matrix_info(logger, composed_phrase_mat, 4, "Resulted (composed) semantic space::")
    log.print_time_info(logger, time.time(), start, 2)

    return Space(composed_phrase_mat, phrase_list, self.composed_id2column)
def get_sim(self, word1, word2, similarity, space2=None):
    """
    Compute the similarity of two words in the semantic space.

    Args:
        word1: string
        word2: string
        similarity: Similarity instance defining the measure to use
        space2: optional Space; when given, word2 is looked up there
            instead of in the current space.

    Returns:
        scalar similarity score; 0.0 when either word is not found.
    """
    assert_is_instance(similarity, Similarity)

    try:
        v1 = self.get_row(word1)
    except KeyError:
        print("Row string %s not found, returning 0.0" % (word1))
        return 0.0

    word2_space = self if space2 is None else space2
    try:
        v2 = word2_space.get_row(word2)
    except KeyError:
        print("Row string %s not found, returning 0.0" % (word2))
        return 0.0

    v1, v2 = resolve_type_conflict([v1, v2], DenseMatrix)
    return similarity.get_sim(v1, v2)
def get_sim(self, word1, word2, similarity, space2=None):
    """
    Computes the similarity between two targets in the semantic space.

    If one of the two targets to be compared is not found, it returns 0..

    Args:
        word1: string
        word2: string
        similarity: of type Similarity, the similarity measure to be used
        space2: Space type, Optional. If provided, word2 is interpreted in
            this space, rather than the current space. Default, both words
            are interpreted in the current space.

    Returns:
        scalar, similarity score
    """
    assert_is_instance(similarity, Similarity)

    try:
        v1 = self.get_row(word1)
    except KeyError:
        # print() call form: the Python-2 print statement is a SyntaxError
        # on Python 3.
        print("Row string %s not found, returning 0.0" % (word1))
        return 0.0

    try:
        if space2 is None:
            v2 = self.get_row(word2)
        else:
            v2 = space2.get_row(word2)
    except KeyError:
        print("Row string %s not found, returning 0.0" % (word2))
        return 0.0

    # Both vectors must be dense for the similarity computation.
    [v1, v2] = resolve_type_conflict([v1, v2], DenseMatrix)
    return similarity.get_sim(v1, v2)
def train(self, train_data, arg_space, phrase_space):
    """
    Trains a lexical function composition model to learn a function space
    and sets the function_space parameter.

    Args:
        train_data: list of string tuples. Each tuple contains 3 string
            elements: (function_word, arg, phrase).

        arg_space: argument space, of type Space. arg elements of train
            data are interpreted in this space.

        phrase space: phrase space, of type Space. phrase elements of the
            train data are interpreted in this space.

    Training tuples which contain strings not found in their respective
    spaces are ignored. Function words containing less than _MIN_SAMPLES
    training instances are ignored. For example, if _MIN_SAMPLES=2 and
    function word "red" occurs in only one phrase, "red" is ignored.

    The id2column attribute of the resulted composed space is set to be
    equal to that of the phrase space given as an input.
    """
    start = time.time()

    self._has_intercept = self._regression_learner.has_intercept()

    if not isinstance(arg_space, Space):
        raise ValueError("expected one input spaces!")

    result_mats = []

    # Sorting groups all tuples of a function word together so they can be
    # partitioned into contiguous ranges below.
    train_data = sorted(train_data, key=lambda tup: tup[0])

    function_word_list, arg_list, phrase_list = self.valid_data_to_lists(
        train_data, (None, arg_space.row2id, phrase_space.row2id))

    # partitions the sorted input data
    keys, key_ranges = get_partitions(function_word_list, self._MIN_SAMPLES)

    if not keys:
        raise ValueError("No valid training data found!")

    assert len(arg_space.element_shape) == 1

    # The intercept adds one extra column to each learned function matrix.
    if self._has_intercept:
        new_element_shape = phrase_space.element_shape + (arg_space.element_shape[0] + 1,)
    else:
        new_element_shape = phrase_space.element_shape + (arg_space.element_shape[0],)

    # range() replaces the Python-2-only xrange(), which raises NameError
    # on Python 3. One regression is fit per function word.
    for i in range(len(key_ranges)):
        idx_beg, idx_end = key_ranges[i]
        print("Training lexical function...%s with %d samples"
              % (keys[i], idx_end - idx_beg))

        arg_mat = arg_space.get_rows(arg_list[idx_beg:idx_end])
        phrase_mat = phrase_space.get_rows(phrase_list[idx_beg:idx_end])

        # convert them to the same type
        matrix_type = get_type_of_largest([arg_mat, phrase_mat])
        [arg_mat, phrase_mat] = resolve_type_conflict([arg_mat, phrase_mat], matrix_type)

        # Each learned function matrix is flattened to a single row vector.
        result_mat = self._regression_learner.train(arg_mat, phrase_mat).transpose()
        result_mat.reshape((1, np.prod(new_element_shape)))
        result_mats.append(result_mat)

    new_space_mat = arg_mat.nary_vstack(result_mats)

    self.composed_id2column = phrase_space.id2column
    self._function_space = Space(new_space_mat, keys, [], element_shape=new_element_shape)

    log.print_composition_model_info(logger, self, 1, "\nTrained composition model:")
    log.print_info(logger, 3, "Trained: %s lexical functions" % len(keys))
    log.print_info(logger, 3, "With total data points:%s" % len(function_word_list))
    log.print_matrix_info(logger, arg_space.cooccurrence_matrix, 3, "Semantic space of arguments:")
    log.print_info(logger, 3, "Shape of lexical functions learned:%s" % (new_element_shape,))
    log.print_matrix_info(logger, new_space_mat, 3, "Semantic space of lexical functions:")
    log.print_time_info(logger, time.time(), start, 2)
def train(self, train_data, arg_space, phrase_space):
    """
    Trains a lexical function composition model to learn a function space
    and sets the function_space parameter.

    Args:
        train_data: list of string tuples. Each tuple contains 3 string
            elements: (function_word, arg, phrase).

        arg_space: argument space, of type Space. arg elements of train
            data are interpreted in this space.

        phrase space: phrase space, of type Space. phrase elements of the
            train data are interpreted in this space.

    Training tuples which contain strings not found in their respective
    spaces are ignored. Function words containing less than _MIN_SAMPLES
    training instances are ignored. For example, if _MIN_SAMPLES=2 and
    function word "red" occurs in only one phrase, "red" is ignored.

    The id2column attribute of the resulted composed space is set to be
    equal to that of the phrase space given as an input.
    """
    start = time.time()

    self._has_intercept = self._regression_learner.has_intercept()

    if not isinstance(arg_space, Space):
        raise ValueError("expected one input spaces!")

    result_mats = []

    # Sorting groups all tuples of a function word together so they can be
    # partitioned into contiguous ranges below.
    train_data = sorted(train_data, key=lambda tup: tup[0])

    function_word_list, arg_list, phrase_list = self.valid_data_to_lists(
        train_data, (None, arg_space.row2id, phrase_space.row2id))

    # partitions the sorted input data
    keys, key_ranges = get_partitions(function_word_list, self._MIN_SAMPLES)

    if not keys:
        raise ValueError("No valid training data found!")

    assert (len(arg_space.element_shape) == 1)

    # The intercept adds one extra column to each learned function matrix.
    if self._has_intercept:
        new_element_shape = phrase_space.element_shape + (arg_space.element_shape[0] + 1, )
    else:
        new_element_shape = phrase_space.element_shape + (arg_space.element_shape[0], )

    # One regression is fit per function word, over its range of samples.
    for i in range(len(key_ranges)):
        idx_beg, idx_end = key_ranges[i]
        print(("Training lexical function...%s with %d samples"
               % (keys[i], idx_end - idx_beg)))

        arg_mat = arg_space.get_rows(arg_list[idx_beg:idx_end])
        phrase_mat = phrase_space.get_rows(phrase_list[idx_beg:idx_end])

        # convert them to the same type
        matrix_type = get_type_of_largest([arg_mat, phrase_mat])
        [arg_mat, phrase_mat] = resolve_type_conflict([arg_mat, phrase_mat], matrix_type)

        # Each learned function matrix is flattened to a single row vector.
        result_mat = self._regression_learner.train(arg_mat, phrase_mat).transpose()
        result_mat.reshape((1, np.prod(new_element_shape)))
        result_mats.append(result_mat)

    # NOTE(review): arg_mat here is the matrix from the last loop iteration;
    # nary_vstack is presumably type-dispatching - confirm.
    new_space_mat = arg_mat.nary_vstack(result_mats)

    self.composed_id2column = phrase_space.id2column
    self._function_space = Space(new_space_mat, keys, [], element_shape=new_element_shape)

    log.print_composition_model_info(logger, self, 1, "\nTrained composition model:")
    log.print_info(logger, 3, "Trained: %s lexical functions" % len(keys))
    log.print_info(logger, 3, "With total data points:%s" % len(function_word_list))
    log.print_matrix_info(logger, arg_space.cooccurrence_matrix, 3, "Semantic space of arguments:")
    log.print_info(logger, 3, "Shape of lexical functions learned:%s" % (new_element_shape, ))
    log.print_matrix_info(logger, new_space_mat, 3, "Semantic space of lexical functions:")
    log.print_time_info(logger, time.time(), start, 2)