Esempio n. 1
0
    def compose(self, data, arg_space):
        """
        Uses a composition model to compose elements, chunk by chunk.

        The argument rows are fetched and composed in chunks so that the
        intermediate argument matrices stay bounded in size; the per-chunk
        results are stacked into a single matrix at the end.

        Args:
            data: data to be composed. List of tuples, each containing 3
            strings: (arg1, arg2, composed_phrase). arg1 and arg2 are the
            elements to be composed and composed_phrase is the string associated
            to their composition.

            arg_space: argument space(s). Space object or a tuple of two
            Space objects (e.g. my_space, or (my_space1, my_space2)).
            If two spaces are provided, arg1 elements of data are
            interpreted in space1, and arg2 in space2.

        Returns:
            composed space: a new object of type Space, containing the
            phrases obtained through composition.

        """
        start = time.time()

        arg1_space, arg2_space = self.extract_arg_spaces(arg_space)
        arg1_list, arg2_list, phrase_list = self.valid_data_to_lists(data,
                                                                     (arg1_space.row2id,
                                                                      arg2_space.row2id,
                                                                      None))
        total_points = len(arg1_list)

        # We try to achieve at most MAX_MEM_OVERHEAD * phrase_space memory
        # overhead. The / 3.0 is needed because the composing data needs
        # 3 * len(train_data) memory (arg1 vector, arg2 vector, phrase vector).
        chunk_size = int(max(arg1_space.cooccurrence_matrix.shape[0],
                             arg2_space.cooccurrence_matrix.shape[0],
                             len(phrase_list))
                         * self.MAX_MEM_OVERHEAD / 3.0) + 1

        # Always process at least one (possibly empty) chunk. Without this,
        # empty input skips the loop entirely and the code below fails on
        # loop variables that were never bound.
        num_chunks = max(1, int(math.ceil(total_points / float(chunk_size))))

        composed_mats = []
        for i in range(num_chunks):
            beg, end = i * chunk_size, min((i + 1) * chunk_size, total_points)

            arg1_mat = arg1_space.get_rows(arg1_list[beg:end])
            arg2_mat = arg2_space.get_rows(arg2_list[beg:end])

            [arg1_mat, arg2_mat] = resolve_type_conflict([arg1_mat, arg2_mat],
                                                         DenseMatrix)
            composed_mat = self._compose(arg1_mat, arg2_mat)
            composed_mats.append(composed_mat)

        composed_phrase_mat = composed_mat.nary_vstack(composed_mats)

        if self.composed_id2column is None:
            self.composed_id2column = self._build_id2column(arg1_space, arg2_space)

        log.print_name(logger, self, 1, "\nComposed with composition model:")
        # Report the total over all chunks; arg1_mat only holds the last chunk.
        log.print_info(logger, 3, "Composed total data points:%s" % total_points)
        log.print_matrix_info(logger, composed_phrase_mat, 4,
                              "Resulted (composed) semantic space::")
        log.print_time_info(logger, time.time(), start, 2)

        return Space(composed_phrase_mat, phrase_list, self.composed_id2column)
    def compose(self, data, arg_space):
        """
        Composes phrases with a lexical function composition model.

        Each data item pairs a function word with an argument; the function
        word is looked up in self._function_space and the argument in
        arg_space, and the two rows are composed one pair at a time.

        Args:
            data: data to be composed. List of tuples, each containing 3
            strings: (function_word, arg, composed_phrase). function_word and
            arg are the elements to be composed and composed_phrase is the
            string associated to their composition.

            arg_space: argument space, of type Space. arg elements of data are
            interpreted in this space.

        Returns:
            composed space: a new object of type Space, containing the
            phrases obtained through composition.

        """
        start = time.time()

        assert_is_instance(arg_space, Space)
        row_maps = (self._function_space.row2id, arg_space.row2id, None)
        function_words, arg_words, phrase_list = self.valid_data_to_lists(
            data, row_maps)

        phrase_vecs = []
        for position, function_word in enumerate(function_words):
            function_vec = self._function_space.get_row(function_word)
            arg_vec = arg_space.get_row(arg_words[position])

            # Bring both rows to the matrix type of the larger one.
            common_type = get_type_of_largest([function_vec, arg_vec])
            function_vec, arg_vec = resolve_type_conflict(
                [function_vec, arg_vec], common_type)

            phrase_vec = self._compose(function_vec, arg_vec,
                                       self._function_space.element_shape)
            phrase_vecs.append(phrase_vec)

        # The composed elements drop the last dimension of the function shape.
        result_element_shape = self._function_space.element_shape[0:-1]
        composed_ph_mat = phrase_vec.nary_vstack(phrase_vecs)

        log.print_name(logger, self, 1, "\nComposed with composition model:")
        points_msg = "Composed total data points:%s" % len(function_words)
        log.print_info(logger, 3, points_msg)
        shape_msg = ("Functional shape of the resulted (composed) elements:%s"
                     % (result_element_shape, ))
        log.print_info(logger, 3, shape_msg)
        log.print_matrix_info(logger, composed_ph_mat, 4,
                              "Resulted (composed) semantic space:")
        log.print_time_info(logger, time.time(), start, 2)

        return Space(composed_ph_mat, phrase_list, self.composed_id2column,
                     element_shape=result_element_shape)
Esempio n. 3
0
    def compose(self, data, arg_space):
        """
        Uses a lexical function composition model to compose elements.

        Args:
            data: data to be composed. List of tuples, each containing 3
            strings: (function_word, arg, composed_phrase). function_word and
            arg are the elements to be composed and composed_phrase is the
            string associated to their composition. function_word elements
            are interpreted in self._function_space.

            arg_space: argument space, of type Space. arg elements of data are
            interpreted in this space.

        Returns:
            composed space: a new object of type Space, containing the
            phrases obtained through composition.

        """
        start = time.time()

        assert_is_instance(arg_space, Space)
        arg1_list, arg2_list, phrase_list = self.valid_data_to_lists(data,
                                                                     (self._function_space.row2id,
                                                                      arg_space.row2id,
                                                                      None))

        composed_vec_list = []
        # enumerate() replaces the Python-2-only xrange(); it is lazy on both
        # Python 2 and Python 3 and keeps the index for the parallel arg2_list.
        for i, function_word in enumerate(arg1_list):
            arg1_vec = self._function_space.get_row(function_word)
            arg2_vec = arg_space.get_row(arg2_list[i])

            # Bring both rows to the matrix type of the larger one.
            matrix_type = get_type_of_largest([arg1_vec, arg2_vec])
            [arg1_vec, arg2_vec] = resolve_type_conflict([arg1_vec, arg2_vec],
                                                         matrix_type)

            composed_ph_vec = self._compose(arg1_vec, arg2_vec,
                                            self._function_space.element_shape)

            composed_vec_list.append(composed_ph_vec)

        # The composed elements drop the last dimension of the function shape.
        result_element_shape = self._function_space.element_shape[0:-1]
        # NOTE(review): when arg1_list is empty the loop never binds
        # composed_ph_vec, so this line raises; behaviour left unchanged.
        composed_ph_mat = composed_ph_vec.nary_vstack(composed_vec_list)

        log.print_name(logger, self, 1, "\nComposed with composition model:")
        log.print_info(logger, 3, "Composed total data points:%s" % len(arg1_list))
        log.print_info(logger, 3, "Functional shape of the resulted (composed) elements:%s"
                       % (result_element_shape,))
        log.print_matrix_info(logger, composed_ph_mat, 4,
                              "Resulted (composed) semantic space:")
        log.print_time_info(logger, time.time(), start, 2)

        return Space(composed_ph_mat, phrase_list, self.composed_id2column,
                     element_shape=result_element_shape)
Esempio n. 4
0
    def get_neighbours(self, word, no_neighbours, similarity,
                       space2=None):
        """
        Returns the nearest neighbours of a word under a similarity measure.

        Args:
            word: string, target word
            no_neighbours: int, the number of neighbours desired
            similarity: of type Similarity, the similarity measure to be used
            space2: Space type, Optional. If provided, the neighbours are
                retrieved from this space, rather than the current space.
                Default, neighbours are retrieved from the current space.

        Returns:
            list of (neighbour_string, similarity_value) tuples, at most
            no_neighbours long.

        Raises:
            KeyError: if the word is not found in the semantic space.

        """
        start = time.time()
        assert_is_instance(similarity, Similarity)
        vector = self.get_row(word)

        if space2 is not None:
            # Convert the query row to the matrix type used by the other space.
            target_type = type(space2.cooccurrence_matrix)
            if not isinstance(vector, target_type):
                vector = target_type(vector)

            sims_to_matrix = similarity.get_sims_to_matrix(
                vector, space2.cooccurrence_matrix)
            id2row = space2.id2row
        else:
            id2row = self.id2row
            sims_to_matrix = similarity.get_sims_to_matrix(
                vector, self.cooccurrence_matrix)

        ranking = sims_to_matrix.sorted_permutation(sims_to_matrix.sum, 1)
        # Never ask for more neighbours than there are rows to pick from.
        limit = min(no_neighbours, len(id2row))
        result = [(id2row[row], sims_to_matrix[row, 0])
                  for row in (ranking[rank] for rank in range(limit))]

        log.print_info(logger, 1, "\nGetting neighbours of:%s" % (word))
        log.print_name(logger, similarity, 1, "Similarity:")
        log.print_time_info(logger, time.time(), start, 2)
        return result
Esempio n. 5
0
    def get_neighbours(self, word, no_neighbours, similarity, space2=None):
        """
        Finds the words most similar to a target word.

        Args:
            word: string, target word
            no_neighbours: int, the number of neighbours desired
            similarity: of type Similarity, the similarity measure to be used
            space2: Space type, Optional. If provided, the neighbours are
                retrieved from this space, rather than the current space.
                Default, neighbours are retrieved from the current space.

        Returns:
            list of (neighbour_string, similarity_value) tuples.

        Raises:
            KeyError: if the word is not found in the semantic space.

        """
        started_at = time.time()
        assert_is_instance(similarity, Similarity)
        query_vec = self.get_row(word)

        if space2 is None:
            id2row = self.id2row
            candidate_mat = self.cooccurrence_matrix
            sims = similarity.get_sims_to_matrix(query_vec, candidate_mat)
        else:
            # The query row must have the same matrix type as the other space.
            other_type = type(space2.cooccurrence_matrix)
            if not isinstance(query_vec, other_type):
                query_vec = other_type(query_vec)

            candidate_mat = space2.cooccurrence_matrix
            sims = similarity.get_sims_to_matrix(query_vec, candidate_mat)
            id2row = space2.id2row

        order = sims.sorted_permutation(sims.sum, 1)
        # Cap the request at the number of available rows.
        wanted = min(no_neighbours, len(id2row))

        result = []
        for rank in range(wanted):
            row_index = order[rank]
            neighbour = (id2row[row_index], sims[row_index, 0])
            result.append(neighbour)

        log.print_info(logger, 1, "\nGetting neighbours of:%s" % (word))
        log.print_name(logger, similarity, 1, "Similarity:")
        log.print_time_info(logger, time.time(), started_at, 2)
        return result
Esempio n. 6
0
    def compose(self, data, arg_space):
        """
        Composes all data items in one pass with the composition model.

        Args:
            data: data to be composed. List of tuples, each containing 3
            strings: (arg1, arg2, composed_phrase). arg1 and arg2 are the
            elements to be composed and composed_phrase is the string associated
            to their composition.

            arg_space: argument space(s). Space object or a tuple of two
            Space objects (e.g. my_space, or (my_space1, my_space2)).
            If two spaces are provided, arg1 elements of data are
            interpreted in space1, and arg2 in space2.

        Returns:
            composed space: a new object of type Space, containing the
            phrases obtained through composition.

        """
        started_at = time.time()

        first_space, second_space = self.extract_arg_spaces(arg_space)
        row_maps = (first_space.row2id, second_space.row2id, None)
        first_words, second_words, phrase_list = self.valid_data_to_lists(
            data, row_maps)

        first_rows = first_space.get_rows(first_words)
        second_rows = second_space.get_rows(second_words)

        # Both argument matrices are brought to the DenseMatrix type.
        first_rows, second_rows = resolve_type_conflict(
            [first_rows, second_rows], DenseMatrix)

        composed_phrase_mat = self._compose(first_rows, second_rows)

        # The column mapping is built once and reused afterwards.
        if self.composed_id2column is None:
            self.composed_id2column = self._build_id2column(first_space,
                                                            second_space)

        log.print_name(logger, self, 1, "\nComposed with composition model:")
        points_msg = "Composed total data points:%s" % first_rows.shape[0]
        log.print_info(logger, 3, points_msg)
        log.print_matrix_info(logger, composed_phrase_mat, 4,
                              "Resulted (composed) semantic space::")
        log.print_time_info(logger, time.time(), started_at, 2)

        return Space(composed_phrase_mat, phrase_list, self.composed_id2column)
    def compose(self, data, arg_space):
        """
        Applies the composition model to every (arg1, arg2) pair in data.

        Args:
            data: data to be composed. List of tuples, each containing 3
            strings: (arg1, arg2, composed_phrase). arg1 and arg2 are the
            elements to be composed and composed_phrase is the string associated
            to their composition.

            arg_space: argument space(s). Space object or a tuple of two
            Space objects (e.g. my_space, or (my_space1, my_space2)).
            If two spaces are provided, arg1 elements of data are
            interpreted in space1, and arg2 in space2.

        Returns:
            composed space: a new object of type Space, containing the
            phrases obtained through composition.

        """
        t_start = time.time()

        space_a, space_b = self.extract_arg_spaces(arg_space)
        words_a, words_b, phrase_list = self.valid_data_to_lists(
            data, (space_a.row2id, space_b.row2id, None))

        # Fetch the argument rows, then bring both matrices to DenseMatrix.
        fetched = [space_a.get_rows(words_a), space_b.get_rows(words_b)]
        mat_a, mat_b = resolve_type_conflict(fetched, DenseMatrix)

        composed_phrase_mat = self._compose(mat_a, mat_b)

        # Build the column mapping only if it has not been set yet.
        if self.composed_id2column is None:
            id2column = self._build_id2column(space_a, space_b)
            self.composed_id2column = id2column

        log.print_name(logger, self, 1, "\nComposed with composition model:")
        log.print_info(logger, 3,
                       "Composed total data points:%s" % mat_a.shape[0])
        log.print_matrix_info(logger, composed_phrase_mat, 4,
                              "Resulted (composed) semantic space::")
        log.print_time_info(logger, time.time(), t_start, 2)

        return Space(composed_phrase_mat, phrase_list, self.composed_id2column)