Python LstsqRegressionLearner.has_intercept 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: composes.utils.regression_learner

메소드/함수: has_intercept

hotexamples.com에서의 예제들: 2

Python LstsqRegressionLearner.has_intercept - 2개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 composes.utils.regression_learner.LstsqRegressionLearner.has_intercept에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

LstsqRegressionLearner(13)

has_intercept(2)

train(1)

예제 #1

파일 보기

파일: lexical_function.py 프로젝트: georgiana-dinu/dissect

class LexicalFunction(CompositionModel):
    """
    Implements the lexical function compositional model.
    
        :math:`\\vec{p} = U \\vec{v}`
     
    where :math:`\\vec{p}` is the vector of the composed phrase,
    :math:`U` is the matrix representation of the first component (the lexical function)
    and :math:`\\vec{v}` is the vector representation of the second component
          
    """ 
         
    _name = "lexical_function"
    _MIN_SAMPLES = 1

    def __init__(self, **kwargs):
        """
        Constructor.

        Args:
            function_space= : function space parameter, containing
            the lexical functions, of type Space. Optional, can be set through
            training.

            intercept= : True/False, True if the function space has intercept.
            Optional, default False. When training is used, intercept is set
            to the intercept value of the regression learner used.

            learner= : regression method of type RegressionLearner. Optional,
            default LstsqRegressionLearner.

        Raises:
            TypeError: if function_space is not a Space or intercept is not
            a bool.
            ValueError: if both learner and function_space are given.
        """
        assert_valid_kwargs(kwargs, ["function_space", "intercept", "learner"])

        self._regression_learner = LstsqRegressionLearner()
        self.composed_id2column = []
        self._function_space = None
        self._has_intercept = False

        if "function_space" in kwargs:
            space = kwargs["function_space"]
            if not isinstance(space, Space):
                # The "%s" placeholder is required: without it the "%"
                # operator itself fails and hides the intended message.
                raise TypeError("expected Space-type argument, received: %s"
                                % (type(space),))
            self._function_space = space

        if "intercept" in kwargs:
            has_intercept = kwargs["intercept"]
            if not isinstance(has_intercept, bool):
                raise TypeError("expected bool-type argument, received: %s"
                                % (type(has_intercept),))
            self._has_intercept = has_intercept

        if "learner" in kwargs:
            # A learner is only meaningful for a model that is still to be
            # trained, so it cannot be combined with a ready function space.
            if "function_space" in kwargs:
                raise ValueError("cannot instantiate with both learner and function_space!")

            self._regression_learner = kwargs["learner"]
        
    def train(self, train_data, arg_space, phrase_space):
        """
        Learns one lexical function per function word and stores the
        result as the function_space of this model.

        Args:
            train_data: list of string tuples. Each tuple contains 3
            string elements: (function_word, arg, phrase).

            arg_space: argument space, of type Space. arg elements of
            train data are interpreted in this space.

            phrase_space: phrase space, of type Space. phrase elements of
            the train data are interpreted in this space.

        Training tuples which contain strings not found in their
        respective spaces are ignored. Function words containing less than
        _MIN_SAMPLES training instances are ignored. For example, if
        _MIN_SAMPLES=2 and function word "red" occurs in only one phrase,
        "red" is ignored.

        The composed_id2column attribute of the model is set to the
        id2column of the phrase space given as an input.

        Raises:
            ValueError: if arg_space is not a Space, or if no function word
            is left to train on.
        """
        start = time.time()

        learner = self._regression_learner
        self._has_intercept = learner.has_intercept()

        if not isinstance(arg_space, Space):
            raise ValueError("expected one input spaces!")

        # The data is sorted by function word before it is partitioned.
        sorted_data = sorted(train_data, key=lambda triple: triple[0])
        id_maps = (None, arg_space.row2id, phrase_space.row2id)
        function_word_list, arg_list, phrase_list = self.valid_data_to_lists(
            sorted_data, id_maps)

        keys, key_ranges = get_partitions(function_word_list, self._MIN_SAMPLES)

        if not keys:
            raise ValueError("No valid training data found!")

        assert(len(arg_space.element_shape) == 1)

        # With an intercept the learned function has one extra column.
        arg_dim = arg_space.element_shape[0]
        if self._has_intercept:
            arg_dim = arg_dim + 1
        new_element_shape = phrase_space.element_shape + (arg_dim,)

        result_mats = []
        for i, (idx_beg, idx_end) in enumerate(key_ranges):

            print("Training lexical function...%s with %d samples"
                  % (keys[i], idx_end - idx_beg))

            arg_mat = arg_space.get_rows(arg_list[idx_beg:idx_end])
            phrase_mat = phrase_space.get_rows(phrase_list[idx_beg:idx_end])

            # Bring both matrices to a common matrix type before regression.
            matrix_type = get_type_of_largest([arg_mat, phrase_mat])
            arg_mat, phrase_mat = resolve_type_conflict([arg_mat, phrase_mat],
                                                        matrix_type)

            result_mat = learner.train(arg_mat, phrase_mat).transpose()

            # NOTE(review): the return value is not used, so this presumably
            # relies on the matrix type reshaping in place -- confirm.
            result_mat.reshape((1, np.prod(new_element_shape)))

            result_mats.append(result_mat)

        new_space_mat = arg_mat.nary_vstack(result_mats)

        self.composed_id2column = phrase_space.id2column

        self._function_space = Space(new_space_mat, keys, [],
                                     element_shape=new_element_shape)

        log.print_composition_model_info(logger, self, 1, "\nTrained composition model:")
        log.print_info(logger, 3, "Trained: %s lexical functions" % len(keys))
        log.print_info(logger, 3, "With total data points:%s" % len(function_word_list))
        log.print_matrix_info(logger, arg_space.cooccurrence_matrix, 3,
                              "Semantic space of arguments:")
        log.print_info(logger, 3, "Shape of lexical functions learned:%s"
                       % (new_element_shape,))
        log.print_matrix_info(logger, new_space_mat, 3,
                              "Semantic space of lexical functions:")
        log.print_time_info(logger, time.time(), start, 2)
        
    def compose(self, data, arg_space):
        """
        Uses a lexical function composition model to compose elements.

        Args:
            data: data to be composed. List of tuples, each containing 3
            strings: (function_word, arg, composed_phrase). function_word and
            arg are the elements to be composed and composed_phrase is the
            string associated to their composition. function_word elements
            are interpreted in self.function_space.

            arg_space: argument space, of type Space. arg elements of data are
            interpreted in this space.

        Returns:
            composed space: a new object of type Space, containing the
            phrases obtained through composition.

        Raises:
            IllegalStateError: if the model has no function space yet
            (neither trained nor given at construction).
            ValueError: if no element of data can be composed.
        """
        start = time.time()

        assert_is_instance(arg_space, Space)

        # Fail with a clear message instead of an AttributeError on None.
        if self._function_space is None:
            raise IllegalStateError("cannot compose with an untrained LexicalFunction model.")

        arg1_list, arg2_list, phrase_list = self.valid_data_to_lists(data,
                                                                     (self._function_space.row2id,
                                                                      arg_space.row2id,
                                                                      None))

        # Without this guard the stacking step below would reference a loop
        # variable that was never bound.
        if not arg1_list:
            raise ValueError("No valid data to compose found!")

        function_element_shape = self._function_space.element_shape

        composed_vec_list = []
        for i, function_word in enumerate(arg1_list):
            arg1_vec = self._function_space.get_row(function_word)
            arg2_vec = arg_space.get_row(arg2_list[i])

            # Bring both vectors to a common matrix type before multiplying.
            matrix_type = get_type_of_largest([arg1_vec, arg2_vec])
            [arg1_vec, arg2_vec] = resolve_type_conflict([arg1_vec, arg2_vec],
                                                         matrix_type)

            composed_ph_vec = self._compose(arg1_vec, arg2_vec,
                                            function_element_shape)

            composed_vec_list.append(composed_ph_vec)

        # The composed elements lose the last (argument) dimension of the
        # lexical function shape.
        result_element_shape = function_element_shape[0:-1]
        composed_ph_mat = composed_ph_vec.nary_vstack(composed_vec_list)

        log.print_name(logger, self, 1, "\nComposed with composition model:")
        log.print_info(logger, 3, "Composed total data points:%s" % len(arg1_list))
        log.print_info(logger, 3, "Functional shape of the resulted (composed) elements:%s"
                       % (result_element_shape,))
        log.print_matrix_info(logger, composed_ph_mat, 4,
                              "Resulted (composed) semantic space:")
        log.print_time_info(logger, time.time(), start, 2)

        return Space(composed_ph_mat, phrase_list, self.composed_id2column,
                     element_shape=result_element_shape)
    
        
    def _compose(self, function_arg_vec, arg_vec, function_arg_element_shape):

        new_shape = (np.prod(function_arg_element_shape[0:-1]), 
                            function_arg_element_shape[-1])

        function_arg_vec.reshape(new_shape)

        if self._has_intercept:
            comp_el = function_arg_vec * padd_matrix(arg_vec.transpose(), 0)
        else:
            comp_el = function_arg_vec * arg_vec.transpose()
            
        return comp_el.transpose()
            
    @classmethod
    def _assert_space_match(cls, arg1_space, arg2_space, phrase_space=None):
        pass
 
    def set_regression_learner(self, regression_learner):
        assert_is_instance(regression_learner, RegressionLearner)
        self._regression_learner = regression_learner
        
    def get_regression_learner(self):
        return self._regression_learner
    
    regression_learner = property(get_regression_learner, set_regression_learner)  
    """
    Regression method to be used in training, of type RegressionLearner.
    Default is RidgeRegressionLearner(param=1).
    """
       
    def get_function_space(self):
        return self._function_space
    
    function_space = property(get_function_space)
    """
    Function space parameter, containing the lexical functions, of type Space. 
    Can be set through training or through initialization, default None.
    """        

    def get_has_intercept(self):
        return self._has_intercept
    
    has_intercept = property(get_has_intercept)
    """
    Has intercept parameter, boolean. If True, then the function_space is 
    assumed to contain intercept. Can be set through training or through 
    initialization, default is assumed to be False.
    """   
    
    def set_min_samples(self, min_samples):
        if not is_integer(min_samples):
            raise ValueError("expected %s min_samples value, received %s"
                             % ("integer", type(min_samples)))
        self._MIN_SAMPLES = min_samples
        
    def get_min_samples(self):
        return self._MIN_SAMPLES
    
    MIN_SAMPLES = property(get_min_samples, set_min_samples)
    """
    Minimal number of samples for each training instance. Default 3.
    """
            
    def _export(self, filename):
        """
        Writes the function space to filename in "dm" format.

        Raises:
            IllegalStateError: if the model has no function space yet.
        """
        space = self._function_space
        if space is None:
            raise IllegalStateError("cannot export an untrained LexicalFunction model.")
        space.export(filename, format="dm")

예제 #2

파일 보기

파일: full_additive.py 프로젝트: dimazest/dissect

class FullAdditive(CompositionModel):
    """
    Implements the full additive compositional model:

        :math:`\\vec{p} = A \\vec{u} + B \\vec{v}`

    where :math:`\\vec{p}` is the vector of the composed phrase,
    :math:`\\vec{u}, \\vec{v}`, the vectors of the components
    and :math:`A`, :math:`B` are two matrices.

    """
    _name = "full_additive"
    _mat_a_t = None
    _mat_b_t = None


    def __init__(self, **kwargs):
        """
        Constructor.

        Args:
            A= : matrix A, of matrix-like type (Matrix, ndarray,
            numpy matrix, scipy matrix). Optional (parameters can be set
            through training.)

            B= : matrix B, matrix-like type. Optional.

            learner= : regression learner object, of type RegressionLearner.
            Optional, default LstsqRegressionLearner.

        A and B are only used when both are given; in that case the model
        is assumed to have no intercept.

        Raises:
            TypeError: if A or B is given but is not of a matrix-like type.
        """
        # TODO: very important -- it should be possible to set the intercept
        # to True or False when A and B are given; for now it is False.
        assert_valid_kwargs(kwargs, ["A", "B", "learner"])

        # Always bind a learner, so that training and the regression_learner
        # property also work on a model built from explicit matrices.
        if "learner" in kwargs:
            self._regression_learner = kwargs["learner"]
        else:
            self._regression_learner = LstsqRegressionLearner()

        if "A" in kwargs and "B" in kwargs:
            mat_a = kwargs["A"]
            mat_b = kwargs["B"]
            if not is_array_or_matrix(mat_a):
                raise TypeError("expected matrix type, received: %s"
                                % type(mat_a))

            if not is_array_or_matrix(mat_b):
                raise TypeError("expected matrix type, received: %s"
                                % type(mat_b))

            mat_a, mat_b = to_compatible_matrix_types(mat_a, mat_b)
            # The model stores the transposes of A and B.
            self._mat_a_t = mat_a.transpose()
            self._mat_b_t = mat_b.transpose()
            self._has_intercept = False

        else:
            self._has_intercept = self._regression_learner.has_intercept()


    def _train(self, arg1_mat, arg2_mat, phrase_mat):
        """
        Learns the transposes of A and B with a single regression.

        The two argument matrices are stacked horizontally and regressed
        onto phrase_mat. The first arg1_mat.shape[1] rows of the result are
        stored as the transpose of A, the remaining rows as the transpose
        of B.
        """
        learner = self._regression_learner
        self._has_intercept = learner.has_intercept()

        stacked_args = arg1_mat.hstack(arg2_mat)
        result = learner.train(stacked_args, phrase_mat)

        split_row = arg1_mat.shape[1]
        self._mat_a_t = result[0:split_row, :]
        self._mat_b_t = result[split_row:, :]


    def _compose(self, arg1_mat, arg2_mat):
        """
        Composes two argument matrices with the learned parameters.

        Returns arg1_mat * A^T + arg2_mat * B^T. When the model has an
        intercept, arg2_mat is first extended through padd_matrix.
        """
        # NOTE: at this point arg1_mat and arg2_mat already have the same
        # type, so only the parameters are aligned with arg1_mat.
        target_type = type(arg1_mat)
        mat_a_t, mat_b_t, arg1_mat = resolve_type_conflict(
            [self._mat_a_t, self._mat_b_t, arg1_mat], target_type)

        first_term = arg1_mat * mat_a_t
        if self._has_intercept:
            second_term = padd_matrix(arg2_mat, 1) * mat_b_t
        else:
            second_term = arg2_mat * mat_b_t
        return first_term + second_term

    def set_regression_learner(self, regression_learner):
        assert_is_instance(regression_learner, RegressionLearner)
        self._regression_learner = regression_learner

    def get_regression_learner(self):
        return self._regression_learner

    regression_learner = property(get_regression_learner, set_regression_learner)
    """
    Regression method to be used in training, of type RegressionLearner.
    Default is LstsqRegressionLearner.
    """

    def _build_id2column(self, arg1_space, arg2_space):
        return []

    def _export(self, filename):
        """
        Writes the learned parameters to filename as text.

        The file contains an "A" section and a "B" section. When the model
        has an intercept, the last row of the stored B transpose is written
        separately under an "Intercept" section.

        Raises:
            IllegalStateError: if either parameter matrix is still None.
        """
        mat_a_t = self._mat_a_t
        mat_b_t = self._mat_b_t
        if mat_a_t is None or mat_b_t is None:
            raise IllegalStateError("cannot export an untrained FullAdditive model.")

        with open(filename, "w") as output_stream:
            emit = output_stream.write

            emit("A\n")
            emit(str(DenseMatrix(mat_a_t).mat.T))
            emit("\nB\n")

            if not self._has_intercept:
                emit(str(DenseMatrix(mat_b_t).mat.T))
                return

            # All rows but the last belong to B; the last row is the
            # intercept.
            emit(str(DenseMatrix(mat_b_t[:-1,]).mat.T))
            emit("\nIntercept\n")
            emit(str(DenseMatrix(mat_b_t[-1,]).mat.T))


    def get_mat_a_t(self):
        return self._mat_a_t
    mat_a_t = property(get_mat_a_t)
    """
    Transpose of matrix A parameter, of type Matrix.
    """

    def get_mat_b_t(self):
        return self._mat_b_t
    mat_b_t = property(get_mat_b_t)
    """