Ejemplo n.º 1
0
    def fit(self, x_train, y_train):
        """Fit the LDF (linear discriminant function) model.

        A QDF model is fitted first to obtain the per-class prior, mean and
        covariance matrix.  The class covariances are then combined into one
        shared covariance (weighted by the priors), whose inverse S^-1 is
        used to derive, for every class i:

            w_i  = 2 * S^-1 * mean_i
            w_i0 = 2 * log(prior_i) - mean_i' * S^-1 * mean_i

        Args:
            x_train: training samples, forwarded unchanged to ``QDF.fit``.
            y_train: training labels; remapped by ``QDF.transform_label``
                before fitting.

        Returns:
            self, with ``num_class``, ``label_map``, ``inverse_cov``,
            ``weight`` (one column per class) and ``w0`` (one scalar per
            class) set.
        """
        self.num_class, y_train, self.label_map = QDF.transform_label(y_train)
        qdf = QDF().fit(x_train, y_train)
        prior = qdf.prior
        mean = qdf.mean
        cov_matrix = qdf.cov_matrix

        # Shared covariance: prior-weighted sum of the per-class covariances.
        avg_cov = np.matlib.zeros(cov_matrix[0].shape)
        for i in range(self.num_class):
            avg_cov += (prior[i] * cov_matrix[i])

        self.inverse_cov = avg_cov.getI()  # inverse of the shared covariance

        weight_columns = []
        self.w0 = []
        for i in range(self.num_class):
            # NOTE(review): mean[i] is presumably a column matrix (the
            # products below only make sense for that shape) -- confirm
            # against QDF.fit.
            weight_columns.append(2 * self.inverse_cov.T * mean[i])

            wi0 = 2 * math.log(prior[i]) - (mean[i].T * self.inverse_cov *
                                            mean[i])[0, 0]
            self.w0.append(wi0)

        # Stack all class columns in a single call instead of growing the
        # matrix with one hstack per class from a dummy zero column.
        self.weight = np.hstack(weight_columns)

        return self
Ejemplo n.º 2
0
def twostep(train_feature, train_label, test_feature, Classifier, kwargs):
    """Train the two-step model and predict on the training and test data.

    Two independent ``Classifier`` instances are fitted, one on each of the
    datasets returned by ``make_twostep_dataset``; both are then handed to
    ``twostep_predict``.  As a side effect, a classification report for each
    step (evaluated on that step's own training data) is printed.

    Args:
        train_feature: training features.
        train_label: training labels.
        test_feature: test features.
        Classifier: the classifier class.  It is instantiated as
            ``Classifier(**kwargs)``; its ``fit`` must return the fitted
            estimator, which must provide ``predict``.
        kwargs: dict of additional model parameters for the classifier.
            This is an ordinary positional argument holding a dict, not a
            ``**kwargs`` catch-all.

    Returns:
        (train_pred, test_pred): the results of ``twostep_predict`` on
        ``train_feature`` and ``test_feature``.  ``train_pred`` covers the
        whole training data and is meant for checking whether overfitting
        occurred; ``test_pred`` is the final result.
    """
    # Two-step method: split the training data into one dataset per step.
    (model1_feature, model1_label,
     model2_feature, model2_label) = make_twostep_dataset(train_feature,
                                                          train_label)
    # Build the two models.
    clf1 = Classifier(**kwargs).fit(model1_feature, model1_label)
    clf2 = Classifier(**kwargs).fit(model2_feature, model2_label)

    train_pred = twostep_predict(clf1, clf2, train_feature)
    test_pred = twostep_predict(clf1, clf2, test_feature)

    # Report the accuracy of each step of the two-step model.  The
    # parenthesised single-argument print form gives the same output on
    # Python 2 and Python 3.
    print('Step 1 acc:')
    step1_pred = clf1.predict(model1_feature)
    print(classification_report(model1_label, step1_pred))
    print('Step 2 acc:')
    step2_pred = clf2.predict(model2_feature)
    print(classification_report(model2_label, step2_pred))

    return train_pred, test_pred
Ejemplo n.º 3
0
def twostep(train_feature, train_label, test_feature, Classifier, kwargs):
    """Fit both steps of the two-step model and return its predictions.

    ``make_twostep_dataset`` splits the training data into one dataset per
    step.  A separate ``Classifier`` instance is fitted on each, and the pair
    is passed to ``twostep_predict`` for the training and the test features.
    A classification report per step, computed on the data that step was
    trained on, is printed along the way.

    Args:
        train_feature: training features.
        train_label: training labels.
        test_feature: test features.
        Classifier: the classifier class, instantiated as
            ``Classifier(**kwargs)``.  ``fit`` is expected to return the
            fitted estimator and the estimator to offer ``predict``.
        kwargs: dict with additional model parameters for the classifier
            (passed as a normal argument, then unpacked with ``**``).

    Returns:
        (train_pred, test_pred): what ``twostep_predict`` returns for
        ``train_feature`` and ``test_feature``.  ``train_pred`` is computed
        on the whole training data so overfitting can be spotted;
        ``test_pred`` is the final result.
    """
    # Two-step method: one dataset per step.
    datasets = make_twostep_dataset(train_feature, train_label)
    model1_feature, model1_label, model2_feature, model2_label = datasets

    # Build the two models.
    clf1 = Classifier(**kwargs).fit(model1_feature, model1_label)
    clf2 = Classifier(**kwargs).fit(model2_feature, model2_label)

    train_pred = twostep_predict(clf1, clf2, train_feature)
    test_pred = twostep_predict(clf1, clf2, test_feature)

    # Check the accuracy of each step of the two-step model.  print() with a
    # single parenthesised argument behaves the same on Python 2 and 3.
    print('Step 1 acc:')
    y_pred = clf1.predict(model1_feature)
    print(classification_report(model1_label, y_pred))
    print('Step 2 acc:')
    y_pred = clf2.predict(model2_feature)
    print(classification_report(model2_label, y_pred))

    return train_pred, test_pred
Ejemplo n.º 4
0
 def fit(self, x_train, y_train):
     """Fit the LDF (linear discriminant function) model.

     QDF is fitted first to get the per-class prior, mean and covariance
     matrix.  The covariances are merged into a single shared covariance
     (sum of the class covariances weighted by their priors); with S^-1
     its inverse, each class i gets:

         w_i  = 2 * S^-1 * mean_i
         w_i0 = 2 * log(prior_i) - mean_i' * S^-1 * mean_i

     Args:
         x_train: training samples, passed unchanged to ``QDF.fit``.
         y_train: training labels; remapped through
             ``QDF.transform_label`` before fitting.

     Returns:
         self, after setting ``num_class``, ``label_map``,
         ``inverse_cov``, ``weight`` (one column per class) and ``w0``
         (one scalar per class).
     """
     self.num_class, y_train, self.label_map = QDF.transform_label(y_train)
     qdf = QDF().fit(x_train, y_train)
     prior = qdf.prior
     mean = qdf.mean
     cov_matrix = qdf.cov_matrix

     # Shared covariance: prior-weighted sum of the per-class covariances.
     avg_cov = np.matlib.zeros(cov_matrix[0].shape)
     for i in range(self.num_class):
         avg_cov += (prior[i] * cov_matrix[i])

     self.inverse_cov = avg_cov.getI()  # inverse of the shared covariance

     class_weights = []
     self.w0 = []
     for i in range(self.num_class):
         # NOTE(review): mean[i] is presumably a column matrix, as the
         # matrix products below require -- confirm against QDF.fit.
         class_weights.append(2 * self.inverse_cov.T * mean[i])

         wi0 = 2 * math.log(prior[i]) - (mean[i].T * self.inverse_cov * mean[i])[0, 0]
         self.w0.append(wi0)

     # A single hstack over all class columns replaces the previous
     # pattern of a dummy zero column extended once per class.
     self.weight = np.hstack(class_weights)

     return self
Ejemplo n.º 5
0
 def predict(self, x_test):
     """Predict a class label for every sample in x_test.

     For each sample x and class i the score

         -x' * S^-1 * x + w_i' * x + w_i0

     is evaluated (S^-1 is ``self.inverse_cov``, w_i is column i of
     ``self.weight``, w_i0 is ``self.w0[i]``) and the first class with the
     strictly largest score is chosen.

     Args:
         x_test: iterable of samples; each one is converted to a float64
             column matrix.

     Returns:
         The result of ``QDF.map_class_index`` applied to the chosen
         class indices and ``self.label_map``.  An index of -1 is passed
         for a sample when no class score exceeds -inf.
     """
     predicted_labels = []
     for row in x_test:
         x = np.matrix(row, np.float64).T
         # This term does not depend on the class, so compute it once
         # per sample rather than once per class.
         neg_quadratic = -1 * (x.T * self.inverse_cov * x)
         max_posteriori = -float('inf')
         prediction = -1
         for i in range(self.num_class):
             p = (neg_quadratic + self.weight[:, i].T * x + self.w0[i])[0, 0]
             if p > max_posteriori:
                 max_posteriori = p
                 prediction = i

         predicted_labels.append(prediction)

     return QDF.map_class_index(predicted_labels, self.label_map)
Ejemplo n.º 6
0
    def predict(self, x_test):
        """Return the predicted class label of each sample in x_test.

        Every sample x is scored against every class i with

            -x' * S^-1 * x + w_i' * x + w_i0

        where S^-1 is ``self.inverse_cov``, w_i is column i of
        ``self.weight`` and w_i0 is ``self.w0[i]``.  The class with the
        strictly highest score wins; on ties the lowest index is kept.

        Args:
            x_test: iterable of samples; each is turned into a float64
                column matrix before scoring.

        Returns:
            Whatever ``QDF.map_class_index`` returns for the list of
            winning class indices and ``self.label_map``.  A sample for
            which no score exceeds -inf contributes the index -1.
        """
        predicted_labels = []
        for row in x_test:
            x = np.matrix(row, np.float64).T
            # Class-independent quadratic term: hoisted out of the class
            # loop so it is evaluated once per sample.
            neg_quadratic = -1 * (x.T * self.inverse_cov * x)
            max_posteriori = -float('inf')
            prediction = -1
            for i in range(self.num_class):
                p = (neg_quadratic +
                     self.weight[:, i].T * x + self.w0[i])[0, 0]
                if p > max_posteriori:
                    max_posteriori = p
                    prediction = i

            predicted_labels.append(prediction)

        return QDF.map_class_index(predicted_labels, self.label_map)