Ejemplo n.º 1
0
def choseBestFeature(dataSet, op=(1, 1)):
    """Pick the (feature, value) split that minimises the summed label variance.

    Every distinct value of every feature column is tried as a split point
    through ``dataSplit``.  A candidate is discarded when either side of the
    split holds fewer than ``op[1]`` samples.  The stop conditions below act
    as a form of pre-pruning.

    :param dataSet: sample matrix; ``get_label`` reads its last column as the
        labels, so the remaining ``n - 1`` columns are treated as features.
    :param op: two-element sequence.  Only ``op[1]`` (minimum number of
        samples allowed on each side of a split) is read here; ``op[0]`` is
        not used because the minimum-improvement stop condition is disabled.
    :return: ``(feature_index, split_value)`` for the best split, or
        ``(None, leaf_value)`` where ``leaf_value`` is the mean label when the
        node should become a leaf.
    """
    labels = get_label(dataSet)
    if len(set(labels)) == 1:  # stop condition 1: all labels are identical
        return None, mean(labels)  # the label mean becomes the leaf value

    min_samples = op[1]
    best_feature = None
    best_value = 0
    low_error = float('inf')
    _, n = shape(dataSet)  # n - 1 feature columns plus one label column
    for i in range(n - 1):  # every feature column
        for j in set(choice_column(dataSet, i)):  # every distinct value in it
            left, right = dataSplit(dataSet, i, j)
            if shape(right)[0] < min_samples or shape(left)[0] < min_samples:
                continue  # one side of the split is too small to accept
            temp_error = GetAllVar(left) + GetAllVar(right)
            if temp_error < low_error:
                low_error = temp_error
                best_feature = i
                best_value = j

    if best_feature is None:
        # Stop condition 3: no candidate survived the size filter.  Return a
        # leaf instead of splitting on a placeholder feature index, which
        # would have addressed the label column.
        return None, mean(labels)
    return best_feature, best_value
Ejemplo n.º 2
0
 def kernal_ridge_train(self, X, y, alpha):
     """
     Ridge-style closed-form fit on kernel-mapped features.

     A constant column is appended to X, the result is passed through
     ``self.Kernal`` and the weights are computed as
     ``inverse(K.T*K + alpha*I) * K.T * y`` using the ``mat`` helpers.

     :param X: input feature matrix
     :param y: input labels
     :param alpha: factor applied to the identity matrix that is added
         before inversion
     :return: ``(weights, X)`` where X is the feature matrix with the extra
         column appended; ``(0, 0)`` when ``mat.inverse`` yields 0, in which
         case ``self.isError`` is also set to True
     """
     row_count, _ = mat.shape(X)
     ones_col = mat.create_by_mn(row_count, 1, 1.0)
     X = mat.extend(X, ones_col)
     mapped = self.Kernal(X)  # kernel mapping: low-dimensional to high-dimensional
     mapped_t = mat.transpose(mapped)
     gram = mat.multiply(mapped_t, mapped)
     dim, _ = mat.shape(gram)
     penalty = mat.n_mat(alpha, mat.eye(dim))
     _inverse = mat.inverse(mat.add(gram, penalty))
     if _inverse == 0:
         # mat.inverse signalled failure: flag it and return placeholders.
         self.isError = True
         print("逆矩阵不可求")
         return 0, 0
     weights = mat.multiply(mat.multiply(_inverse, mapped_t), y)
     return weights, X
Ejemplo n.º 3
0
    def stageWise(self, xArr, yArr, eps=0.01, numIt=5000):
        """Tune the weights one coefficient at a time in steps of ``eps``.

        ``yArr`` has ``mat.mean(yArr, 0)`` removed via ``mat.lasso_sub`` and
        ``xArr`` is passed through ``mat.regularize`` before fitting
        (presumably centring / scaling -- confirm against the ``mat`` module).
        For each row of the weight matrix, one trial step of ``+eps`` decides
        the direction; the coefficient then keeps moving in that direction
        for at most ``numIt`` steps, stopping at the first step whose error
        exceeds the last accepted error.

        NOTE(review): the step that triggers the stop is not undone, so a
        coefficient can end one ``eps`` past its best position -- confirm
        whether that is intended.

        :param xArr: input feature matrix
        :param yArr: target values
        :param eps: size of a single coefficient step
        :param numIt: maximum number of steps taken per coefficient
        :return: the weight matrix created by ``mat.create_one_mn(n, 1)`` and
            updated in place
        """
        yMean = mat.mean(yArr, 0)
        yMat = mat.lasso_sub(yArr, yMean)
        xMat = mat.regularize(xArr)
        _, n = mat.shape(xMat)
        # Single-argument print emits the same text on Python 2 and Python 3.
        print("%s %s" % (xMat, yMat))

        ws = mat.create_one_mn(n, 1)
        for a_weight in ws:
            # Probe one step upwards to choose the search direction.
            forward = 1
            old_rssE = mat.rssError(yMat, mat.multiply(xMat, ws))
            a_weight[0] = a_weight[0] + eps * forward
            rssE = mat.rssError(yMat, mat.multiply(xMat, ws))
            if rssE > old_rssE:
                forward = -1

            for _ in range(numIt):
                a_weight[0] = a_weight[0] + eps * forward
                new_error = mat.rssError(yMat, mat.multiply(xMat, ws))
                if new_error > old_rssE:
                    break  # error went up: stop adjusting this coefficient
                old_rssE = new_error
        print("ws: \n%s" % (ws,))
        return ws
Ejemplo n.º 4
0
 def ridge_train(self, X, y, alpha):
     """
     Closed-form ridge regression: w = (X.T*X+alpha*I).I*X.T*y

     :param X: input feature matrix
     :param y: input labels
     :param alpha: a complexity parameter that controls the amount of shrinkage
     :return: the model's weight coefficients; 0 when ``mat.inverse`` yields
         0, in which case ``self.isError`` is also set to True
     """
     sample_count, _ = mat.shape(X)
     ones_col = mat.create_by_mn(sample_count, 1, 1.0)
     X = mat.extend(X, ones_col)  # merge the bias coefficient into X
     X_T = mat.transpose(X)
     gram = mat.multiply(X_T, X)
     dim, _ = mat.shape(gram)
     shrinkage = mat.n_mat(alpha, mat.eye(dim))
     _inverse = mat.inverse(mat.add(gram, shrinkage))
     if _inverse == 0:
         # mat.inverse signalled failure: flag it and return a placeholder.
         self.isError = True
         print("逆矩阵不可求")
         return 0
     return mat.multiply(mat.multiply(_inverse, X_T), y)
Ejemplo n.º 5
0
def get_label(_dataSet):
    """Return the last column of ``_dataSet``, which holds the labels."""
    label_index = shape(_dataSet)[1] - 1
    return choice_column(_dataSet, label_index)
Ejemplo n.º 6
0
def GetAllVar(dataSet):
    """Return the label variance of ``dataSet`` multiplied by its row count."""
    label_variance = var(get_label(dataSet))
    sample_count = shape(dataSet)[0]
    return label_variance * sample_count