def smoSimple(dataMatIn, classLabels, C, toler, maxIter):
    """Train a linear soft-margin SVM with the simplified SMO algorithm.

    Each pass scans every sample; a sample that violates the KKT conditions
    by more than ``toler`` is paired with a second index obtained from
    ``selectJrand`` and the two multipliers are optimised jointly.

    Args:
        dataMatIn: 2-D array-like of samples, one row per sample.
        classLabels: 1-D array-like of labels, one per sample
            (presumably +1 / -1 -- confirm against the caller).
        C: Upper bound of the box constraint on every multiplier.
        toler: Tolerance used when testing the KKT conditions.
        maxIter: Number of consecutive full passes without any multiplier
            change required before training stops.

    Returns:
        Tuple ``(b, alphas)``: the bias term and an ``(m, 1)`` matrix of
        Lagrange multipliers.

    NOTE(review): ``selectJrand(i, m)`` is defined outside this block; it is
    assumed to return an index in ``[0, m)`` different from ``i``.
    NOTE(review): the original body stopped right after the L/H bound check,
    so the loop counter was never advanced and nothing was returned.  The
    remainder below follows the standard simplified-SMO pair update.
    """
    # np.asmatrix is the spelling that exists in both NumPy 1.x and 2.x
    # (np.mat was removed from the main namespace in NumPy 2.0).
    dataMatrix = np.asmatrix(dataMatIn)
    labelMat = np.asmatrix(classLabels).transpose()
    b = 0
    m, n = np.shape(dataMatrix)
    alphas = np.asmatrix(np.zeros((m, 1)))  # m-by-1 vector of multipliers

    def kernel_sum(k):
        # sum_t alpha_t * y_t * <x_t, x_k>  (decision value without bias)
        weighted = np.multiply(alphas, labelMat).T
        return (weighted * (dataMatrix * dataMatrix[k, :].T))[0, 0]

    iter_num = 0
    while iter_num < maxIter:
        alphaPairsChanged = 0
        for i in range(m):
            yi = labelMat[i, 0]
            # Error between the prediction and the true label.
            Ei = kernel_sum(i) + b - yi
            alphaIold = alphas[i, 0]
            violates_kkt = (yi * Ei < -toler and alphaIold < C) or (
                yi * Ei > toler and alphaIold > 0
            )
            if not violates_kkt:
                continue

            j = selectJrand(i, m)
            yj = labelMat[j, 0]
            Ej = kernel_sum(j) + b - yj
            alphaJold = alphas[j, 0]

            # Feasible segment for alpha_j inside the [0, C] box.
            if yi != yj:
                L = max(0, alphaJold - alphaIold)
                H = min(C, C + alphaJold - alphaIold)
            else:
                L = max(0, alphaJold + alphaIold - C)
                H = min(C, alphaJold + alphaIold)
            if L == H:
                print("L=H")
                continue

            Kii = (dataMatrix[i, :] * dataMatrix[i, :].T)[0, 0]
            Kjj = (dataMatrix[j, :] * dataMatrix[j, :].T)[0, 0]
            Kij = (dataMatrix[i, :] * dataMatrix[j, :].T)[0, 0]
            # Second derivative of the objective along the constraint line;
            # it must be negative for the step below to be a maximiser.
            eta = 2.0 * Kij - Kii - Kjj
            if eta >= 0:
                continue

            alphaJnew = alphaJold - yj * (Ei - Ej) / eta
            alphaJnew = min(H, max(L, alphaJnew))
            # Skip negligible moves so they do not count as progress.
            if abs(alphaJnew - alphaJold) < 1e-5:
                continue
            # Move alpha_i by the same amount in the opposite direction so
            # that sum(alpha * y) stays constant.
            alphaInew = alphaIold + yi * yj * (alphaJold - alphaJnew)
            alphas[j, 0] = alphaJnew
            alphas[i, 0] = alphaInew

            b1 = (b - Ei
                  - yi * (alphaInew - alphaIold) * Kii
                  - yj * (alphaJnew - alphaJold) * Kij)
            b2 = (b - Ej
                  - yi * (alphaInew - alphaIold) * Kij
                  - yj * (alphaJnew - alphaJold) * Kjj)
            if 0 < alphaInew < C:
                b = b1
            elif 0 < alphaJnew < C:
                b = b2
            else:
                b = (b1 + b2) / 2.0
            alphaPairsChanged += 1

        # Only passes that change nothing count towards maxIter.
        if alphaPairsChanged == 0:
            iter_num += 1
        else:
            iter_num = 0
    return b, alphas
def CE2(self, A, Y, count):
    """Return the binary cross-entropy loss averaged over ``count``.

    Computes ``sum(-((1 - Y) * log(1 - A) + Y * log(A))) / count`` with
    element-wise products.

    Args:
        A: Array-like of predicted values.
        Y: Array-like of targets, broadcastable against ``A``.
        count: Divisor applied to the summed loss (presumably the number
            of samples -- confirm against the caller).

    Returns:
        The summed loss divided by ``count``.

    Note:
        Entries of ``A`` equal to exactly 0 or 1 reach ``log(0)`` and make
        the result infinite or NaN.
    """
    A = np.asarray(A)
    Y = np.asarray(Y)
    # np.multiply is the element-wise product; the previous call target
    # (np.multiarray) is not a callable NumPy function.
    negative_term = np.multiply(1 - Y, np.log(1 - A))
    positive_term = np.multiply(Y, np.log(A))
    total = np.sum(-(negative_term + positive_term))
    return total / count
def found_fron_twoword(oneword_per, twoword_per):
    """Rank the element-wise products of two score sequences.

    For every position ``i`` the arrays ``oneword_per[i]`` and
    ``twoword_per[i]`` are multiplied element-wise; all products are
    concatenated into one array and its largest entries are returned.

    Args:
        oneword_per: Indexable collection of array-likes.
        twoword_per: Indexable collection of array-likes, paired
            position-by-position with ``oneword_per``.

    Returns:
        Tuple ``(index, values)``.  ``index`` is a list of
        ``(v // 5, v % 5)`` pairs for each selected flat position ``v`` and
        ``values`` holds the corresponding products, largest first.  Both
        lists are empty when there is nothing to rank.

    NOTE(review): ``jin_per`` and ``n`` are not parameters; they are
    presumably module-level names defined elsewhere -- confirm.  The
    divisor 5 presumably is the length of each per-position array.
    """
    products = [
        np.multiply(oneword_per[i], twoword_per[i])
        for i in range(len(jin_per))
    ]
    if not products:
        return [], []
    # One concatenate call instead of growing the array inside the loop.
    last = np.concatenate(products, axis=0)

    index = []
    values = []
    for v in np.argsort(last)[::-1][:n]:
        # list.append takes a single argument, so the (row, column) pair is
        # stored as one tuple; floor division keeps the row an integer.
        index.append((v // 5, v % 5))
        values.append(last[v])
    return index, values
def backward(self, z, a, delta):
    """Propagate ``delta`` back through an activation with derivative 1 - a**2.

    Args:
        z: Not used by this computation; kept for interface compatibility.
        a: Array-like of activation outputs.
        delta: Array-like of incoming gradients, broadcastable against ``a``.

    Returns:
        Tuple ``(delta_Z, delta_A)`` where ``delta_A = 1 - a * a`` and
        ``delta_Z = delta * delta_A`` (both element-wise).

    NOTE(review): ``1 - a**2`` is the derivative of tanh expressed in terms
    of its output, so this presumably belongs to a tanh layer -- confirm.
    """
    # np.multiply is the element-wise product; np.multiarray is not callable.
    delta_A = 1 - np.multiply(a, a)
    delta_Z = np.multiply(delta, delta_A)
    return delta_Z, delta_A
def CE3(self, A, Y, count):
    """Return the categorical cross-entropy loss averaged over ``count``.

    Computes ``sum(-(Y * log(A))) / count`` with an element-wise product.

    Args:
        A: Array-like of predicted values.
        Y: Array-like of targets, broadcastable against ``A``.
        count: Divisor applied to the summed loss (presumably the number
            of samples -- confirm against the caller).

    Returns:
        The summed loss divided by ``count``.

    Note:
        Entries of ``A`` equal to exactly 0 reach ``log(0)`` and make the
        result infinite or NaN.
    """
    log_pred = np.log(A)
    # np.multiply is the element-wise product; np.multiarray is not callable.
    weighted = np.multiply(Y, log_pred)
    return np.sum(-weighted) / count
def train(self, trainMat, trainLabels):
    """Fit the multipliers ``alpha`` and bias ``b`` with an SMO-style loop.

    Repeats until no sample violates the KKT conditions: picks the first
    violating sample ``i``, pairs it with the sample ``j`` whose error
    differs most from ``E[i]``, and updates both multipliers inside the
    ``[0, self.C]`` box.

    Args:
        trainMat: 2-D array of samples, one row per sample.
        trainLabels: 1-D array of labels, one per sample (presumably
            +1 / -1 -- confirm against the caller).

    Side effects:
        Stores ``alpha``, ``b``, ``trainMat`` and ``trainLabels`` on
        ``self``.

    NOTE(review): relies on ``self.g``, ``self.kernel`` and ``self.C``
    defined elsewhere; ``self.g(...)`` is assumed to return a scalar.
    NOTE(review): the KKT test compares ``dis`` with 1 exactly (no
    tolerance) and the bias update uses only the pre-update ``G``; both
    are kept as found -- confirm they are intended.
    NOTE(review): ``eta`` is zero when the two chosen rows coincide, which
    makes the step below divide by zero.
    """
    # ``shape`` is an attribute, not a method.
    n, m = trainMat.shape
    # Initial values.
    alpha = np.zeros((n, 1))
    b = np.sum(trainLabels) / n
    # Training loop.
    while True:
        # Decision value g(x) of every sample under the current alpha / b.
        G = np.array([
            self.g(trainMat[k], alpha, b, trainMat, trainLabels)
            for k in range(n)
        ])
        # Functional margin y * g(x), used by the KKT test.
        dis = np.multiply(G, trainLabels)
        # First multiplier: the first sample that violates a KKT condition.
        i = next(
            (
                k for k in range(n)
                if (alpha[k, 0] == 0 and dis[k] < 1)
                or (0 < alpha[k, 0] < self.C and dis[k] != 1)
                or (alpha[k, 0] == self.C and dis[k] > 1)
            ),
            None,
        )
        # Every sample satisfies the KKT conditions: training is done.
        if i is None:
            break
        # Prediction errors.
        E = G - trainLabels
        # Second multiplier: the sample with the largest |E[j] - E[i]|
        # (np.argmax returns the first maximum, like list.index(max(...))).
        j = int(np.argmax(np.abs(E - E[i])))
        # Unconstrained optimum of alpha[i] along the constraint line.
        diff = trainMat[i] - trainMat[j]
        eta = self.kernel(diff, diff)
        ai_old = alpha[i, 0]
        aj_old = alpha[j, 0]
        aiNew = ai_old - trainLabels[i] * (E[i] - E[j]) / eta
        # Clip to the feasible segment of the box constraint.
        if trainLabels[i] == trainLabels[j]:
            L = max(0, ai_old + aj_old - self.C)
            # Equal labels keep alpha[i] + alpha[j] constant, so the upper
            # bound is their sum (the original subtracted here).
            H = min(self.C, ai_old + aj_old)
        else:
            L = max(0, ai_old - aj_old)
            H = min(self.C, self.C + ai_old - aj_old)
        if aiNew > H:
            aiNew = H
        elif aiNew < L:
            aiNew = L
        ajNew = aj_old + trainLabels[i] * trainLabels[j] * (ai_old - aiNew)
        # Commit the new multipliers and refresh the bias.
        alpha[i, 0] = aiNew
        alpha[j, 0] = ajNew
        if 0 < aiNew < self.C:
            b = trainLabels[i] - G[i] + b
        elif 0 < ajNew < self.C:
            b = trainLabels[j] - G[j] + b
        else:
            b = (trainLabels[i] - G[i] + trainLabels[j] - G[j]) / 2 + b
    self.alpha = alpha
    self.b = b
    self.trainMat = trainMat
    self.trainLabels = trainLabels
import gzip