def test_Kernel(self):
    '''Tally kernel values over every sample pair (currently disabled).

    The leading ``return`` makes the rest of the body unreachable, so
    calling this method does nothing and returns None.  The unreachable
    part loads the sample data set, evaluates the kernel for every
    (row, col) pair and prints how many values equal 1 ('p'), equal 0
    ('n') or are anything else ('el').
    '''
    return
    samples = DigitDataSet()
    samples.load(os.path.join('..', 'data', 'sample'))
    # Only the first component of the shape is used below.
    count, _ = samples.shape()
    kernel = Kernel(samples, kernel_func)
    ones = zeros = others = 0
    for row in range(count):
        for col in range(count):
            value = kernel.compute(row, col)
            if value == 0:
                zeros += 1
            elif value == 1:
                ones += 1
            else:
                others += 1
    print('p', ones)
    print('n', zeros)
    print('el', others)
class SMOAlgorithm(object):
    '''Sequential Minimal Optimization (SMO) solver for a kernel SVM.

    The solver repeatedly picks a pair of Lagrange multipliers
    (alpha[i1], alpha[i2]), optimizes them jointly and updates the
    threshold ``b`` until a full pass over the data changes nothing.

    Attributes:
        C: upper bound of the box constraint 0 <= alpha <= C.
        toler: tolerance used when checking the KKT conditions.
        dataSet: object providing ``shape()``, ``getLabel(i)`` and
            ``getData(i)``.
        row, dim: the two values returned by ``dataSet.shape()``;
            ``row`` is used as the number of samples.
        alphas: numpy array with one Lagrange multiplier per sample.
        b: threshold term; the error is computed as
            ``sum_j(alpha_j * y_j * K(i, j)) + b - y_i``.
        kernel: ``Kernel(dataSet, kernel_func)``; must provide
            ``compute(i, j)``.
        EiCache: per-sample cache of ``sum_j(alpha_j * y_j * K(i, j))``
            (the error without ``b`` and ``y_i``); ``inf`` marks an entry
            that has not been computed yet.

    NOTE(review): the update formulas assume labels are +1 / -1 --
    confirm against the data set implementation.
    '''

    # Smallest change of alpha[i2] that still counts as progress.
    MIN_ALPHA_STEP = 0.0001

    def __init__(self, dataSet, C, toler, kernel_func = kernel_func):
        '''Store the hyper-parameters and allocate the solver state.'''
        self.C = C
        self.toler = toler
        self.dataSet = dataSet
        self.row, self.dim = self.dataSet.shape()
        # Lagrange multipliers, all starting at zero.
        self.alphas = numpy.zeros(self.row, dtype=float)
        self.b = 0
        # Kernel function wrapper.
        self.kernel = Kernel(dataSet, kernel_func)
        # Error cache, initialised to +inf meaning "not computed yet".
        self.EiCache = numpy.full(self.row, float('inf'), dtype=float)

    def _computeEi(self, i, reCompute = False):
        '''Return the prediction error Ei of sample ``i``.

        Args:
            i: sample index.
            reCompute: when true, ignore the cached value and recompute
                the kernel sum from the current alphas.

        Returns:
            ``sum_j(alpha_j * y_j * K(i, j)) + b - y_i``.
        '''
        inf = float('inf')
        if reCompute or self.EiCache[i] == inf:
            total = 0.0
            for j in range(self.row):
                alpha_j = self.alphas[j]
                if alpha_j == 0:
                    # A zero multiplier contributes nothing to the sum,
                    # so the kernel evaluation can be skipped.
                    continue
                total += alpha_j * self.dataSet.getLabel(j) * self.kernel.compute(i, j)
            self.EiCache[i] = total
        # b and y_i are applied outside the cache so that a change of b
        # never invalidates cached entries.
        return self.EiCache[i] + self.b - self.dataSet.getLabel(i)

    def _computeEta(self, i1, i2):
        '''Return eta = 2*K12 - K11 - K22 for the pair (i1, i2).'''
        k11 = self.kernel.compute(i1, i1)
        k12 = self.kernel.compute(i1, i2)
        k22 = self.kernel.compute(i2, i2)
        return 2*k12 - k11 - k22

    def _selectI(self, i2, E2):
        '''Choose the partner index i1 for the already chosen i2.

        Among the non-bound multipliers (0 < alpha < C) other than i2, the
        index maximising |E1 - E2| is returned.  When no such candidate
        exists, the index is chosen by ``selectByRandom``.
        '''
        # Indices of the multipliers that are neither 0 nor C.
        candidates = [index for index in range(self.row)
                      if index != i2 and 0 < self.alphas[index] < self.C]
        maxIndex = -1
        maxDelta = -1
        for i1 in candidates:
            delta = abs(self._computeEi(i1) - E2)
            if delta > maxDelta:
                maxDelta = delta
                maxIndex = i1
        if maxIndex != -1:
            logging.debug('i2={0}, when (i1={1}) ,|E1-E2|->max, max={2}'.format(i2, maxIndex, maxDelta))
            return maxIndex
        # No usable non-bound multiplier: fall back to a random choice.
        i1 = selectByRandom(i2, self.row)
        logging.debug('i2={0}, select i1 by random, i1={1}'.format(i2,i1))
        return i1

    def _refreshErrorCache(self, i1, i2, y1, y2, delta1, delta2):
        '''Bring EiCache in line with the alphas after a successful step.

        Entries i1 and i2 are recomputed from scratch; every other entry
        that has already been computed is corrected by the contribution of
        the two changed multipliers.  Entries still marked ``inf`` are
        left alone and will be computed on first use.
        '''
        inf = float('inf')
        for k in range(self.row):
            if k == i1 or k == i2:
                self._computeEi(k, reCompute = True)
            elif self.EiCache[k] != inf:
                self.EiCache[k] += (y1 * delta1 * self.kernel.compute(k, i1)
                                    + y2 * delta2 * self.kernel.compute(k, i2))

    def _takeSetp(self, i1, i2, E2):
        '''Jointly optimise alpha[i1] and alpha[i2] and update b.

        The method name (a misspelling of "takeStep") is kept so that
        existing callers keep working.

        Args:
            i1, i2: indices of the two multipliers.
            E2: error of sample i2 computed before the step.

        Returns:
            1 if the multipliers were changed, 0 otherwise.  When 0 is
            returned, no solver state has been modified.
        '''
        logging.debug('takeStep({0}, {1})'.format(i1, i2))
        assert 0 <= i1 < self.row and 0 <= i2 < self.row
        if i1 == i2:
            return 0

        y1 = self.dataSet.getLabel(i1)
        y2 = self.dataSet.getLabel(i2)
        E1 = self._computeEi(i1)
        alpha1_old = self.alphas[i1]
        alpha2_old = self.alphas[i2]

        # Ends of the feasible segment for alpha[i2].
        if y1 != y2:
            L = max(0.0, alpha2_old - alpha1_old)
            H = min(self.C, self.C + alpha2_old - alpha1_old)
        else:
            L = max(0.0, alpha1_old + alpha2_old - self.C)
            H = min(self.C, alpha1_old + alpha2_old)
        if L == H:
            logging.debug('L = {0}, H = {1}, alpha1 = {2}, alpha2 = {3}'.format(L, H, alpha1_old, alpha2_old))
            return 0

        eta = self._computeEta(i1, i2)
        if eta >= 0:
            logging.debug('eta >= 0')
            return 0

        # Unconstrained optimum, then clipped to [L, H].  The clipped value
        # is the one that gets stored, and only after the progress check.
        alpha2_new = alpha2_old - y2 * (E1 - E2) / eta
        if alpha2_new > H:
            alpha2_new = H
        if L > alpha2_new:
            alpha2_new = L
        if abs(alpha2_new - alpha2_old) < self.MIN_ALPHA_STEP:
            logging.debug("alpha2_new ={0}, alpha2_old = {1}, i2 = {2} not moving enough".format(alpha2_new, alpha2_old, i2))
            return 0

        # Update alpha[i1], alpha[i2] and then b.
        alpha1_new = alpha1_old + y1 * y2 * (alpha2_old - alpha2_new)
        self.alphas[i1] = alpha1_new
        self.alphas[i2] = alpha2_new
        delta1 = alpha1_new - alpha1_old
        delta2 = alpha2_new - alpha2_old

        # Changing the alphas invalidates the cached kernel sums.
        self._refreshErrorCache(i1, i2, y1, y2, delta1, delta2)

        k11 = self.kernel.compute(i1, i1)
        k12 = self.kernel.compute(i1, i2)
        k22 = self.kernel.compute(i2, i2)
        # E1 and E2 are the errors from before the step, as the formulas require.
        b1 = self.b - E1 - y1 * delta1 * k11 - y2 * delta2 * k12
        b2 = self.b - E2 - y1 * delta1 * k12 - y2 * delta2 * k22
        if (0 < alpha1_new) and (self.C > alpha1_new):
            self.b = b1
        elif (0 < alpha2_new) and (self.C > alpha2_new):
            self.b = b2
        else:
            self.b = (b1 + b2)/2.0
        logging.debug('alphas and b changed')
        return 1

    def _examine(self, i2):
        '''Try to optimise sample i2 if it violates the KKT conditions.

        Returns:
            1 if a pair of multipliers was changed, 0 otherwise.
        '''
        y2 = self.dataSet.getLabel(i2)
        alpha2 = self.alphas[i2]
        E2 = self._computeEi(i2)
        r2 = E2 * y2
        # Only a sample that violates the KKT conditions is optimised.
        if ((r2 < -self.toler) and (alpha2 < self.C)) or ((r2 > self.toler) and (alpha2 > 0)):
            i1 = self._selectI(i2, E2)
            return self._takeSetp(i1, i2, E2)
        return 0

    def run(self):
        '''Run SMO until a full pass changes no multiplier.

        Passes alternate between all samples and only the non-bound
        samples (0 < alpha < C): after a full pass the loop switches to
        non-bound passes, and goes back to a full pass once a non-bound
        pass changes nothing.

        Returns:
            The array of Lagrange multipliers ``self.alphas``.
        '''
        numChanged = 0
        examineAll = True
        while numChanged > 0 or examineAll:
            numChanged = 0
            if examineAll:
                for i2 in range(self.row):
                    numChanged += self._examine(i2)
            else:
                # Iterate over sample indices, not over alpha values.
                for i2 in range(self.row):
                    if 0 < self.alphas[i2] < self.C:
                        numChanged += self._examine(i2)
            if examineAll:
                examineAll = False
            elif numChanged == 0:
                examineAll = True
            logging.debug('numChanged = {0}'.format(numChanged))
        return self.alphas