예제 #1
0
 def test_Kernel(self):
     return
     d = DigitDataSet()
     data_path = os.path.join('..','data', 'sample')
     d.load(data_path)
     m, n = d.shape()
     p = 0
     n = 0
     el = 0
     k = Kernel(d, kernel_func)
     for i in range(m):
         for j in range(m):
             v = k.compute(i, j)
             if v == 0:
                 n += 1
             elif v == 1:
                 p +=1
             else:
                 el +=1
     print('p', p)
     print('n', n)
     print('el',el)
예제 #2
0
class SMOAlgorithm(object):
    """Sequential Minimal Optimization (SMO) solver for Lagrange multipliers.

    The solver repeatedly picks a pair of multipliers, optimises them jointly
    and updates the threshold ``b`` until a full sweep over the samples
    changes nothing.

    Attributes:
        C: upper bound applied to every multiplier.
        toler: tolerance used when testing the KKT conditions.
        dataSet: object providing ``shape()``, ``getLabel(i)`` (and
            ``getData(i)``); presumably labels are +1/-1 -- confirm against
            the data set implementation.
        row, dim: the two values returned by ``dataSet.shape()``.
        alphas: the Lagrange multipliers, one per sample, initially zero.
        b: the threshold term.
        kernel: ``Kernel`` instance used for all kernel evaluations.
        EiCache: per-sample cache of ``sum_j alpha_j * y_j * K(i, j)``;
            ``inf`` marks an entry that has not been computed yet.
    """

    def __init__(self, dataSet, C, toler, kernel_func = kernel_func):
        self.C = C
        self.toler = toler
        self.dataSet = dataSet
        self.row, self.dim = self.dataSet.shape()
        # Lagrange multipliers, one per sample.
        self.alphas = numpy.zeros(self.row, dtype=float)
        self.b = 0
        # Kernel function wrapper.
        self.kernel = Kernel(dataSet, kernel_func)

        # Error cache, initialised to +inf meaning "not computed yet".
        self.EiCache = numpy.full(self.row, float('inf'), dtype=float)

    def _computeEi(self, i, reCompute = False):
        """Return the prediction error Ei for sample ``i``.

        The kernel-weighted sum is taken from ``EiCache`` when available and
        computed (then cached) otherwise.

        Args:
            i: sample index.
            reCompute: when true, ignore the cached sum and recompute it.

        Returns:
            ``sum_j alpha_j * y_j * K(i, j) + b - y_i``.
        """
        yi = self.dataSet.getLabel(i)
        if reCompute or self.EiCache[i] == float('inf'):
            total = 0
            for j in range(self.row):
                alpha_j = self.alphas[j]
                if alpha_j == 0:
                    # A zero multiplier contributes nothing; skip the
                    # kernel evaluation.
                    continue
                total += alpha_j * self.dataSet.getLabel(j) * self.kernel.compute(i, j)
            self.EiCache[i] = total
        return self.EiCache[i] + self.b - yi

    def _computeEta(self, i1, i2):
        """Return eta = 2*K(i1, i2) - K(i1, i1) - K(i2, i2)."""
        k11 = self.kernel.compute(i1, i1)
        k12 = self.kernel.compute(i1, i2)
        k22 = self.kernel.compute(i2, i2)
        return 2*k12 - k11 - k22

    def _selectI(self, i2, E2):
        """Choose the partner index i1 for the multiplier at ``i2``.

        If at least one multiplier lies strictly between 0 and C, every
        sample is scanned and the index maximising |E1 - E2| is returned.
        Otherwise the partner is picked with ``selectByRandom``.

        Args:
            i2: index of the multiplier already selected.
            E2: prediction error of sample ``i2``.

        Returns:
            The index of the partner multiplier.
        """
        hasNonBound = any(0 < alpha < self.C for alpha in self.alphas)
        if not hasNonBound:
            # No multiplier strictly inside (0, C): fall back to a random pick.
            i1 = selectByRandom(i2, self.row)
            logging.debug('i2={0}, select i1 by random, i1={1}'.format(i2,i1))
            return i1

        maxIndex = -1
        maxDelta = -1
        for i1 in range(self.row):
            delta = abs(self._computeEi(i1) - E2)
            if delta > maxDelta:
                maxDelta = delta
                maxIndex = i1
        logging.debug('i2={0}, when (i1={1}) ,|E1-E2|->max, max={2}'.format(i2, maxIndex, maxDelta))
        return maxIndex

    def _updateEiCache(self, i1, i2, delta1, delta2):
        """Bring every computed cache entry in line with the new multipliers.

        Each cached sum depends on all multipliers, so once alpha[i1] and
        alpha[i2] change every computed entry must be adjusted, not only the
        entries of i1 and i2.

        Args:
            i1, i2: indices of the two multipliers that changed.
            delta1: ``y1 * (alpha1_new - alpha1_old)``.
            delta2: ``y2 * (alpha2_new - alpha2_old)``.
        """
        inf = float('inf')
        for k in range(self.row):
            if self.EiCache[k] == inf:
                # Never computed; it will be built from scratch on demand.
                continue
            self.EiCache[k] += (delta1 * self.kernel.compute(k, i1)
                                + delta2 * self.kernel.compute(k, i2))

    def _takeStep(self, i1, i2, E2):
        """Jointly optimise alpha[i1] and alpha[i2] and update ``b``.

        Args:
            i1, i2: indices of the multipliers to optimise.
            E2: prediction error of sample ``i2`` under the current state.

        Returns:
            1 if the multipliers (and ``b``) were changed, 0 otherwise.
            When 0 is returned the solver state is left untouched.
        """
        logging.debug('takeStep({0}, {1})'.format(i1, i2))
        assert i1 < self.row and i2 < self.row
        if i1 == i2:
            return 0
        y1 = self.dataSet.getLabel(i1)
        y2 = self.dataSet.getLabel(i2)
        E1 = self._computeEi(i1)
        alpha1_old = self.alphas[i1]
        alpha2_old = self.alphas[i2]

        # Feasible segment [L, H] for the new alpha2.
        if y1 != y2:
            L = max(0.0, alpha2_old - alpha1_old)
            H = min(self.C, self.C + alpha2_old - alpha1_old)
        else:
            L = max(0.0, alpha1_old + alpha2_old - self.C)
            H = min(self.C, alpha1_old + alpha2_old)

        if L == H:
            logging.debug('L = {0}, H = {1}, alpha1 = {2}, alpha2 = {3}'.format(L, H, alpha1_old, alpha2_old))
            return 0
        eta = self._computeEta(i1, i2)
        if eta >= 0:
            logging.debug('eta >= 0')
            return 0

        # Unconstrained optimum, then clipped to the feasible segment.
        alpha2_new = alpha2_old- y2 * (E1 - E2) / eta
        if alpha2_new > H:
            alpha2_new = H
        if L > alpha2_new:
            alpha2_new = L

        if abs(alpha2_new- alpha2_old) < 0.0001:
            logging.debug("alpha2_new ={0}, alpha2_old = {1}, i2 = {2} not moving enough".format(alpha2_new, alpha2_old, i2))
            return 0

        # Commit both multipliers only now, so a rejected step never leaves
        # an unclipped or half-applied value behind.
        alpha1_new = alpha1_old + y1 * y2 * (alpha2_old - alpha2_new)
        self.alphas[i1] = alpha1_new
        self.alphas[i2] = alpha2_new

        delta1 = y1 * (alpha1_new - alpha1_old)
        delta2 = y2 * (alpha2_new - alpha2_old)

        # The multipliers changed, so the cached error sums must follow.
        self._updateEiCache(i1, i2, delta1, delta2)

        k11 = self.kernel.compute(i1, i1)
        k12 = self.kernel.compute(i1, i2)
        k22 = self.kernel.compute(i2, i2)

        # E1 and E2 are the errors from before the update, as required here.
        b1 = self.b - E1 - delta1 * k11 - delta2 * k12
        b2 = self.b - E2 - delta1 * k12 - delta2 * k22

        if (0 < alpha1_new) and (self.C > alpha1_new):
            self.b = b1
        elif (0 < alpha2_new) and (self.C > alpha2_new):
            self.b = b2
        else:
            self.b = (b1 + b2)/2.0

        logging.debug('alphas and b changed')
        return 1

    # Backward-compatible alias for the original (misspelled) method name.
    _takeSetp = _takeStep

    def _examine(self, i2):
        """Try to optimise the multiplier at ``i2`` if it violates the KKT conditions.

        Args:
            i2: index of the sample to examine.

        Returns:
            1 if a pair of multipliers was changed, 0 otherwise.
        """
        y2 = self.dataSet.getLabel(i2)
        alpha2 = self.alphas[i2]
        E2 = self._computeEi(i2)
        r2 = E2 * y2

        # Only a sample violating the KKT conditions (within toler) is optimised.
        if ((r2 < -self.toler) and (alpha2 < self.C)) or ((r2 > self.toler) and (alpha2 > 0)):
            i1 = self._selectI(i2, E2)
            return self._takeStep(i1, i2, E2)
        else:
            return 0

    def run(self):
        """Run the optimisation until a full sweep changes no multiplier.

        Alternates between a sweep over all samples and sweeps over the
        samples whose multiplier lies strictly between 0 and C.

        Returns:
            The array of Lagrange multipliers (``self.alphas``).
        """
        numChanged = 0
        examineAll = True

        while numChanged > 0 or examineAll:
            numChanged = 0
            if examineAll:
                for i2 in range(self.row):
                    numChanged += self._examine(i2)
            else:
                # Iterate over sample indices and test each multiplier's
                # value; _examine expects an index, not an alpha value.
                for i2 in range(self.row):
                    if 0 < self.alphas[i2] < self.C:
                        numChanged += self._examine(i2)
            if examineAll:
                examineAll = False
            elif numChanged == 0:
                examineAll = True
            logging.debug('numChanged = {0}'.format(numChanged))
        return self.alphas