Ejemplo n.º 1
0
 def __init__(self, arr):
     """Build the per-row model of ``arr`` and index it in a ``BitTree``.

     :param arr: 2-D array-like exposing ``shape`` as ``(m, n)`` and
         row access via ``arr[i]``; entries equal to 1 are counted.

     After construction:

     * ``self.bound`` holds ``m + 1`` flags, all set to 1.
     * ``self.model`` is a list of ``(row index, ones in row, fraction
       of ones)`` tuples ordered by descending count of ones.  The
       fraction is capped at ``1 - 1e-6``.
     * ``self.bit_tree`` has been updated with the count of ones of
       every row, keyed by the row's position in the sorted model.
     """
     self.M = arr
     self.m, self.n = arr.shape
     self.bound = array([1] * (self.m + 1))
     self.bit_tree = BitTree(maxsize=self.m)
     self.model = []
     for row in range(self.m):
         ones = sum(1 for cell in self.M[row] if cell == 1)
         # float() forces true division: under Python 2 a plain
         # ``ones / self.n`` truncates to an integer.
         ratio = min(float(ones) / self.n, 1 - 1e-6)
         self.model.append((row, ones, ratio))
     # sorted() is stable, so rows with equal counts keep their order.
     self.model = sorted(self.model, key=lambda entry: -entry[1])
     for pos, entry in enumerate(self.model):
         self.bit_tree.update(pos, entry[1])
Ejemplo n.º 2
0
class BaseSegmentUnit(object):
    """Base class that groups the rows of a matrix and scores the grouping.

    Rows are ordered by how many entries equal 1 they contain.  The flag
    array ``bound`` (length ``m + 1``) marks group boundaries over that
    ordering: consecutive set flags at positions ``a`` and ``b`` delimit
    the group ``model[a:b]``.  Subclasses implement :meth:`find` to search
    for a good set of boundaries.
    """

    def __init__(self, arr):
        """Build the per-row model of ``arr`` and index it in a ``BitTree``.

        :param arr: 2-D array-like exposing ``shape`` as ``(m, n)`` and
            row access via ``arr[i]``; entries equal to 1 are counted.
        """
        self.M = arr
        self.m, self.n = arr.shape
        # Every position starts out as a boundary.
        self.bound = array([1] * (self.m + 1))
        self.bit_tree = BitTree(maxsize=self.m)
        # Entries are (row index, ones in row, fraction of ones).
        self.model = []
        for row in range(self.m):
            ones = sum(1 for cell in self.M[row] if cell == 1)
            # float() forces true division: under Python 2 a plain
            # ``ones / self.n`` truncates to an integer.
            ratio = min(float(ones) / self.n, 1 - 1e-6)
            self.model.append((row, ones, ratio))
        # Descending by count of ones; sorted() is stable for ties.
        self.model = sorted(self.model, key=lambda entry: -entry[1])
        for pos, entry in enumerate(self.model):
            self.bit_tree.update(pos, entry[1])

    def show(self):
        """Print the stored matrix and the sorted row model."""
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("获取的部分S[]数组")
        print(self.M)
        print("model:")
        pprint(self.model)

    @property
    def l(self):
        """
        :return: number of groups in the current partition, i.e. the
            number of flags equal to 1 in ``bound`` minus one
        """
        return sum(1 for flag in self.bound if flag == 1) - 1

    @property
    def lm(self):
        """
        :return: Lm of the current partition,
            ``2 * l * log2(m) + m * log2(m)``
        """
        bits = log(self.m, 2)
        return 2 * self.l * bits + self.m * bits

    @property
    def ld(self):
        """
        :return: Ld of the current partition, ``-log2(Pr)``
        """
        # NOTE(review): Pr is a product of many probabilities and can
        # underflow to 0.0 for large inputs, which would make log() fail.
        return -log(self.pr, 2)

    @property
    def ll(self):
        """
        :return: Ll = Ld + Lm
        """
        return self.ld + self.lm

    @property
    def pr(self):
        """
        Pr of the current partition.

        For every group ``model[a:b]`` the shared probability ``px`` is
        ``bit_tree.query(a, b - 1) / ((b - a) * n)``, and each row in the
        group with ``ne`` ones contributes ``px**ne * (1 - px)**(n - ne)``.

        :return: Pr  (Ld = -log(Pr, 2))
        """
        res = 1.0
        for end, flag in enumerate(self.bound):
            if flag == 0 or end == 0:
                continue
            start = self.prev_bound(end)
            if start is None:
                continue
            # presumably query(a, b) is the total count of ones over sorted
            # rows a..b inclusive -- confirm against BitTree.
            total = self.bit_tree.query(start, end - 1)
            # float() avoids Python 2 integer truncation of the ratio.
            px = float(total) / ((end - start) * self.n)
            for pos in range(start, end):
                ones = self.model[pos][1]
                res *= px ** ones * (1 - px) ** (self.n - ones)
        return res

    def prev_bound(self, i):
        """
        :param i: position in ``bound``
        :return: the closest boundary position strictly before ``i``, or
            ``None`` when there is none
        """
        # Walk backwards so the scan stops at the nearest set flag.
        for pos in range(min(i, len(self.bound)) - 1, -1, -1):
            if self.bound[pos]:
                return pos
        return None

    def next_bound(self, i):
        """
        :param i: boundary position
        :return: the closest boundary position strictly after ``i``, or
            ``None`` when there is none
        """
        for pos in range(max(i + 1, 0), len(self.bound)):
            if self.bound[pos]:
                return pos
        return None

    def find(self):
        """
        Run the partitioning algorithm.

        :return: a value representing the best Ll; implementations store
            their result in ``self.model`` and ``self.bound``.
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError()