Example #1
0
class Log(object):
    """Keep a bounded random sample of the items seen so far.

    Based on Dr. Menzies' implementation. Items are held in a
    ``SortedList``; once ``max_size`` items are cached, each further item
    replaces a randomly chosen cached one with probability
    ``max_size / n`` (``n`` being the number of items offered so far).

    Subclasses are expected to supply the statistics side: ``setup``,
    ``ish`` and ``_prepare_data`` raise ``NotImplementedError`` here, and
    ``report`` relies on a ``_generate_report`` method that this class
    does not define.
    """

    MAX_SIZE = 256

    def __init__(self, inits=None, label=None, max_size=MAX_SIZE):
        """Create a log, optionally pre-filled from ``inits``.

        inits    -- optional iterable of items to add one by one.
        label    -- optional name for this log; stored as '' when falsy.
        max_size -- cache capacity; ``None`` disables the limit.
        """
        self._cache = SortedList()
        self._report = None  # filled lazily by report()
        self.label = label or ''
        self._n = 0  # count of non-None items offered so far
        self.max_size = max_size
        self._valid_statistics = False
        self._invalidate_statistics()
        if inits:
            # Explicit loop rather than map(): on Python 3 map() is lazy
            # and would silently add nothing.
            for item in inits:
                self.__iadd__(item)

    def random_index(self):
        """Return ``base.random_index`` applied to the cache."""
        return base.random_index(self._cache)

    @classmethod
    def wrap(cls, x, max_size=MAX_SIZE):
        """Return ``x`` itself if it is already a ``cls`` instance,
        otherwise a new ``cls`` initialised from the items of ``x``."""
        if isinstance(x, cls):
            return x
        return cls(inits=x, max_size=max_size)

    def __len__(self):
        """Number of items currently cached (not the number seen)."""
        return len(self._cache)

    def extend(self, xs):
        """Add every item of ``xs`` to the log.

        Raises ``TypeError`` if ``xs`` is not iterable.
        """
        # iter() is used as the iterability probe because the
        # collections.Iterable alias no longer exists on Python >= 3.10.
        try:
            items = iter(xs)
        except TypeError:
            raise TypeError()
        for item in items:
            self.__iadd__(item)

    def __iadd__(self, x):
        """Offer ``x`` to the sample (``log += x``); always returns self.

        * ``None`` is ignored.
        * Another ``Log`` is merged by offering each of its cached items.
        * Anything else is cached directly while there is room;
          afterwards it replaces a random cached item with probability
          ``max_size / n``.

        Whenever the cache content changes, ``_invalidate_statistics()``
        and ``_change(x)`` are called.
        """
        if x is None:
            # Must return self: ``log += None`` rebinds ``log`` to
            # whatever this method returns.
            return self

        if isinstance(x, Log):
            # Snapshot first so that ``log += log`` does not iterate a
            # container that is being mutated. Only the merged items are
            # counted in _n, not the Log operand itself.
            for item in list(x._cache):
                self.__iadd__(item)
            return self

        self._n += 1

        if self.max_size is None or len(self._cache) < self.max_size:
            # cache has room: always keep the item
            self._cache.add(x)
            changed = True
        else:
            # Cache is full: keep the newcomer with probability
            # max_size / n, so items arriving later in the run are less
            # likely to get in. float() keeps this a true division on
            # Python 2 as well.
            keep_probability = float(self.max_size) / self._n
            changed = random.random() <= keep_probability
            if changed:
                self._cache.remove(random.choice(self._cache))
                self._cache.add(x)

        if changed:
            self._invalidate_statistics()
            self._change(x)

        return self

    def __add__(self, x, max_size=MAX_SIZE):
        """Return a new log of the same class built from the cached items
        of ``self`` followed by those of ``x``.

        ``x`` must expose a ``_cache`` attribute. Note that ``max_size``
        defaults to the class-level ``MAX_SIZE``, not to the ``max_size``
        of either operand.
        """
        inits = itertools.chain(self._cache, x._cache)
        return self.__class__(inits=inits, max_size=max_size)

    def any(self):
        """Return one cached item picked by ``random.choice``."""
        return random.choice(self._cache)

    def report(self):
        """Return the report, generating it on first use.

        The report comes from ``self._generate_report()``, which is not
        defined in this class (presumably supplied by subclasses).
        """
        # NOTE(review): nothing visible here resets _report when new
        # items are added, so a cached report may be stale -- confirm.
        if self._report is None:
            self._report = self._generate_report()
        return self._report

    def setup(self):
        """Not implemented in this class."""
        raise NotImplementedError()

    def as_list(self):
        """Return the cache's ``as_list()`` result."""
        return self._cache.as_list()

    def _invalidate_statistics(self):
        """Mark any derived statistics as out of date.

        Default implementation. If ``_valid_statistics`` is something
        other than a boolean, reimplement!
        """
        self._valid_statistics = False

    def ish(self, *args, **kwargs):
        """Not implemented in this class."""
        raise NotImplementedError()

    def _change(self, x):
        """Hook called with each item that altered the cache.

        Override to add incremental updating functionality.
        """
        pass

    def _prepare_data(self):
        """Not implemented in this class; the error names the class."""
        s = '_prepare_data() not implemented for ' + self.__class__.__name__
        raise NotImplementedError(s)

    def __iter__(self):
        """Iterate over the cached items."""
        return iter(self._cache)

    def contents(self):
        """Return the cache's ``as_list()`` result (same as ``as_list``)."""
        return self._cache.as_list()