Example 1
import pickle

from nose.tools import eq_
# RecyclingSet comes from the package under test; its import path is
# project-specific and omitted here.

def test_delete_and_pickle():
    '''
    Deleting an element doesn't affect the remaining elements'
    indices.
    '''
    s = RecyclingSet(10, ['dog','cat','banana'])
    del s[1]
    eq_(s[1], None)
    eq_(s.index('banana'), 2)

    # Pickling doesn't change things.
    s2 = pickle.loads(pickle.dumps(s))

    eq_(s, s2)
    eq_(s2[1], None)
    eq_(s2.index('banana'), 2)

    assert None not in s2
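
RecyclingSet itself is project-specific, so for orientation only, here is a hypothetical stand-in (our own name: MiniRecyclingSet) that satisfies just the slot semantics this test exercises: a fixed-size ordered set where deleting an element leaves a None hole, so the remaining elements keep their indices.

class MiniRecyclingSet(object):
    # Toy stand-in, not the real RecyclingSet.
    def __init__(self, size, items=()):
        self.slots = [None] * size
        for i, item in enumerate(items):
            self.slots[i] = item

    def __getitem__(self, i):
        return self.slots[i]

    def __delitem__(self, i):
        self.slots[i] = None            # leave a hole; other indices unchanged

    def __contains__(self, item):
        return item is not None and item in self.slots

    def index(self, item):
        return self.slots.index(item)

    def __eq__(self, other):            # so a pickle round trip compares equal
        return self.slots == other.slots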
Example 2
class CCIPCA(object):
    """A Candid Covariance-free Incremental Principal Component Analysis implementation"""
    def __init__(self,
                 k,
                 ev=None,
                 i=0,
                 bootstrap=20,
                 amnesia=3.0,
                 remembrance=100000.0,
                 vector_size=10000,
                 auto_baseline=True):
        """Construct a CCIPCA computation with k initial eigenvectors ev at iteration i, using simple averaging until the iteration given by bootstrap, afterward using CCIPCA given amnesic parameter amnesia, rememberance parameter remembrance, and a weight vector and subspace criteria for simultaneous vector presentation"""
        if ev is not None:
            self._v = ev
        else:
            self._v = np.zeros((k, vector_size))

        self._k = k
        self._amnesia = amnesia
        self._iteration = i
        self._bootstrap = bootstrap
        self._remembrance = remembrance
        self._vector_size = vector_size
        self._labels = RecyclingSet(self._vector_size)
        self._labels.listen_for_drops(self.forget_column)
        self._auto_baseline = auto_baseline

    def zerovec(self):
        return np.zeros((self._vector_size, ))

    def compute_attractor(self, k, u, v_hat=None):
        """Compute the attractor vector for eigenvector k with vector u"""

        if k == 0:
            return u
        if v_hat is None:
            v_hat = hat(self._v[k])
        return u * np.dot(u, v_hat)

    def update_eigenvector(self, k, u, learn=False):
        """Update eigenvector k with vector u, returning pair
        containing magnitude of eigenvector component and residue vector"""
        if learn:
            # Handle elementary cases
            if self._iteration < k:
                return 0.0, self.zerovec()

            if self._iteration == k:
                self._v[k, :] = u
                mag = np.linalg.norm(self._v[k])
                return mag, self.zerovec()

            # Compute weighting factors: plain averaging while bootstrapping,
            # amnesic weighting (old data fades faster) afterward
            n = min(self._iteration, self._remembrance)
            if n < self._bootstrap:
                w_old = float(n - 1) / n
                w_new = 1.0 / n
            else:
                amn = self._amnesia
                w_old = float(n - amn) / n
                w_new = float(amn) / n

            # Compute the attractor
            attractor = self.compute_attractor(k, u)

            # Approach attractor
            self._v[k] *= w_old
            self._v[k] += attractor * w_new

        # Calculate component magnitudes
        v_hat = hat(self._v[k])
        if k == 0:
            if self._auto_baseline:
                base_mag = np.linalg.norm(self._v[k])
            else:
                base_mag = 0.0
        else:
            base_mag = np.dot(u, v_hat)

        u_residue = u - (v_hat * base_mag)
        if k == 0:
            logger.debug('e0: %s', self._v[k])
            logger.debug('u: %s', u)
            if learn:
                logger.debug('attractor: %s', attractor)
            logger.debug('residue: %s', u_residue)
            logger.debug('')
        return base_mag, u_residue

    def iteration(self, u_tensor, learn=False):
        """Train the eigenvector table with new vector u"""
        print "iteration = ", self._iteration
        mags = []
        u_copy = self.zerovec()
        if learn:
            for (key, ), value in u_tensor.iteritems():
                u_copy[self._labels.add(key)] = value
        else:
            for (key, ), value in u_tensor.iteritems():
                if key in self._labels:
                    u_copy[self._labels.index(key, touch=False)] = value
        if np.linalg.norm(u_copy) == 0:
            # No overlap between the input's labels and ours
            return np.zeros((self._v.shape[0], ))

        for k in xrange(min(self._k, self._iteration + 1)):
            mag, new_u = self.update_eigenvector(k, u_copy, learn)
            u_copy = new_u
            mags.append(mag)
        if learn:
            self._iteration += 1

            # Sort all but the 0th vector by their sum of squares
            #print "magnitudes:", np.sum(self._v[1:] * self._v[1:], axis=1)
            sort_order = np.argsort(-np.sum(self._v[1:] * self._v[1:], axis=1))
            self._v[1:] = self._v[1:][sort_order]
        return mags

    def fixed_iteration(self, column, learn=False):
        """Train the eigenvector table with new vector u"""
        print "iteration = ", self._iteration
        mags = []
        u_copy = column[:]
        assert np.linalg.norm(u_copy) > 0

        for k in xrange(min(self._k, self._iteration + 1)):
            mag, new_u = self.update_eigenvector(k, u_copy, learn)
            u_copy = new_u
            mags.append(mag)
        if learn:
            self._iteration += 1

            # Sort all but the 0th vector by their sum of squares
            sort_order = np.argsort(-np.sum(self._v[1:] * self._v[1:], axis=1))
            self._v[1:] = self._v[1:][sort_order]
        return mags

    def reconstruct_array(self, weights):
        """Create a linear combination of the eigenvectors"""
        total = self.zerovec()  # avoid shadowing the builtin sum
        for index, w in enumerate(weights):
            total += self._v[index] * w
        return total

    def reconstruct(self, weights):
        """
        Get a linear combination of the eigenvectors, and re-express it as
        a Divisi tensor (a dense labeled vector).
        """
        array = self.reconstruct_array(weights)
        assert not np.isnan(array[0])
        return LabeledView(DenseTensor(array), [self._labels])

    def get_array(self):
        return self._v

    def get_labels(self):
        return self._labels

    def smooth(self, u, k_max=None, learn=False):
        """Project u onto the first k_max eigenvectors and reconstruct it"""
        mags = self.iteration(u, learn)
        if k_max is not None:
            mags = mags[:k_max]
        vec = self.reconstruct(mags)
        if not learn:
            logger.debug("decomp: %s" % str(mags))
        return vec

    def forget_column(self, slot, label):
        logger.debug("forgetting column %d" % slot)
        self._v[:, slot] = 0
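
The heart of the class is the amnesic update in update_eigenvector: past the bootstrap phase, each estimate is blended as v_k <- ((n - amnesia)/n) * v_k + (amnesia/n) * u(u . v_hat_k), and the input is deflated by each component before the next one is trained. A minimal self-contained sketch of that loop on plain NumPy arrays (ccipca_step is our own name; no RecyclingSet/Divisi machinery, and we assume n is past both the bootstrap phase and index k, skipping the k == 0 running-mean baseline):

import numpy as np

def ccipca_step(V, n, u, amnesia=3.0):
    # One CCIPCA pass: update every row of V (a k x d array of
    # unnormalized eigenvector estimates) with the input vector u.
    mags = []
    residue = u.astype(float)
    for k in range(V.shape[0]):
        w_old = (n - amnesia) / float(n)   # weight kept by the old estimate
        w_new = amnesia / float(n)         # weight given to the attractor
        v_hat = V[k] / np.linalg.norm(V[k])
        V[k] = w_old * V[k] + w_new * residue * np.dot(residue, v_hat)
        v_hat = V[k] / np.linalg.norm(V[k])
        mag = np.dot(residue, v_hat)       # component magnitude, as in update_eigenvector
        residue = residue - mag * v_hat    # deflate before the next component
        mags.append(mag)
    return mags

# Toy run: feed random vectors with anisotropic variance; the row
# estimates drift toward the data's principal directions.
rng = np.random.RandomState(0)
V = rng.randn(3, 8)
for n in range(4, 500):
    ccipca_step(V, n, rng.randn(8) * np.linspace(3.0, 0.5, 8))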