import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

# Autoencoder, NNet, errors and get_updates are helpers from companion
# modules of the original project; the import paths below are placeholders
# for wherever they live in your code base.
from autoencoder import Autoencoder
from nnet import NNet, errors
from updates import get_updates

class SDAEmbedder(object):
    """Build a stack of denoising autoencoders to perform low dim embedding"""

    def __init__(self, dimensions, noise=0.1, reconstruction_penalty=1.0,
                 sparsity_penalty=1.0, embedding_penalty=1.0,
                 learning_rate=0.01, seed=None):
        """Initialize a stack of autoencoders with sparsity penalty

        dimensions is a python sequence of the input, hidden and output
        activation unit of the stacked architecture.

        TODO: implement fine-tuning by applying SGD on the encoder using a
        divergence measure on the pairwise similarities in input and output
        space as object function to minimize. E.g.: (t-)SNE or Elastic
        Embedding.

        """
        assert len(dimensions) >= 2
        self.rng = np.random.RandomState(seed)
        self.noise_rng = RandomStreams(seed)

        # build a stack of autoencoders for the requested dimensions
        self.autoencoders = []
        previous_output = T.matrix('ae_in')

        for in_dim, out_dim in zip(dimensions[:-1], dimensions[1:]):
            ae = Autoencoder(in_dim, out_dim, tied=True, noise=noise,
                             rng=self.rng, noise_rng=self.noise_rng)
            ae.build(previous_output)
            previous_output = ae.output
            self.autoencoders.append(ae)

        # chain the encoding parts as a feed forward network
        self.encoder = NNet(self.autoencoders, errors.mse)
        self.encoder.build(T.matrix('enc_in'), T.vector('enc_target'))

        # compile the training functions
        self.reconstruction_penalty = reconstruction_penalty
        self.sparsity_penalty = sparsity_penalty
        self.embedding_penalty = embedding_penalty
        self.pre_trainers = []
        for ae in self.autoencoders:
            cost = self.get_ae_cost(ae)
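            # gradients are taken w.r.t. this layer's parameters only
            # (ae.pre_params): the layers below behave as a frozen
            # preprocessing pipeline, which is what makes the pre-training
            # greedy and layer-wise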
            pre_train = theano.function(
                [self.autoencoders[0].input],
                cost,
                updates=get_updates(ae.pre_params, cost, learning_rate)
            )
            self.pre_trainers.append(pre_train)

        # compile the encoding function
        self.encode = theano.function([self.encoder.input], self.encoder.output)

    def pre_train(self, data, slice_=slice(None, None), batch_size=50,
                  epochs=100, checkpoint=10, patience=20, tolerance=1e-5):
        """Iteratively apply SGD to each autoencoder

        If slice_ is provided, only the matching layers are trained (by default
        all layers are trained).
        """
        data = np.atleast_2d(data)
        data = np.asanyarray(data, dtype=theano.config.floatX)
        n_samples, n_features = data.shape

        n_batches = n_samples // batch_size

        # select the trainers to use
        trainers = self.pre_trainers[slice_]

        shuffled = data.copy()
        for i, trainer in enumerate(trainers):
            # reset the early stopping state for each layer
            best_error = None
            best_epoch = 0
            for e in range(epochs):
                # reshuffling data to enforce I.I.D. assumption
                self.rng.shuffle(shuffled)

                err = np.zeros(n_batches)
                for b in range(n_batches):
                    batch_input = shuffled[b * batch_size:(b + 1) * batch_size]
                    err[b] = trainer(batch_input).mean()

                error = err.mean()
                if e % checkpoint == 0:
                    print("layer [%d/%d], epoch [%03d/%03d]: err: %0.5f" % (
                        i + 1, len(trainers), e + 1, epochs, error))
                if best_error is None or error < best_error - tolerance:
                    best_error = error
                    best_epoch = e
                elif e - best_epoch > patience:
                    print("layer [%d/%d]: early stopping at epoch %d" % (
                        i + 1, len(trainers), e + 1))
                    break

    def fine_tune(self, data, batch_size=50, epochs=100, learning_rate=0.1,
                  checkpoint=10, patience=20, tolerance=1e-5):
        """Use SGD to optimize the embedding computed by the encoder stack"""
        data = np.atleast_2d(data)
        data = np.asanyarray(data, dtype=theano.config.floatX)
        n_samples, n_features = data.shape

        best_error = None
        best_epoch = 0
        n_batches = n_samples // batch_size

        cost = self.get_embedding_cost(self.autoencoders[-1])
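        # unlike pre-training, fine tuning updates all encoder parameters at
        # once by back-propagating the top-level embedding divergence through
        # the whole stack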
        tuner = theano.function(
            [self.autoencoders[0].input],
            cost,
            updates=get_updates(self.encoder.params, cost, learning_rate)
        )

        shuffled = data.copy()
        for e in range(epochs):
            # reshuffling data to enforce I.I.D. assumption
            self.rng.shuffle(shuffled)

            err = np.zeros(n_batches)
            for b in xrange(n_batches):
                batch_input = shuffled[b * batch_size:(b + 1) * batch_size]
                err[b] = tuner(batch_input).mean()

            error = err.mean()
            if e % checkpoint == 0:
                print("fine tune: epoch [%03d/%03d]: err: %0.5f" % (
                    e + 1, epochs, error))
            if best_error is None or error < best_error - tolerance:
                best_error = error
                best_epoch = e
            elif e - best_epoch > patience:
                print("fine tune: early stopping at epoch %d" % (e + 1))
                break

    def get_ae_cost(self, ae):
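        """Weighted sum of the pre-training penalties for one autoencoder

        Combines the denoising reconstruction error, an L1-style sparsity
        penalty on the activations and the elastic embedding divergence,
        each scaled by the matching penalty passed to __init__ (a penalty
        of 0 disables the corresponding term).
        """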
        cost = 0.0
        if self.reconstruction_penalty > 0:
            cost += self.reconstruction_penalty * ae.cost
        if self.sparsity_penalty > 0:
            # assuming the activation of each unit lies in [-1, 1] (e.g.
            # tanh units), penalize the mean L1 distance of the activations
            # to -1 so that most units rest in their "off" state
            # TODO: run a component-wise online estimate of the activations
            # using an exponential decay instead of the spatial sparsity
            # constraint
            cost += self.sparsity_penalty * T.mean(abs(ae.output + 1))
        if self.embedding_penalty > 0:
            cost += self.embedding_penalty * self.get_embedding_cost(ae)
        return cost

    def get_embedding_cost(self, ae, lambda_=0.5):
        """Local divergence from pairwise similarities in input and output

        The following is derived from the Elastic Embedding cost from
        M. Carreira-Perpinan 2010: an attractive term penalizes large output
        distances where the input distances are small, and a symmetric
        repulsive term penalizes small output distances where the input
        distances are large.
        """
        ae_in = self.autoencoders[0]
        # TODO: make it possible to provide dx2 and dx2.mean() in advance
        # and online estimate dy2.mean() on the whole collection using an
        # exponential decay
        # squared euclidean distances between consecutive rows of the batch:
        # since the data is reshuffled at each epoch this is a cheap sample
        # of the pairwise distances in input and output space
        dx2 = T.sum((ae_in.input[:-1] - ae_in.input[1:]) ** 2, axis=1)
        dy2 = T.sum((ae.output[:-1] - ae.output[1:]) ** 2, axis=1)
        return (1 - lambda_) * T.mean(dy2 / dy2.mean() * T.exp(-dx2 / dx2.mean())) \
            + lambda_ * T.mean(dx2 / dx2.mean() * T.exp(-dy2 / dy2.mean()))
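

# A minimal usage sketch, assuming the companion modules imported at the top
# provide Autoencoder, NNet, errors and get_updates with the behavior relied
# on above; the layer sizes and the synthetic data below are made up for
# illustration only.
if __name__ == "__main__":
    rng = np.random.RandomState(0)
    # 1000 synthetic samples with 64 features scaled to [-1, 1]
    X = rng.uniform(low=-1.0, high=1.0, size=(1000, 64))
    X = X.astype(theano.config.floatX)

    # stack 64 -> 32 -> 16 -> 2: three denoising autoencoders ending in a
    # 2D embedding space
    embedder = SDAEmbedder([64, 32, 16, 2], noise=0.1, seed=42)
    embedder.pre_train(X, epochs=50, checkpoint=10)
    embedder.fine_tune(X, epochs=50, checkpoint=10)

    # project the data into the low dimensional embedding space
    embedded = embedder.encode(X)
    print("embedded shape:", embedded.shape)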