コード例 #1
0
ファイル: embedding.py プロジェクト: ogrisel/codemaker
    def __init__(self, dimensions, noise=0.1, reconstruction_penalty=1.0,
                 sparsity_penalty=1.0, embedding_penalty=1.0,
                 learning_rate=0.01, seed=None):
        """Build a stack of autoencoders with sparsity penalty

        One autoencoder is created for every pair of consecutive entries of
        ``dimensions``; the output of each one is used as the symbolic input
        of the next. The constructor then chains the layers into an encoder
        network, compiles one pre-training function per layer and finally
        compiles the ``encode`` function.

        Parameters
        ----------
        dimensions : python sequence
            Input, hidden and output activation unit sizes of the stacked
            architecture. Must hold at least two entries.
        noise : forwarded unchanged to every ``Autoencoder``.
        reconstruction_penalty, sparsity_penalty, embedding_penalty :
            Stored as attributes of the same name (presumably read by the
            cost helpers such as ``get_ae_cost`` -- confirm there).
        learning_rate : forwarded to ``get_updates`` for the pre-trainers.
        seed : seeds both the numpy ``RandomState`` and the ``RandomStreams``
            noise generator.

        TODO: implement fine-tuning by applying SGD on the encoder using a
        divergence measure on the pairwise similarities in input and output
        space as object function to minimize. E.g.: (t-)SNE or Elastic
        Embedding.

        """
        assert len(dimensions) >= 2

        # weights of the individual cost terms; set before any cost is built
        self.reconstruction_penalty = reconstruction_penalty
        self.sparsity_penalty = sparsity_penalty
        self.embedding_penalty = embedding_penalty

        self.rng = np.random.RandomState(seed)
        self.noise_rng = RandomStreams(seed)

        # one autoencoder per consecutive (input size, output size) pair,
        # each layer reading the symbolic output of the one before it
        self.autoencoders = []
        layer_input = T.matrix('ae_in')
        layer_shapes = zip(dimensions[:-1], dimensions[1:])
        for n_in, n_out in layer_shapes:
            layer = Autoencoder(n_in, n_out, tied=True, noise=noise,
                                rng=self.rng, noise_rng=self.noise_rng)
            layer.build(layer_input)
            self.autoencoders.append(layer)
            layer_input = layer.output

        # the encoding halves of the layers form a feed forward network
        self.encoder = NNet(self.autoencoders, errors.mse)
        encoder_input = T.matrix('enc_in')
        encoder_target = T.vector('enc_target')
        self.encoder.build(encoder_input, encoder_target)

        # one compiled pre-training step per layer; every step is fed through
        # the input variable of the first autoencoder of the stack
        self.pre_trainers = trainers = []
        for layer in self.autoencoders:
            layer_cost = self.get_ae_cost(layer)
            layer_updates = get_updates(
                layer.pre_params, layer_cost, learning_rate)
            trainers.append(theano.function(
                [self.autoencoders[0].input],
                layer_cost,
                updates=layer_updates,
            ))

        # compile the encoding function
        self.encode = theano.function(
            [self.encoder.input], self.encoder.output)
コード例 #2
0
ファイル: embedding.py プロジェクト: ogrisel/codemaker
    def fine_tune(self, data, batch_size=50, epochs=100, learning_rate=0.1,
                  checkpoint=10, patience=20, tolerance=1e-5):
        """Use SGD to optimize the embedding computed by the encoder stack

        Parameters
        ----------
        data : array-like
            Training samples, one per row. Converted to a 2D array of dtype
            ``theano.config.floatX``; a copy is shuffled, the caller's array
            is left untouched.
        batch_size : number of rows fed to each SGD step. Rows that do not
            fill a complete batch are skipped, except when the whole dataset
            is smaller than one batch: it is then used as a single batch.
        epochs : maximum number of passes over the data.
        learning_rate : forwarded to ``get_updates``.
        checkpoint : the mean error is printed every ``checkpoint`` epochs;
            a falsy value (0 or None) disables the progress output.
        patience : training stops once more than ``patience`` epochs have
            passed since the best error was recorded.
        tolerance : minimum decrease of the mean error for an epoch to count
            as a new best.

        Returns nothing; the encoder parameters are updated by the compiled
        Theano function.
        """
        data = np.atleast_2d(data)
        data = np.asanyarray(data, dtype=theano.config.floatX)
        # the two-value unpacking also rejects inputs with more than 2 axes
        n_samples, _ = data.shape
        if n_samples == 0:
            # nothing to train on: skip the compilation and the NaN epochs
            return

        best_error = None
        best_epoch = 0
        # Floor division keeps the count an integer whatever the division
        # semantics in effect (np.zeros and xrange both require an int).
        # At least one batch is always run so that a dataset smaller than
        # batch_size does not yield an empty, NaN-valued error vector.
        n_batches = max(1, int(n_samples // batch_size))

        cost = self.get_embedding_cost(self.autoencoders[-1])
        tuner = theano.function(
            [self.autoencoders[0].input],
            cost,
            updates=get_updates(self.encoder.params, cost, learning_rate)
        )

        shuffled = data.copy()
        for e in xrange(epochs):
            # reshuffling data to enforce I.I.D. assumption
            self.rng.shuffle(shuffled)

            err = np.zeros(n_batches)
            for b in xrange(n_batches):
                batch_input = shuffled[b * batch_size:(b + 1) * batch_size]
                err[b] = tuner(batch_input).mean()

            error = err.mean()
            # `checkpoint and ...` avoids a ZeroDivisionError for checkpoint=0
            if checkpoint and e % checkpoint == 0:
                # single parenthesized argument: same output with the
                # Python 2 print statement and the Python 3 print function
                print("fine tune: epoch [%03d/%03d]: err: %0.5f" % (
                    e + 1, epochs, error))
            if best_error is None or error < best_error - tolerance:
                best_error = error
                best_epoch = e
            elif e - best_epoch > patience:
                print("fine tune: early stopping at epoch %d" % (e + 1))
                break