def __init__(self,
                 numpy_rng=None,
                 theano_rng=None,
                 n_input=150,
                 n_hidden=50,
                 n_label=3,
                 n_delay=6,
                 freq=3):
        """
        :type numpy_rng: np.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial weights

        :type theano_rng: theano.sandbox.rng_mrg.MRG_RandomStreams
        :param theano_rng: Theano random generator; if None is given, one is
                           created from a seed drawn from `numpy_rng`

        :type n_input: int
        :param n_input: dimension of the input to the DBN

        :type n_hidden: int
        :param n_hidden: intermediate layer size

        :type n_label: int
        :param n_label: dimension of the output of the network (label layers)

        :type n_delay: int
        :param n_delay: number of past visible layers (time delays) in the CRBM

        :type freq: int
        :param freq: passed through unchanged to the underlying CRBM
        """

        self.params = []

        self.n_input = n_input
        self.n_hidden = n_hidden
        self.n_label = n_label
        self.delay = n_delay
        self.freq = freq

        if numpy_rng is None:
            # create a number generator
            numpy_rng = np.random.RandomState(1234)
        if not theano_rng:
            theano_rng = MRG_RandomStreams(numpy_rng.randint(2**30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.x_history = T.matrix(
            'x_history')  # memory: past visible frames, provided as a copy of the visible layer
        self.y = T.lvector(
            'y'
        )  # the labels are presented as 1D vector of [int] labels (digit)

        # Construct a CRBM that shares weights with this layer
        self.crbm_layer = CRBM(numpy_rng=numpy_rng,
                               theano_rng=theano_rng,
                               input=self.x,
                               input_history=self.x_history,
                               n_visible=n_input,
                               n_hidden=n_hidden,
                               delay=n_delay,
                               freq=freq)

        self.params.append(self.crbm_layer.W)
        self.params.append(self.crbm_layer.B)
        self.params.append(self.crbm_layer.hbias)

        # We now need to add a logistic regression layer on top of the CRBM's hidden units
        input_logistic = T.nnet.sigmoid(
            T.dot(self.x, self.crbm_layer.W) +
            T.dot(self.x_history, self.crbm_layer.B) + self.crbm_layer.hbias)
        self.logLayer = LogisticRegression(input=input_logistic,
                                           n_in=n_hidden,
                                           n_out=n_label)

        self.params.extend(self.logLayer.params)

        # compute the cost for the second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
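
# A minimal sketch of how the fine-tuning attributes built above are usually
# wired into a gradient-descent training step. The class name `CDBN`, the
# learning rate and the shared training arrays are assumptions, not part of
# the snippet.
import theano
import theano.tensor as T

model = CDBN(n_input=150, n_hidden=50, n_label=3, n_delay=6, freq=3)  # hypothetical class name

lr = T.scalar('lr')
gparams = T.grad(model.finetune_cost, model.params)
updates = [(p, p - lr * g) for p, g in zip(model.params, gparams)]

# train_x, train_x_history and train_y are assumed to be Theano shared
# variables holding the current frames, their history window and the labels
finetune_fn = theano.function(
    [lr],
    [model.finetune_cost, model.errors],
    updates=updates,
    givens={model.x: train_x,
            model.x_history: train_x_history,
            model.y: train_y})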
Example #2
 def make_output(self, output, collapse=True, sample_mean=None, gamma=None):
     self.output = output
     if collapse and self.depth > 1:
         self.output = self.make_consensus(self.output)
         if self.attrs['consensus'] == 'flat':
             self.attrs['n_out'] *= self.depth
     if self.attrs['batch_norm']:
         self.output = self.batch_norm(
             self.output,
             self.attrs['n_out'],
             sample_mean=sample_mean,
             gamma=gamma,
             use_sample=self.attrs['bn_use_sample'])
     if self.attrs['residual']:
         from .hidden import concat_sources
         z, n_in = concat_sources(self.sources,
                                  unsparse=True,
                                  expect_source=False)
         assert n_in == self.attrs['n_out']
         self.output += z
     if self.attrs['layer_drop'] > 0.0:
         # Stochastic Depth, http://arxiv.org/abs/1603.09382
         from .hidden import concat_sources
         z, n_in = concat_sources(self.sources,
                                  unsparse=True,
                                  expect_source=False)
         n_out = self.attrs['n_out']
         if n_in != n_out:
             print("Layer drop with additional projection %i -> %i" %
                   (n_in, n_out),
                   file=log.v4)
             if n_in > 0:
                 self.W_drop = self.add_param(
                     self.create_forward_weights(n_in,
                                                 n_out,
                                                 name="W_drop_%s" %
                                                 self.name))
                 z = T.dot(z, self.W_drop)
             else:
                 z = 0
         if self.train_flag:
             from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
             rng = RandomStreams(self.rng.randint(1234) + 1)
             import theano.ifelse
             drop = rng.binomial(n=1,
                                 p=self.attrs['layer_drop'],
                                 size=(1, ),
                                 dtype='int8')[0]
             # drop = theano.printing.Print("drop")(drop)
             self.output = theano.ifelse.ifelse(drop, z, self.output)
         else:
             drop = self.attrs['layer_drop']
             self.output = numpy.float32(drop) * z + numpy.float32(
                 1.0 - drop) * self.output
     if self.attrs['sparse']:
         self.output = T.argmax(self.output, axis=-1, keepdims=True)
     if self.attrs['sparse_filtering']:
         # https://dlacombejr.github.io/programming/2015/09/13/sparse-filtering-implemenation-in-theano.html
         fs = T.sqrt(self.output**2 + 1e-8)  # numerical stability
         l2fs = T.sqrt(T.sum(fs**2, axis=1))  # l2 norm of row
         nfs = fs / l2fs.dimshuffle(0, 'x')  # normalize rows
         l2fn = T.sqrt(T.sum(nfs**2, axis=0))  # l2 norm of column
         self.output = nfs / l2fn.dimshuffle('x', 0)  # normalize columns
     self.output.name = "%s.output" % self.name
     self._output = output
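
# The sparse-filtering branch above applies a soft absolute value and then
# normalizes rows and columns to unit l2 norm. A small NumPy sketch of the
# same computation, included only to illustrate the steps:
import numpy as np

def sparse_filter_normalize(output, eps=1e-8):
    fs = np.sqrt(output ** 2 + eps)                              # soft absolute value
    nfs = fs / np.sqrt((fs ** 2).sum(axis=1, keepdims=True))     # unit l2 norm per row
    return nfs / np.sqrt((nfs ** 2).sum(axis=0, keepdims=True))  # unit l2 norm per column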
Example #3
            y = state.get_value()
            assert is_binary(y)
            s = y.sum(axis=1)
            assert np.all(s == 1)


validate_all_samples()

if vis_sample.ndim == 4:
    vis_sample.set_value(vis_batch)
else:
    vis_sample.set_value(dataset.get_design_matrix(vis_batch))

validate_all_samples()

theano_rng = MRG_RandomStreams(2012 + 9 + 18)

# Do one round of clamped sampling so the seed data gets to have an influence
# The sampling is bottom-to-top so if we don't do an initial round where we
# explicitly clamp vis_sample, its initial value gets discarded with no influence
sampling_updates = model.get_sampling_updates(
    layer_to_state, theano_rng, layer_to_clamp={model.visible_layer: True})

t1 = time.time()
sample_func = function([], updates=sampling_updates)
t2 = time.time()
print 'Clamped sampling function compilation took', t2 - t1
sample_func()

# Now compile the full sampling update
sampling_updates = model.get_sampling_updates(layer_to_state, theano_rng)
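
# The snippet stops right after building the unclamped updates. A plausible
# continuation, mirroring the clamped compile-and-run step above (this is an
# assumption, not the original script):
t1 = time.time()
sample_func = function([], updates=sampling_updates)
t2 = time.time()
print 'Sampling function compilation took', t2 - t1

for i in xrange(10):
    sample_func()
    validate_all_samples()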
Example #4
    def __init__(self,
                 incoming,
                 encoder,
                 decoder,
                 x_distribution='bernoulli',
                 pz_distribution='gaussian',
                 qz_distribution='gaussian',
                 latent_size=50,
                 W=init.Normal(0.01),
                 b=init.Normal(0.01),
                 **kwargs):
        super(VAELayer, self).__init__(incoming, **kwargs)
        num_batch, n_features = self.input_shape
        self.num_batch = num_batch
        self.n_features = n_features
        self.x_distribution = x_distribution
        self.pz_distribution = pz_distribution
        self.qz_distribution = qz_distribution
        self.encoder = encoder
        self.decoder = decoder
        self._srng = RandomStreams()

        if self.x_distribution not in ['gaussian', 'bernoulli']:
            raise NotImplementedError
        if self.pz_distribution not in ['gaussian', 'gaussianmarg']:
            raise NotImplementedError
        if self.qz_distribution not in ['gaussian', 'gaussianmarg']:
            raise NotImplementedError

        self.params_encoder = lasagne.layers.get_all_params(encoder)
        self.params_decoder = lasagne.layers.get_all_params(decoder)
        for p in self.params_encoder:
            p.name = "VAELayer encoder :" + p.name
        for p in self.params_decoder:
            p.name = "VAELayer decoder :" + p.name

        self.num_hid_enc = encoder.output_shape[1]
        self.num_hid_dec = decoder.output_shape[1]
        self.latent_size = latent_size

        self.W_enc_to_z_mu = self.add_param(W, (self.num_hid_enc, latent_size))
        self.b_enc_to_z_mu = self.add_param(b, (latent_size, ))

        self.W_enc_to_z_logsigma = self.add_param(
            W, (self.num_hid_enc, self.latent_size))
        self.b_enc_to_z_logsigma = self.add_param(b, (latent_size, ))

        self.W_dec_to_x_mu = self.add_param(
            W, (self.num_hid_dec, self.n_features))
        self.b_dec_to_x_mu = self.add_param(b, (self.n_features, ))

        self.W_params = [
            self.W_enc_to_z_mu, self.W_enc_to_z_logsigma, self.W_dec_to_x_mu
        ] + self.params_encoder + self.params_decoder
        self.bias_params = [
            self.b_enc_to_z_mu, self.b_enc_to_z_logsigma, self.b_dec_to_x_mu
        ]

        params_tmp = []
        if self.x_distribution == 'gaussian':
            self.W_dec_to_x_logsigma = self.add_param(
                W, (self.num_hid_dec, self.n_features))
            self.b_dec_to_x_logsigma = self.add_param(b, (self.n_features, ))
            self.W_params += [self.W_dec_to_x_logsigma]
            self.bias_params += [self.b_dec_to_x_logsigma]
            self.W_dec_to_x_logsigma.name = "VAE: W_dec_to_x_logsigma"
            self.b_dec_to_x_logsigma.name = "VAE: b_dec_to_x_logsigma"
            params_tmp = [self.W_dec_to_x_logsigma, self.b_dec_to_x_logsigma]

        self.params = self.params_encoder + [self.W_enc_to_z_mu,
                                             self.b_enc_to_z_mu,
                                             self.W_enc_to_z_logsigma,
                                             self.b_enc_to_z_logsigma] + self.params_decoder + \
                      [self.W_dec_to_x_mu, self.b_dec_to_x_mu] + params_tmp

        self.W_enc_to_z_mu.name = "VAELayer: W_enc_to_z_mu"
        self.W_enc_to_z_logsigma.name = "VAELayer: W_enc_to_z_logsigma"
        self.W_dec_to_x_mu.name = "VAELayer: W_dec_to_x_mu"
        self.b_enc_to_z_mu.name = "VAELayer: b_enc_to_z_mu"
        self.b_enc_to_z_logsigma.name = "VAELayer: b_enc_to_z_logsigma"
        self.b_dec_to_x_mu.name = "VAELayer: b_dec_to_x_mu"
Example #5
 def __init__(self, incomings, **kwargs):
     super(Q_Layer, self).__init__(incomings, **kwargs)
     self._srng = RandomStreams(get_rng().randint(1, 2147462579))
Example #6

def random_normal(shape, mean=0.0, std=1.0, dtype=_FLOATX, seed=None):
    if seed is None:
        seed = np.random.randint(10e6)
    rng = RandomStreams(seed=seed)
    return rng.normal(size=shape, avg=mean, std=std, dtype=dtype)
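
# A quick usage sketch for the helper above; `_FLOATX` is a module-level
# constant not shown here, so the dtype is passed explicitly.
import theano

noise = random_normal((64, 100), mean=0.0, std=0.02, dtype='float32', seed=42)
sample_fn = theano.function([], noise)
print(sample_fn().shape)  # (64, 100)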
Example #7
class VAE:
    def __init__(self,
                 n_in,
                 n_hidden,
                 n_out,
                 n_hidden_decoder=None,
                 trans_func=rectify,
                 batch_size=100):
        self.n_in = n_in
        self.n_hidden = n_hidden
        self.n_out = n_out
        self.batch_size = batch_size
        self.transf = trans_func
        self.l_in = InputLayer(shape=(batch_size, n_in))

        self.srng = RandomStreams()

        l_in_encoder = lasagne.layers.InputLayer(shape=(batch_size, n_in))
        l_in_decoder = lasagne.layers.InputLayer(shape=(batch_size, n_out))
        l_prev_encoder = l_in_encoder
        l_prev_decoder = l_in_decoder
        for i in range(len(n_hidden)):
            l_tmp_encoder = lasagne.layers.DenseLayer(l_prev_encoder,
                                                      num_units=n_hidden[i],
                                                      W=lasagne.init.Uniform(),
                                                      nonlinearity=self.transf)
            l_prev_encoder = l_tmp_encoder
        if n_hidden_decoder is None:
            n_hidden_decoder = n_hidden
        self.n_hidden_decoder = n_hidden_decoder
        for i in range(len(n_hidden_decoder)):
            l_tmp_decoder = lasagne.layers.DenseLayer(
                l_prev_decoder,
                num_units=n_hidden_decoder[-(i + 1)],
                W=lasagne.init.Uniform(),
                nonlinearity=self.transf)

            l_prev_decoder = l_tmp_decoder

        l_in = lasagne.layers.InputLayer(shape=(batch_size, n_in))
        self.model = VAELayer(
            l_in,
            encoder=l_prev_encoder,
            decoder=l_prev_decoder,
            latent_size=n_out,
            x_distribution='bernoulli',
            qz_distribution='gaussianmarg',  #gaussianmarg
            pz_distribution='gaussianmarg')
        self.x = T.matrix('x')

    def build_model(self, train_x, test_x, valid_x, update, update_args):
        self.train_x = train_x
        self.test_x = test_x
        self.validation_x = valid_x
        self.update = update
        self.update_args = update_args
        self.index = T.iscalar('index')
        self.batch_slice = slice(self.index * self.batch_size,
                                 (self.index + 1) * self.batch_size)

        x = self.srng.binomial(size=self.x.shape, n=1, p=self.x)
        log_pz, log_qz_given_x, log_px_given_z = self.model.get_log_distributions(
            self.x)
        loss_eval = (log_pz + log_px_given_z - log_qz_given_x).sum()
        loss_eval /= self.batch_size

        all_params = get_all_params(self.model)
        updates = self.update(-loss_eval, all_params, *self.update_args)

        train_model = theano.function(
            [self.index],
            loss_eval,
            updates=updates,
            givens={
                self.x: self.train_x[self.batch_slice],
            },
        )

        test_model = theano.function(
            [self.index],
            loss_eval,
            givens={
                self.x: self.test_x[self.batch_slice],
            },
        )

        validate_model = theano.function(
            [self.index],
            loss_eval,
            givens={
                self.x: self.validation_x[self.batch_slice],
            },
        )

        return train_model, test_model, validate_model

    def draw_sample(self, z):
        return self.model.draw_sample(z)

    def get_output(self, dat):
        z, _, _ = self.model.get_z_mu_sigma(dat)
        return z

    def get_reconstruction(self, z):
        return self.model.decoder_output(z)
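
# A hedged usage sketch for the wrapper above; the toy dataset, its shapes and
# the Adam learning rate are assumptions.
import numpy as np
import theano
import lasagne

data = np.random.binomial(1, 0.5, (1000, 784)).astype(theano.config.floatX)
train_x = theano.shared(data[:800])    # build_model expects shared variables
valid_x = theano.shared(data[800:900])
test_x = theano.shared(data[900:])

vae = VAE(n_in=784, n_hidden=[200], n_out=50, batch_size=100)
train_model, test_model, validate_model = vae.build_model(
    train_x, test_x, valid_x,
    update=lasagne.updates.adam, update_args=(1e-3,))

for epoch in range(10):
    bounds = [train_model(i) for i in range(8)]  # 800 samples / 100 per batch
    print(epoch, np.mean(bounds))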
Example #8
# print data.shape
# mask = numpy.zeros((90,90),dtype=numpy.float32)
# mask[30:,:]=1.
# train_features_numpy = data*mask.reshape(1,1,1,90,90)

#labels = numpy.argmax(datafile.root.yd.read(),1).astype(numpy.int32).reshape(-1,)

labels = datafile.root.y.read()

if not os.path.isdir(output_folder):
    os.makedirs(output_folder)
os.chdir(output_folder)

print '... instantiating model'
numpy_rng = numpy.random.RandomState(1)
theano_rng = MRG_RandomStreams(numpy_rng.randint(2**15))

x = T.matrix('x').reshape((batchsize, 2, 5, 90, 90))
y = T.matrix('y')

model = my_network(
    numpy_rng=numpy_rng,
    theano_rng=theano_rng,
    input=x,
    labels=y,
    Wl_path=
    '/home/konda/software/python_env/bin/project_odometry/Wl4CNN256Cr.npy',
    Wr_path=
    '/home/konda/software/python_env/bin/project_odometry/Wr4CNN256Cr.npy',
    image_shape=[8, 90, 90, batchsize],
    fsi=[8, 16, 16, n_filters],
Example #9
def main(gan, optimizer, do_batch_norm, n_epochs, epoch_size, batch_size,
         initial_eta, eta_decay, threshold, activation, noise_type, dump):

    # Load the dataset
    print("Loading data...")
    X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()
    if threshold != 0.0:
        X_train[X_train >= threshold] = 1
        X_train[X_train < threshold] = 0
        X_test[X_test >= threshold] = 1
        X_test[X_test < threshold] = 0

    # Instantiate a symbolic noise generator to use for training
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
    srng = RandomStreams(seed=np.random.randint(2147462579, size=6))
    if noise_type == 'normal':
        noise = srng.normal((batch_size, 100), avg=0.0, std=1)
    elif noise_type == 'uniform':
        noise = srng.uniform((batch_size, 100))
    else:
        raise Exception("Noise {} not supported".format(noise_type))

    # Prepare Theano variables for inputs and targets
    noise_var = T.matrix('noise')
    input_var = T.tensor4('inputs')

    # Create neural network model
    print("Building model and compiling functions...")
    generator = build_generator(noise_var, do_batch_norm, activation)
    critic = build_critic(gan, input_var, do_batch_norm)

    # Create expression for passing real data through the critic
    fake_in = lasagne.layers.get_output(generator)
    real_out = lasagne.layers.get_output(critic)
    # Create expression for passing fake data through the critic
    fake_out = lasagne.layers.get_output(critic, fake_in)

    # Create loss expressions
    if gan == 'dcgan':
        # Create loss expressions
        generator_loss = lasagne.objectives.binary_crossentropy(fake_out, 1)
        generator_loss = generator_loss.mean()
        critic_loss = (lasagne.objectives.binary_crossentropy(real_out, 1) +
                       lasagne.objectives.binary_crossentropy(fake_out, 0))
        critic_loss = critic_loss.mean()
    elif gan == 'lsgan':
        # a, b, c = -1, 1, 0  # Equation (8) in the paper
        a, b, c = 0, 1, 1  # Equation (9) in the paper
        generator_loss = lasagne.objectives.squared_error(fake_out, c).mean()
        critic_loss = (lasagne.objectives.squared_error(real_out, b).mean() +
                       lasagne.objectives.squared_error(fake_out, a).mean())
    elif gan in ('wgan', 'wgan-gp'):
        # original in Jan's code
        # generator_loss = fake_out.mean()
        # critic_loss = real_out.mean() - fake_out.mean()
        generator_loss = -fake_out.mean()
        critic_loss = -real_out.mean() + fake_out.mean()
        if gan == 'wgan-gp':
            # gradient penalty
            alpha = srng.uniform((batch_size, 1, 1, 1), low=0., high=1.)
            differences = fake_in - input_var
            interpolates = input_var + (alpha * differences)
            inter_out = lasagne.layers.get_output(critic, interpolates)
            gradients = theano.grad(inter_out.sum(), wrt=interpolates)
            slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1, 2, 3)))
            critic_penalty = 10 * T.mean((slopes - 1.)**2)
            # original in Jan's code
            # critic_loss -= critic_penalty
            critic_loss += critic_penalty
    else:
        raise Exception("GAN {} is not supported".format(gan))

    # Create update expressions for training
    generator_params = lasagne.layers.get_all_params(generator, trainable=True)
    critic_params = lasagne.layers.get_all_params(critic, trainable=True)
    eta = theano.shared(lasagne.utils.floatX(initial_eta))

    # choose the optimizer
    if optimizer == 'adam':
        generator_updates = lasagne.updates.adam(generator_loss,
                                                 generator_params,
                                                 learning_rate=eta,
                                                 beta1=0.5,
                                                 beta2=0.9)
        critic_updates = lasagne.updates.adam(critic_loss,
                                              critic_params,
                                              learning_rate=eta,
                                              beta1=0.5,
                                              beta2=0.9)
    elif optimizer == 'rmsprop':
        generator_updates = lasagne.updates.rmsprop(generator_loss,
                                                    generator_params,
                                                    learning_rate=eta)
        critic_updates = lasagne.updates.rmsprop(critic_loss,
                                                 critic_params,
                                                 learning_rate=eta)
    else:
        raise Exception("Optimizer {} is not supported".format(optimizer))

    # Compile functions performing a training step on a mini-batch (according
    # to the updates dictionary) and returning the corresponding loss:
    generator_train_fn = theano.function([],
                                         generator_loss,
                                         givens={noise_var: noise},
                                         updates=generator_updates)
    critic_train_fn = theano.function([input_var],
                                      critic_loss,
                                      givens={noise_var: noise},
                                      updates=critic_updates)

    # Compile another function generating some data
    gen_fn = theano.function([noise_var],
                             lasagne.layers.get_output(generator,
                                                       deterministic=True))

    # Finally, launch the training loop.
    print("Starting training...")
    # We create an infinite supply of batches (as an iterable generator):
    batches = iterate_minibatches(X_train,
                                  y_train,
                                  batch_size,
                                  shuffle=True,
                                  forever=True)
    # build prefix and suffix strings for saving files
    prefix = "{}_mnist".format(gan)
    suffix = "non_lin_{}_opt_{}_bn_{}_etadecay_{}_thresh_{}_noise_{}".format(
        activation, optimizer, do_batch_norm, eta_decay, threshold, noise_type)

    # We iterate over epochs:
    n_generator_updates = 0
    for epoch in tqdm(range(n_epochs)):
        # sample a batch of samples, plot them inc. histograms
        n_samples = 1000
        samples = gen_fn(lasagne.utils.floatX(np.random.rand(n_samples, 100)))
        plot_samples(
            gan, samples,
            "samples/{}_samples_{}_{}.png".format(prefix, epoch, suffix))
        plot_histogram(
            gan, samples, X_train, "{} : {} {}".format(gan, optimizer, epoch),
            "histograms/{}_hist_epoch_{}_{}.png".format(prefix, epoch, suffix))

        critic_scores = []
        generator_scores = []
        for _ in range(epoch_size):
            for _ in range(get_critic_runs(gan, n_generator_updates)):
                inputs, targets = next(batches)
                critic_scores.append(critic_train_fn(inputs))
            generator_scores.append(generator_train_fn())
            n_generator_updates += 1

        print("  generator loss:\t\t{}".format(np.mean(generator_scores)))
        print("  critic loss:\t\t{}".format(np.mean(critic_scores)))

        # After half the epochs, we start decaying the learning rate towards zero
        if eta_decay and epoch >= int(n_epochs / 2):
            progress = float(epoch) / n_epochs
            eta.set_value(
                lasagne.utils.floatX(initial_eta * 2 * (1 - progress)))

    # dump the network weights to a file:
    if dump:
        np.savez('models/{}_mnist_gen_{}.npz'.format(gan, suffix),
                 *lasagne.layers.get_all_param_values(generator))
        np.savez('models/{}_mnist_crit_{}.npz'.format(gan, suffix),
                 *lasagne.layers.get_all_param_values(critic))
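
# A hypothetical invocation of main() with a WGAN-GP configuration; every
# argument value below is an assumption, and the actual CLI wrapper that
# parses them is not shown in the snippet.
main(gan='wgan-gp', optimizer='adam', do_batch_norm=True,
     n_epochs=100, epoch_size=100, batch_size=64,
     initial_eta=1e-4, eta_decay=True, threshold=0.0,
     activation='relu', noise_type='normal', dump=False)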
Example #10
def main(num_epochs=200,
         convs=0,
         batchsize=64,
         initial_eta=5e-3,
         add_noise=True):
    # Load the dataset
    print("Loading data...")
    datapath = '/media/steampunkhd/rafaelvalle/datasets/MIDI/Piano'
    glob_file_str = '*.npy'
    n_pieces = 0  # 0 is equal to all pieces, unbalanced dataset
    crop = None  # (32, 96)
    as_dict = False
    inputs, _ = load_data(datapath, glob_file_str, n_pieces, crop, as_dict)

    # scale to [0, 1]
    # inputs = (inputs + 1) * 0.5

    # Prepare Theano variables for inputs and targets
    noise_var = T.matrix('noise')
    input_var = T.tensor4('inputs')

    # Instantiate a symbolic noise generator to use for training
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
    srng = RandomStreams(seed=np.random.randint(2147462579, size=6))

    # Create neural network model
    print("Building model and compiling functions...")
    generator = build_generator(noise_var, convs)
    discriminator = build_discriminator(input_var, convs)

    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(discriminator,
                                         lasagne.layers.get_output(generator))

    # Create loss expressions
    # one-sided label smoothing
    lbl_noise = 0.0
    if add_noise:
        lbl_noise = srng.normal(size=(3, ), avg=0.0, std=0.1)
    generator_loss = lasagne.objectives.binary_crossentropy(fake_out, 1).mean()
    discriminator_loss = (
        lasagne.objectives.binary_crossentropy(real_out, 1 + lbl_noise) +
        lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()

    # Create update expressions for training
    generator_params = lasagne.layers.get_all_params(generator, trainable=True)
    discriminator_params = lasagne.layers.get_all_params(discriminator,
                                                         trainable=True)
    eta = theano.shared(lasagne.utils.floatX(initial_eta))
    updates = lasagne.updates.adam(generator_loss,
                                   generator_params,
                                   learning_rate=eta,
                                   beta1=0.9)
    updates.update(
        lasagne.updates.adam(discriminator_loss,
                             discriminator_params,
                             learning_rate=eta,
                             beta1=0.9))

    noise = srng.uniform((batchsize, 100))

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the discriminator's accuracy on
    # real and fake samples:
    train_fn = theano.function([input_var], [(real_out > .5).mean(),
                                             (fake_out < .5).mean()],
                               givens={noise_var: noise},
                               updates=updates)

    # Compile another function generating some data
    gen_fn = theano.function([noise_var],
                             lasagne.layers.get_output(generator,
                                                       deterministic=True))
    obs_length = 128
    print("Starting training...")
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(inputs, batchsize, length=obs_length):
            batch = lasagne.utils.floatX(batch)
            # reshape batch to proper dimensions
            batch = batch.reshape(
                (batch.shape[0], 1, batch.shape[1], batch.shape[2]))
            train_err += np.array(train_fn(batch))
            train_batches += 1

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   time.time() - start_time))
        print("  training loss:\t\t{}".format(train_err / train_batches))

        # And finally, we plot some generated data
        samples = gen_fn(lasagne.utils.floatX(np.random.rand(42, noise_size)))
        plt.imsave(
            'images/dcgan_proll/proll_samples_epoch{}.png'.format(epoch),
            (samples.reshape(6, 7, obs_length, obs_length).transpose(
                0, 2, 1, 3).reshape(6 * obs_length, 7 * obs_length)).T,
            cmap='gray',
            origin='bottom')

        # After half the epochs, start decaying the learning rate towards zero
        if epoch >= num_epochs // 2:
            progress = float(epoch) / num_epochs
            eta.set_value(
                lasagne.utils.floatX(initial_eta * 2 * (1 - progress)))
Example #11
def apply_dropout(computation_graph, variables, drop_prob, rng=None,
                  seed=None, custom_divisor=None):
    """Apply dropout to specified variables in a graph.

    Parameters
    ----------
    computation_graph : instance of :class:`ComputationGraph`
        The computation graph.
    variables : list of :class:`~tensor.TensorVariable`
        Variables to be dropped out.
    drop_prob : float
        Probability of dropping out. If you want to apply the dropout
        with different probabilities for different layers, call it
        several times.
    rng : :class:`~theano.sandbox.rng_mrg.MRG_RandomStreams`
        Random number generator.
    seed : int
        Random seed to be used if `rng` was not specified.
    custom_divisor : float or None, optional
        Divide dropped variables by a given scalar value. If `None`,
        (default) dropped variables will be divided by `(1 - drop_prob)`
        which is equivalent to scaling by `(1 - drop_prob)` at test
        time as recommended in [DROPOUT]_.

    Returns
    -------
    dropped_computation_graph : instance of :class:`ComputationGraph`
        A new computation graph with dropout applied to the specified
        variables. In order to train with, or monitor, the outputs
        of the original computation graph with dropout applies, use
        the variables contained in `dropped_computation_graph.outputs`.

    Notes
    -----
    For more information, see [DROPOUT]_.

    .. [DROPOUT] Hinton et al. *Improving neural networks by preventing
       co-adaptation of feature detectors*, arXiv:1207.0580.

    Examples
    --------
    >>> import numpy
    >>> from theano import tensor, function
    >>> from blocks.bricks import MLP, Identity
    >>> from blocks.filter import VariableFilter
    >>> from blocks.initialization import Constant
    >>> from blocks.roles import INPUT
    >>> linear = MLP([Identity(), Identity()], [2, 10, 2],
    ...              weights_init=Constant(1), biases_init=Constant(2))
    >>> x = tensor.matrix('x')
    >>> y = linear.apply(x)
    >>> cg = ComputationGraph(y)

    We are going to drop out all the input variables

    >>> inputs = VariableFilter(roles=[INPUT])(cg.variables)

    Here we apply dropout with default setting to our computation graph

    >>> cg_dropout = apply_dropout(cg, inputs, 0.5)

    Dropped out variables have role `DROPOUT` and are tagged with
    `replacement_of` tag. Let's filter these variables and check if they
    have the links to original ones.

    >>> dropped_out = VariableFilter(roles=[DROPOUT])(cg_dropout.variables)
    >>> inputs_referenced = [var.tag.replacement_of for var in dropped_out]
    >>> set(inputs) == set(inputs_referenced)
    True

    Compiling theano functions to forward propagate in original and dropped
    out graphs

    >>> fprop = function(cg.inputs, cg.outputs[0])
    >>> fprop_dropout = function(cg_dropout.inputs, cg_dropout.outputs[0])

    Initialize an MLP and apply these functions

    >>> linear.initialize()
    >>> fprop(numpy.ones((3, 2),
    ...       dtype=theano.config.floatX))  # doctest:+ELLIPSIS
    array([[ 42.,  42.],
           [ 42.,  42.],
           [ 42.,  42.]]...
    >>> fprop_dropout(numpy.ones((3, 2),
    ...               dtype=theano.config.floatX))  # doctest:+ELLIPSIS
    array([[ 0.,  0.],
           [ 0.,  0.],
           [ 0.,  0.]]...

    And after the second run answer is different

    >>> fprop_dropout(numpy.ones((3, 2),
    ...               dtype=theano.config.floatX))  # doctest:+ELLIPSIS
    array([[   0.,   52.],
           [ 100.,    0.],
           [   0.,    0.]]...

    """
    if not rng and not seed:
        seed = config.default_seed
    if not rng:
        rng = MRG_RandomStreams(seed)
    if custom_divisor is None:
        divisor = (1 - drop_prob)
    else:
        divisor = custom_divisor
    replacements = [(var, var *
                     rng.binomial(var.shape, p=1 - drop_prob,
                                  dtype=theano.config.floatX) /
                     divisor)
                    for var in variables]
    for variable, replacement in replacements:
        add_role(replacement, DROPOUT)
        replacement.tag.replacement_of = variable

    return computation_graph.replace(replacements)
Example #12
class SampleLayer(lasagne.layers.MergeLayer):
    """
    Sampling layer supporting importance sampling as described in [BURDA]_ and
    multiple Monte Carlo samples for the approximation of
    E_q [log( p(x,z) / q(z|x) )].

    Parameters
    ----------
    mu : class:`Layer` instance
        Parameterizing the mean of the distribution to sample
        from as described in [BURDA]_.

    log_var : class:`Layer` instance
        By default assumed to parametrize log(sigma^2) of the distribution to
        sample from as described in [BURDA]_ which is transformed to sigma using
        the nonlinearity function as described below. Effectively this means
        that the nonlinearity function controls what log_var parametrizes. A few
        common examples:
        -nonlinearity = lambda x: T.exp(0.5*x) => log_var = log(sigma^2)[default]
        -nonlinearity = lambda x: T.sqrt(x) => log_var = sigma^2
        -nonlinearity = lambda x: x => log_var = sigma

    eq_samples : int or T.scalar
        Number of Monte Carlo samples used to estimate the expectation over
        q(z|x) in eq. (8) in [BURDA]_.

    iw_samples : int or T.scalar
        Number of importance samples in the sum over k in eq. (8) in [BURDA]_.

    nonlinearity : callable or None
        The nonlinearity that is applied to the log_var input layer to transform
        it into a standard deviation. By default we assume that
        log_var = log(sigma^2) and hence the corresponding nonlinearity is
        f(x) = T.exp(0.5*x) such that T.exp(0.5*log(sigma^2)) = sigma

    seed : int
        seed to random stream

    Methods
    ----------
    seed : Helper function to change the random seed after init is called

    References
    ----------
        ..  [BURDA] Burda, Yuri, Roger Grosse, and Ruslan Salakhutdinov.
            "Importance Weighted Autoencoders."
            arXiv preprint arXiv:1509.00519 (2015).
    """

    def __init__(self, mean, log_var,
                 eq_samples=1,
                 iw_samples=1,
                 nonlinearity=lambda x: T.exp(0.5*x),
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 **kwargs):
        super(SampleLayer, self).__init__([mean, log_var], **kwargs)

        self.eq_samples = eq_samples
        self.iw_samples = iw_samples
        self.nonlinearity = nonlinearity

        self._srng = RandomStreams(seed)

    def seed(self, seed=lasagne.random.get_rng().randint(1, 2147462579)):
        self._srng.seed(seed)

    def get_output_shape_for(self, input_shapes):
        batch_size, num_latent = input_shapes[0]
        if isinstance(batch_size, int) and \
           isinstance(self.iw_samples, int) and \
           isinstance(self.eq_samples, int):
            out_dim = (batch_size*self.eq_samples*self.iw_samples, num_latent)
        else:
            out_dim = (None, num_latent)
        return out_dim

    def get_output_for(self, input, **kwargs):
        mu, log_var = input
        batch_size, num_latent = mu.shape
        eps = self._srng.normal(
            [batch_size, self.eq_samples, self.iw_samples, num_latent],
             dtype=theano.config.floatX)

        z = mu.dimshuffle(0,'x','x',1) + \
            self.nonlinearity( log_var.dimshuffle(0,'x','x',1)) * eps

        return z.reshape((-1,num_latent))
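
# A short usage sketch: two dense layers parameterize mu and log(sigma^2) and
# feed the sampler. The encoder sizes are arbitrary assumptions.
import theano.tensor as T
import lasagne

x = T.matrix('x')
l_in = lasagne.layers.InputLayer((None, 784), input_var=x)
l_enc = lasagne.layers.DenseLayer(l_in, num_units=128)
l_mu = lasagne.layers.DenseLayer(l_enc, num_units=32, nonlinearity=None)
l_log_var = lasagne.layers.DenseLayer(l_enc, num_units=32, nonlinearity=None)

# one Monte Carlo sample over q(z|x), five importance samples per input row
l_z = SampleLayer(l_mu, l_log_var, eq_samples=1, iw_samples=5)
z = lasagne.layers.get_output(l_z)  # shape: (batch_size * 1 * 5, 32)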
Example #13
    def __init__(self, mean, log_var,
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 **kwargs):
        super(SimpleSampleLayer, self).__init__([mean, log_var], **kwargs)

        self._srng = RandomStreams(seed)
Example #14

        Each column corresponds to a different unit

    Returns:
        dW: a matrix of the derivatives of the expected gradient
            of the energy
    """

    raise NotImplementedError("TODO: implement this function.")


if __name__ == "__main__":
    m = 2   # minibatch size
    nv = 3  # number of visible units
    nh = 4  # number of hidden units
    h0 = T.alloc(1., m, nh)  # start from an all-ones hidden state
    rng_factory = MRG_RandomStreams(42)
    W = rng_factory.normal(size=(nv, nh), dtype=h0.dtype)  # random weight matrix
    pv = T.nnet.sigmoid(T.dot(h0, W.T))  # P(v = 1 | h)
    v = rng_factory.binomial(p=pv, size=pv.shape, dtype=W.dtype)  # sample the visibles
    ph = T.nnet.sigmoid(T.dot(v, W))  # P(h = 1 | v)
    h = rng_factory.binomial(p=ph, size=ph.shape, dtype=W.dtype)  # sample the hiddens

    class _ElemwiseNoGradient(theano.tensor.Elemwise):
        def grad(self, inputs, output_gradients):
            raise TypeError("You shouldn't be differentiating through "
                    "the sampling process.")
            return [ theano.gradient.DisconnectedType()() ]
    block_gradient = _ElemwiseNoGradient(theano.scalar.identity)

    v = block_gradient(v)
    h = block_gradient(h)
Example #15
from cle.cle.train import Training
from cle.cle.train.ext import (EpochCount, GradientClipping, Monitoring,
                               Picklize, EarlyStopping, WeightNorm)
from cle.cle.train.opt import Adam
from cle.cle.utils import init_tparams, sharedX
from cle.cle.utils.compat import OrderedDict
from cle.cle.utils.op import Gaussian_sample, GMM_sample, GMM_sampleY
from cle.cle.utils.gpu_op import concatenate

from preprocessing.ukdale import UKdale
from theano.sandbox.rng_mrg import MRG_RandomStreams
from ukdale_utils import plot_lines_iamondb_example

seed_rng = np.random.RandomState(np.random.randint(1024))
theano_seed = seed_rng.randint(np.iinfo(np.int32).max)
default_theano_rng = MRG_RandomStreams(theano_seed)


def main(args):

    theano.config.optimizer = 'fast_compile'
    #theano.config.exception_verbosity='high'

    trial = int(args['trial'])
    pkl_name = 'vrnn_gmm_%d' % trial
    channel_name = 'valid_nll_upper_bound'

    data_path = args['data_path']
    save_path = args[
        'save_path']  #+'/aggVSdisag_distrib/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    period = int(args['period'])
Example #16
class FeedforwardNet_SVI:
    """
    Implements a feedforward neural network trained using stochastic variational inference.
    Supports various types of layers and loss functions.
    """
    def __init__(self, n_inputs):
        """
        Constructs a net with a given number of inputs and no layers.
        """

        assert util.math.isposint(
            n_inputs), 'Number of inputs must be a positive integer.'

        self.n_inputs = n_inputs
        self.n_outputs = n_inputs
        self.n_units = [n_inputs]
        self.n_layers = 0
        self.n_params = 0

        self.mWs = []
        self.mbs = []
        self.sWs = []
        self.sbs = []
        self.uas = []
        self.mas = []
        self.zas = []
        self.hs = [tt.matrix('x')]

        self.mps = self.mWs + self.mbs
        self.sps = self.sWs + self.sbs
        self.parms = self.mps + self.sps
        self.input = self.hs[0]
        self.output = self.hs[-1]

        self.srng = RandomStreams()

        # theano functions
        self.eval_f = None
        self.eval_f_rand = None

    def reset_theano_functions(self):
        """
        Resets theano functions, so that they are compiled again when needed.
        """

        self.eval_f = None
        self.eval_f_rand = None

    def addLayer(self, n_units, type, rng=np.random):
        """
        Adds a new layer to the network.
        :param n_units: number of units in the layer
        :param type: a string specification of the activation function
        :param rng: numpy random number generator used to initialize the weight means
        """

        # check number of units
        assert util.math.isposint(
            n_units), 'Number of units must be a positive integer.'

        # choose activation function
        actfun = util.ml.select_theano_act_function(type, dtype)

        n_prev_units = self.n_outputs
        self.n_outputs = n_units
        self.n_units.append(n_units)
        self.n_layers += 1
        self.n_params += 2 * (n_prev_units + 1) * n_units

        mW = theano.shared((rng.randn(n_prev_units, n_units) /
                            np.sqrt(n_prev_units + 1)).astype(dtype),
                           name='mW' + str(self.n_layers),
                           borrow=True)
        mb = theano.shared(np.zeros(n_units, dtype=dtype),
                           name='mb' + str(self.n_layers),
                           borrow=True)
        sW = theano.shared(-5.0 *
                           np.ones([n_prev_units, n_units], dtype=dtype),
                           name='sW' + str(self.n_layers),
                           borrow=True)
        sb = theano.shared(-5.0 * np.ones(n_units, dtype=dtype),
                           name='sb' + str(self.n_layers),
                           borrow=True)
        ua = self.srng.normal((self.hs[-1].shape[0], n_units), dtype=dtype)
        ma = tt.dot(self.hs[-1], mW) + mb
        sa = tt.dot(self.hs[-1]**2, tt.exp(2 * sW)) + tt.exp(2 * sb)
        za = tt.sqrt(sa) * ua + ma
        h = actfun(za)
        h.name = 'h' + str(self.n_layers)

        self.mWs.append(mW)
        self.mbs.append(mb)
        self.sWs.append(sW)
        self.sbs.append(sb)
        self.uas.append(ua)
        self.mas.append(ma)
        self.zas.append(za)
        self.hs.append(h)

        self.mps = self.mWs + self.mbs
        self.sps = self.sWs + self.sbs
        self.parms = self.mps + self.sps
        self.output = self.hs[-1]

        self.reset_theano_functions()

    def removeLayer(self):
        """
        Removes a layer from the network.
        """

        assert self.n_layers > 0, 'There is no layer to remove.'

        n_params_to_rem = 2 * self.n_outputs * (self.n_units[-2] + 1)
        self.n_outputs = self.n_units[-2]
        self.n_units.pop()
        self.n_layers -= 1
        self.n_params -= n_params_to_rem

        self.mWs.pop()
        self.mbs.pop()
        self.sWs.pop()
        self.sbs.pop()
        self.uas.pop()
        self.mas.pop()
        self.zas.pop()
        self.hs.pop()

        self.mps = self.mWs + self.mbs
        self.sps = self.sWs + self.sbs
        self.parms = self.mps + self.sps
        self.output = self.hs[-1]

        self.reset_theano_functions()

    def eval(self, x, rand=False):
        """
        Evaluate net at locations in x.
        """

        x = np.asarray(x, dtype=dtype)

        if rand:

            # compile theano computation graph, if haven't already done so
            if self.eval_f_rand is None:

                n_data = tt.iscalar('n_data')
                uas = [
                    tt.tile(self.srng.normal((n_units, ), dtype=dtype),
                            [n_data, 1]) for n_units in self.n_units[1:]
                ]

                self.eval_f_rand = theano.function(inputs=[self.hs[0], n_data],
                                                   outputs=self.hs[-1],
                                                   givens=list(
                                                       zip(self.uas, uas)))

            return self.eval_f_rand(x[np.newaxis, :],
                                    1)[0] if x.ndim == 1 else self.eval_f_rand(
                                        x, x.shape[0])

        else:

            # compile theano computation graph, if haven't already done so
            if self.eval_f is None:
                self.eval_f = theano.function(inputs=[self.hs[0]],
                                              outputs=self.hs[-1],
                                              givens=list(
                                                  zip(self.zas, self.mas)))

            return self.eval_f(
                x[np.newaxis, :])[0] if x.ndim == 1 else self.eval_f(x)

    def printInfo(self):
        """
        Prints some useful info about the net.
        """

        print('Number of inputs  =', self.n_inputs)
        print('Number of outputs =', self.n_outputs)
        print('Number of units   =', self.n_units)
        print('Number of layers  =', self.n_layers)
        print('Number of params  =', self.n_params)
        print('Data type =', dtype)

    def visualize_weights(self, layer, imsize, layout):
        """
        Displays the weights of a specified layer as images.
        :param layer: the layer whose weights to display
        :param imsize: the image size
        :param layout: number of rows and columns for each page
        :return: none
        """

        util.plot.disp_imdata(self.mWs[layer].get_value().T, imsize, layout)
        plt.show(block=False)

    def visualize_activations(self, x, layers=None):
        """
        Visualizes the activations of specified layers caused by a given data minibatch.
        :param x: a minibatch of data
        :param layers: list of layers to visualize activations of; defaults to the whole net except the input layer
        :return: none
        """

        if layers is None:
            layers = range(self.n_layers)

        forwprop = theano.function(inputs=[self.hs[0]], outputs=self.hs[1:])
        hs = forwprop(x.astype(dtype))

        for l in layers:

            fig = plt.figure()
            ax = fig.add_subplot(1, 1, 1)
            ax.imshow(hs[l], cmap='gray', interpolation='none')
            ax.set_title('Layer ' + str(l))
            ax.set_xlabel('layer units')
            ax.set_ylabel('data points')

        plt.show(block=False)

    def param_hist(self, layers=None):
        """
        Displays a histogram of weights and biases for specified layers.
        :param layers: list of layers to show histograms for; defaults to the whole net
        :return: none
        """

        if layers is None:
            layers = range(self.n_layers)

        for l in layers:

            fig, axs = plt.subplots(2, 2)

            nbins = int(np.sqrt(self.mWs[l].get_value().size))
            axs[0, 0].hist(self.mWs[l].get_value().flatten(),
                           nbins,
                           normed=True)
            axs[0, 0].set_title('weight means, layer ' + str(l))
            axs[1, 0].hist(self.sWs[l].get_value().flatten(),
                           nbins,
                           normed=True)
            axs[1, 0].set_title('weight log stds, layer ' + str(l))

            nbins = int(np.sqrt(self.mbs[l].get_value().size))
            axs[0, 1].hist(self.mbs[l].get_value(), nbins, normed=True)
            axs[0, 1].set_title('bias means, layer ' + str(l))
            axs[1, 1].hist(self.sbs[l].get_value(), nbins, normed=True)
            axs[1, 1].set_title('bias log stds, layer ' + str(l))

        plt.show(block=False)
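
# A usage sketch; the activation strings must be names that
# util.ml.select_theano_act_function accepts, and `dtype` is a module-level
# constant, both of which are assumptions here.
import numpy as np

net = FeedforwardNet_SVI(5)
net.addLayer(20, 'tanh')    # hidden layer (activation name assumed valid)
net.addLayer(1, 'linear')   # output layer (activation name assumed valid)
net.printInfo()

x = np.random.randn(5)
y_mean = net.eval(x)              # deterministic pass: activations use their means
y_rand = net.eval(x, rand=True)   # stochastic pass with freshly sampled noise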
Example #17
def random_binomial(shape, p=0.0, dtype=_FLOATX, seed=None):
    if seed is None:
        seed = np.random.randint(1, 10e6)
    rng = RandomStreams(seed=seed)
    return rng.binomial(shape, p=p, dtype=dtype)
Example #18
def sample(p, seed=None):
    if seed is None:
        seed = np.random.randint(10e6)
    rng = RandomStreams(seed=seed)
    return rng.multinomial(n=1, pvals=p, dtype=theano.config.floatX)
Example #19

def random_uniform(shape, low=0.0, high=1.0, dtype=_FLOATX, seed=None):
    if seed is None:
        seed = np.random.randint(10e6)
    rng = RandomStreams(seed=seed)
    return rng.uniform(shape, low=low, high=high, dtype=dtype)
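
# A combined usage sketch for the three samplers above, with the dtype passed
# explicitly since `_FLOATX` is defined elsewhere in the module.
import numpy as np
import theano

mask = random_binomial((4, 10), p=0.5, dtype='float32', seed=1)       # dropout-style mask
onehot = sample(np.array([[0.5, 0.25, 0.25]]), seed=2)                # one one-hot draw per row
u = random_uniform((4, 10), low=-1.0, high=1.0, dtype='float32', seed=3)

draw = theano.function([], [mask, onehot, u])
mask_val, onehot_val, u_val = draw()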
Example #20
 def __init__(self, incoming, sigma=0.1, **kwargs):
     super(MultiplicativeGaussianNoiseLayer,
           self).__init__(incoming, **kwargs)
     self._srng = RandomStreams(get_rng().randint(1, 2147462579))
     self.sigma = sigma
Example #21
class VAELayer(Layer):
    def __init__(self,
                 incoming,
                 encoder,
                 decoder,
                 x_distribution='bernoulli',
                 pz_distribution='gaussian',
                 qz_distribution='gaussian',
                 latent_size=50,
                 W=init.Normal(0.01),
                 b=init.Normal(0.01),
                 **kwargs):
        super(VAELayer, self).__init__(incoming, **kwargs)
        num_batch, n_features = self.input_shape
        self.num_batch = num_batch
        self.n_features = n_features
        self.x_distribution = x_distribution
        self.pz_distribution = pz_distribution
        self.qz_distribution = qz_distribution
        self.encoder = encoder
        self.decoder = decoder
        self._srng = RandomStreams()

        if self.x_distribution not in ['gaussian', 'bernoulli']:
            raise NotImplementedError
        if self.pz_distribution not in ['gaussian', 'gaussianmarg']:
            raise NotImplementedError
        if self.qz_distribution not in ['gaussian', 'gaussianmarg']:
            raise NotImplementedError

        self.params_encoder = lasagne.layers.get_all_params(encoder)
        self.params_decoder = lasagne.layers.get_all_params(decoder)
        for p in self.params_encoder:
            p.name = "VAELayer encoder :" + p.name
        for p in self.params_decoder:
            p.name = "VAELayer decoder :" + p.name

        self.num_hid_enc = encoder.output_shape[1]
        self.num_hid_dec = decoder.output_shape[1]
        self.latent_size = latent_size

        self.W_enc_to_z_mu = self.add_param(W, (self.num_hid_enc, latent_size))
        self.b_enc_to_z_mu = self.add_param(b, (latent_size, ))

        self.W_enc_to_z_logsigma = self.add_param(
            W, (self.num_hid_enc, self.latent_size))
        self.b_enc_to_z_logsigma = self.add_param(b, (latent_size, ))

        self.W_dec_to_x_mu = self.add_param(
            W, (self.num_hid_dec, self.n_features))
        self.b_dec_to_x_mu = self.add_param(b, (self.n_features, ))

        self.W_params = [
            self.W_enc_to_z_mu, self.W_enc_to_z_logsigma, self.W_dec_to_x_mu
        ] + self.params_encoder + self.params_decoder
        self.bias_params = [
            self.b_enc_to_z_mu, self.b_enc_to_z_logsigma, self.b_dec_to_x_mu
        ]

        params_tmp = []
        if self.x_distribution == 'gaussian':
            self.W_dec_to_x_logsigma = self.add_param(
                W, (self.num_hid_dec, self.n_features))
            self.b_dec_to_x_logsigma = self.add_param(b, (self.n_features, ))
            self.W_params += [self.W_dec_to_x_logsigma]
            self.bias_params += [self.b_dec_to_x_logsigma]
            self.W_dec_to_x_logsigma.name = "VAE: W_dec_to_x_logsigma"
            self.b_dec_to_x_logsigma.name = "VAE: b_dec_to_x_logsigma"
            params_tmp = [self.W_dec_to_x_logsigma, self.b_dec_to_x_logsigma]

        self.params = self.params_encoder + [self.W_enc_to_z_mu,
                                             self.b_enc_to_z_mu,
                                             self.W_enc_to_z_logsigma,
                                             self.b_enc_to_z_logsigma] + self.params_decoder + \
                      [self.W_dec_to_x_mu, self.b_dec_to_x_mu] + params_tmp

        self.W_enc_to_z_mu.name = "VAELayer: W_enc_to_z_mu"
        self.W_enc_to_z_logsigma.name = "VAELayer: W_enc_to_z_logsigma"
        self.W_dec_to_x_mu.name = "VAELayer: W_dec_to_x_mu"
        self.b_enc_to_z_mu.name = "VAELayer: b_enc_to_z_mu"
        self.b_enc_to_z_logsigma.name = "VAELayer: b_enc_to_z_logsigma"
        self.b_dec_to_x_mu.name = "VAELayer: b_dec_to_x_mu"

    def get_params(self):
        return self.params

    def get_output_shape_for(self, input_shape):
        dec_out_shp = self.decoder.get_output_shape_for(
            (self.num_batch, self.num_hid_dec))
        if self.x_distribution == 'bernoulli':
            return dec_out_shp
        elif self.x_distribution == 'gaussian':
            return [dec_out_shp, dec_out_shp]

    def _encoder_output(self, x, *args, **kwargs):
        return lasagne.layers.get_output(self.encoder, x, **kwargs)

    def decoder_output(self, z, *args, **kwargs):
        h_decoder = lasagne.layers.get_output(self.decoder, z, **kwargs)
        if self.x_distribution == 'gaussian':
            mu_decoder = T.dot(h_decoder,
                               self.W_dec_to_x_mu) + self.b_dec_to_x_mu
            log_sigma_decoder = T.dot(
                h_decoder, self.W_dec_to_x_logsigma) + self.b_dec_to_x_logsigma
            decoder_out = mu_decoder, log_sigma_decoder
        elif self.x_distribution == 'bernoulli':
            # TODO: Finish writing the output of the decoder for a bernoulli distributed x.
            decoder_out = T.nnet.sigmoid(
                T.dot(h_decoder, self.W_dec_to_x_mu) + self.b_dec_to_x_mu)
        else:
            raise NotImplementedError
        return decoder_out

    def get_z_mu_sigma(self, x, *args, **kwargs):
        h_encoder = self._encoder_output(x, *args, **kwargs)
        mu_encoder = T.dot(h_encoder, self.W_enc_to_z_mu) + self.b_enc_to_z_mu
        log_sigma_encoder = (T.dot(h_encoder, self.W_enc_to_z_logsigma) +
                             self.b_enc_to_z_logsigma)
        eps = self._srng.normal(log_sigma_encoder.shape)
        # TODO: Calculate the sampled z.
        z = mu_encoder + T.exp(0.5 * log_sigma_encoder) * eps
        return z, mu_encoder, log_sigma_encoder

    def get_log_distributions(self, x, *args, **kwargs):
        # sample z from q(z|x).
        h_encoder = self._encoder_output(x, *args, **kwargs)
        mu_encoder = T.dot(h_encoder, self.W_enc_to_z_mu) + self.b_enc_to_z_mu
        log_sigma_encoder = (T.dot(h_encoder, self.W_enc_to_z_logsigma) +
                             self.b_enc_to_z_logsigma)
        eps = self._srng.normal(log_sigma_encoder.shape)
        z = mu_encoder + T.exp(0.5 * log_sigma_encoder) * eps

        # forward pass z through decoder to generate p(x|z).
        decoder_out = self.decoder_output(z, *args, **kwargs)
        if self.x_distribution == 'bernoulli':
            x_mu = decoder_out
            log_px_given_z = -T.nnet.binary_crossentropy(x_mu, x)
        elif self.x_distribution == 'gaussian':
            x_mu, x_logsigma = decoder_out
            log_px_given_z = normal2(x, x_mu, x_logsigma)

        # sample prior distribution p(z).
        if self.pz_distribution == 'gaussian':
            log_pz = standard_normal(z)
        elif self.pz_distribution == 'gaussianmarg':
            log_pz = -0.5 * (T.log(2 * np.pi) +
                             (T.sqr(mu_encoder) + T.exp(log_sigma_encoder)))

        # variational approximation distribution q(z|x)
        if self.qz_distribution == 'gaussian':
            log_qz_given_x = normal2(z, mu_encoder, log_sigma_encoder)
        elif self.qz_distribution == 'gaussianmarg':
            log_qz_given_x = -0.5 * (T.log(2 * np.pi) + 1 + log_sigma_encoder)

        # sum over dim 1 to get shape (,batch_size)
        log_px_given_z = log_px_given_z.sum(
            axis=1, dtype=theano.config.floatX)  # sum over x
        log_pz = log_pz.sum(axis=1,
                            dtype=theano.config.floatX)  # sum over latent vars
        log_qz_given_x = log_qz_given_x.sum(
            axis=1, dtype=theano.config.floatX)  # sum over latent vars

        return log_pz, log_qz_given_x, log_px_given_z

    def draw_sample(self, z=None, *args, **kwargs):
        if z is None:  # draw random z
            z = self._srng.normal((self.num_batch, self.latent_size))
        return self.decoder_output(z, *args, **kwargs)
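
# For reference, the three per-example log-densities returned by
# get_log_distributions combine into the variational lower bound. A minimal
# sketch, assuming `vae_layer` is an instance of the class above and `x_sym`
# is a symbolic minibatch:
log_pz, log_qz_given_x, log_px_given_z = vae_layer.get_log_distributions(x_sym)
lower_bound = (log_px_given_z + log_pz - log_qz_given_x).mean()
cost = -lower_bound  # quantity minimized during training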
Example #22
 def __init__(self, p):
     super(Dropout, self).__init__()
     self.p = p
     self.srng = RandomStreams(seed=np.random.randint(10e6))
Example #23
LATENT_DIM = args.latent_dim
ALPHA_ITERS = args.alpha_iters
VANILLA = False
LR = 1e-3

BATCH_SIZE = 100
N_CHANNELS = 1
HEIGHT = 28
WIDTH = 28

TEST_BATCH_SIZE = 100
TIMES = ('iters', 500, 500 * 400, 500, 400 * 500, 2 * ALPHA_ITERS)

lib.print_model_settings(locals().copy())

theano_srng = RandomStreams(seed=234)

np.random.seed(123)


def PixCNNGate(x):
    a = x[:, ::2]
    b = x[:, 1::2]
    return T.tanh(a) * T.nnet.sigmoid(b)
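
# A quick shape check of the gate above: it expects an even number of feature
# maps on axis 1 and halves them, using the even-indexed channels as the tanh
# path and the odd-indexed channels as the sigmoid gate.
import theano

x_check = T.tensor4('x_check')                   # (batch, 2*dim, height, width)
gate_fn = theano.function([x_check], PixCNNGate(x_check))
out = gate_fn(np.zeros((2, 8, 28, 28), dtype=theano.config.floatX))
print(out.shape)                                 # (2, 4, 28, 28)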


def PixCNN_condGate(x, z, dim, activation='tanh', name=""):
    a = x[:, ::2]
    b = x[:, 1::2]

    Z_to_tanh = lib.ops.linear.Linear(name + ".tanh",
Example #24
class RandomizedRectifierLayer(Layer):
    """
    A layer that applies a randomized leaky rectify nonlinearity to its input.

    The randomized leaky rectifier was first proposed and used in the Kaggle
    NDSB Competition, and later evaluated in [1]_. Compared to the standard
    leaky rectifier :func:`leaky_rectify`, it has a randomly sampled slope
    for negative input during training, and a fixed slope during evaluation.

    Equation for the randomized rectifier linear unit during training:
    :math:`\\varphi(x) = \\max((\\sim U(lower, upper)) \\cdot x, x)`

    During evaluation, the factor is fixed to the arithmetic mean of `lower`
    and `upper`.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape

    lower : Theano shared variable, expression, or constant
        The lower bound for the randomly chosen slopes.

    upper : Theano shared variable, expression, or constant
        The upper bound for the randomly chosen slopes.

    shared_axes : 'auto', 'all', int or tuple of int
        The axes along which the random slopes of the rectifier units are
        going to be shared. If ``'auto'`` (the default), share over all axes
        except for the second - this will share the random slope over the
        minibatch dimension for dense layers, and additionally over all
        spatial dimensions for convolutional layers. If ``'all'``, share over
        all axes, thus using a single random slope.

    **kwargs
        Any additional keyword arguments are passed to the `Layer` superclass.

    References
    ----------
    .. [1] Bing Xu, Naiyan Wang et al. (2015):
       Empirical Evaluation of Rectified Activations in Convolutional Network,
       http://arxiv.org/abs/1505.00853
    """
    def __init__(self, incoming, lower=0.3, upper=0.8, shared_axes='auto',
                 **kwargs):
        super(RandomizedRectifierLayer, self).__init__(incoming, **kwargs)
        self._srng = RandomStreams(get_rng().randint(1, 2147462579))
        self.lower = lower
        self.upper = upper

        if not isinstance(lower > upper, theano.Variable) and lower > upper:
            raise ValueError("Upper bound for RandomizedRectifierLayer needs "
                             "to be higher than lower bound.")

        if shared_axes == 'auto':
            self.shared_axes = (0,) + tuple(range(2, len(self.input_shape)))
        elif shared_axes == 'all':
            self.shared_axes = tuple(range(len(self.input_shape)))
        elif isinstance(shared_axes, int):
            self.shared_axes = (shared_axes,)
        else:
            self.shared_axes = shared_axes

    def get_output_for(self, input, deterministic=False, **kwargs):
        """
        Parameters
        ----------
        input : tensor
            output from the previous layer
        deterministic : bool
            If true, the arithmetic mean of lower and upper is used for the
            leaky slope.
        """
        if deterministic or self.upper == self.lower:
            return theano.tensor.nnet.relu(input, (self.upper+self.lower)/2.0)
        else:
            shape = list(self.input_shape)
            if any(s is None for s in shape):
                shape = list(input.shape)
            for ax in self.shared_axes:
                shape[ax] = 1

            rnd = self._srng.uniform(tuple(shape),
                                     low=self.lower,
                                     high=self.upper,
                                     dtype=theano.config.floatX)
            rnd = theano.tensor.addbroadcast(rnd, *self.shared_axes)
            return theano.tensor.nnet.relu(input, rnd)
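
A short hedged usage sketch of RandomizedRectifierLayer, assuming the standard Lasagne helpers are available (layer sizes below are arbitrary):

# Hedged usage sketch: a dense layer followed by the randomized rectifier.
import theano.tensor as T
import lasagne

x = T.matrix('x')
l_in = lasagne.layers.InputLayer((None, 784), input_var=x)
l_dense = lasagne.layers.DenseLayer(l_in, num_units=256,
                                    nonlinearity=lasagne.nonlinearities.linear)
l_rrelu = RandomizedRectifierLayer(l_dense, lower=0.3, upper=0.8)

train_out = lasagne.layers.get_output(l_rrelu)                      # random slopes
test_out = lasagne.layers.get_output(l_rrelu, deterministic=True)   # mean slope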
Exemple #25
class DropoutLayer(BaseLayer):
    """
    This class implements dropout for layer output energy (activations).
    """
    def __init__(self, drop_probability=0.5, rescale=False, seed=4455):
        """
        This function initializes the class.
        The input and output tensor shapes are equal.

        Parameters
        ----------
        drop_probability: float, default: 0.5
            the fraction of activations that will be set to zero.
        rescale: bool, default: False
            whether to rescale the remaining activations by 1 / (1 - drop_probability)
            so that the expected activation is preserved.
        seed: int
            an integer for random seed.
        """
        super(DropoutLayer, self).__init__()
        # check asserts
        assert drop_probability >= 0 and drop_probability < 1, '"drop_probability" should be in range [0, 1).'
        assert isinstance(rescale, bool), '"rescale" should be a bool value whether we use dropout rescaling or not.'

        # set members        
        self.drop_probability = drop_probability
        self.rescale = rescale
        self.rng = MRG(seed)  # random number generator

    def set_shared(self):
        """
        This function overrides the parent's one.
        Set shared Variables.

        Shared Variables
        ----------------
        flag: scalar
            a scalar value to distinguish training mode and inference mode.
        """        
        self.flag = theano.shared(1, self.name + '_flag')  # 1: train / -1: inference
        self.flag.tags = ['flag', self.name]

    def change_flag(self, new_flag):
        """
        This function changes the flag to switch between training and inference mode.
        If flag > 0, the layer is in training mode; otherwise it is in inference mode.

        Parameters
        ----------
        new_flag: int (or float)
            a single scalar value to be a new flag.
        """
        self.flag.set_value(float(new_flag)) # 1: train / -1: inference

    def get_output(self, input_):
        """
        This function overrides the parent's one.
        It creates the symbolic expression that computes the output from an input.
        The expression uses theano's switch function, conditioned on the flag.

        Math Expression
        ---------------
        For inference:
            y = x
        For training:
            mask ~ Bernoulli(1 - drop_probability)
            y = 1 / (1 - drop_probability) * x * mask   (coefficient 1 if rescale is False)

        Parameters
        ----------
        input_: TensorVariable

        Returns
        -------
        TensorVariable
        """
        if self.rescale is True:
            coeff = 1 / (1 - self.drop_probability)
        else:
            coeff = 1
        mask = self.rng.binomial(input_.shape, p=1 - self.drop_probability, dtype=input_.dtype)
        return T.switch(T.gt(self.flag, 0), input_ * mask * coeff, input_)
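
A hedged usage sketch of DropoutLayer; it assumes BaseLayer supplies a default layer name for set_shared and that the symbolic graph is compiled with theano.function (both assumptions about the surrounding framework):

# Hedged usage sketch: the shared flag toggles training and inference behaviour.
import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
drop = DropoutLayer(drop_probability=0.5, rescale=True)
drop.set_shared()                       # creates the train/inference flag
f = theano.function([x], drop.get_output(x))

data = np.ones((4, 10), dtype=theano.config.floatX)
drop.change_flag(1)                     # training: roughly half of the units are zeroed
print(f(data))
drop.change_flag(-1)                    # inference: the input passes through unchanged
print(f(data))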
Exemple #26
 def __init__(self,
              sources,
              n_out,
              index,
              y_in=None,
              target=None,
              target_index=None,
              sparse=False,
              cost_scale=1.0,
              input_scale=1.0,
              L1=0.0,
              L2=0.0,
              L2_eye=None,
              varreg=0.0,
              output_L2_reg=0.0,
              output_entropy_reg=0.0,
              output_entropy_exp_reg=0.0,
              with_bias=True,
              mask="unity",
              dropout=0.0,
              batch_drop=False,
              batch_norm=False,
              bn_use_sample=False,
              layer_drop=0.0,
              residual=False,
              carry=False,
              sparse_filtering=False,
              gradient_scale=1.0,
              trainable=True,
              device=None,
              dtype='float32',
              **kwargs):
     """
 :param list[NetworkBaseLayer.Layer] sources: list of source layers
 :param int n_out: output dim of W_in and dim of bias
 :param float L1: l1-param-norm regularization
 :param float L2: l2-param-norm regularization
 :param str mask: "unity" or "dropout"
 :type dropout: float
 """
     super(Layer, self).__init__(**kwargs)
     self.index = index
     self.sources = sources
     ":type: list[Layer]"
     self.num_sources = len(sources)
     self.D = max([s.D for s in sources if isinstance(s, Layer)] + [0])
     if mask is None: mask = 'none'
     self.set_attr('mask', mask)
     self.set_attr('dropout', dropout)
     self.set_attr('sparse', sparse)
     self.set_attr('bn_use_sample', bn_use_sample)
     self.set_attr('sparse_filtering', sparse_filtering)
     if not trainable:
         self.set_attr('trainable', trainable)  # only store if not default
         self.gradient_scale = 0.0  # just to be sure
     else:
         self.gradient_scale = gradient_scale
     if gradient_scale != 1.0:
         self.set_attr('gradient_scale', gradient_scale)
     self.set_attr('layer_drop', layer_drop)
     assert not carry, "not supported anymore"
     self.set_attr('residual', residual)
     self.set_attr('n_out', n_out)
     self.set_attr('L1', L1)
     self.set_attr('L2', L2)
     if L2_eye:
         self.set_attr('L2_eye', L2_eye)
     self.device = device  # if device else str(theano.config.device)
     for s in self.sources:
         s.transfer_output(self.device)
     self.set_attr('varreg', varreg)
     if output_L2_reg:
         self.set_attr('output_L2_reg', output_L2_reg)
     if output_entropy_reg:
         self.set_attr('output_entropy_reg', output_entropy_reg)
     if output_entropy_exp_reg:
         self.set_attr('output_entropy_exp_reg', output_entropy_exp_reg)
     self.set_attr('batch_norm', batch_norm)
     self.set_attr('input_scale', input_scale)
     if y_in is not None:
         self.y_in = {}
         for k in y_in:
             if not isinstance(y_in[k], T.Variable): continue
             self.y_in[k] = time_batch_make_flat(
                 y_in[k])  # TODO: better not flatten here...
             self.y_in[k].n_out = getattr(y_in[k], "n_out", None)
     else:
         self.y_in = None
     self.constraints = T.constant(0)
     if target:
         self.set_attr('target', target)
     if target_index:
         self.set_attr('target_index', target_index)
         assert target_index in self.network.j
         self.index = index = self.network.j[target_index]
     if cost_scale != 1:
         self.set_attr("cost_scale", cost_scale)
     if with_bias:
         self.b = self.add_param(self.create_bias(n_out),
                                 'b_%s' % self.name)
     else:
         self.set_attr('with_bias', False)
         self.b = numpy.float32(0)
     self.mass = T.constant(1., name="mass_%s" % self.name, dtype='float32')
     self.masks = [None] * len(self.sources)
     assert mask in ['dropout', 'unity', 'none'], "invalid mask: %s" % mask
     if mask == "dropout" or (mask == 'none' and dropout > 0):
         assert 0.0 < dropout < 1.0
         # If we apply this mass during training then we don't need any mask or mass for testing.
         # The expected weight should be 1 in
         #   E[x] = mass * (1-dropout)
         # so mass has to be 1 / (1 - dropout).
         self.mass = T.constant(1.0 / (1.0 - dropout), dtype='float32')
         from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
         srng = RandomStreams(self.rng.randint(1234) + 1)
         if self.depth > 1:
             self.masks = [
                 T.cast(
                     srng.binomial(n=1,
                                   p=1 - dropout,
                                   size=(s.attrs['n_out'], self.depth)),
                     theano.config.floatX) for s in self.sources
             ]
         else:
             if batch_drop:
                 self.masks = [
                     T.cast(
                         srng.binomial(n=1,
                                       p=1 - dropout,
                                       size=s.output.shape),
                         theano.config.floatX) for s in self.sources
                 ]
             else:
                 self.masks = [
                     T.cast(
                         srng.binomial(n=1,
                                       p=1 - dropout,
                                       size=(s.attrs['n_out'], )),
                         theano.config.floatX) for s in self.sources
                 ]