Example 1
    def train(self,
              x,
              epochs=15,
              lr=0.01,
              batch_size=20,
              corruption_level=0.3,
              regularization=0):
        n_batch = x.shape[0] // batch_size
        corrupt_x = corrupt(x,
                            corruption_level)  # add noise to the original data
        learning_curve_list = []

        for i in range(epochs):
            Loss = []
            if i == 0:
                start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
            for j in range(n_batch):
                batch_x = x[j * batch_size:(j + 1) *
                            batch_size]  # get minibatch of original data
                corrupt_batch_x = corrupt_x[
                    j * batch_size:(j + 1) *
                    batch_size]  # get minibatch of corrupted data

                hidden_in, cache1 = affine_forward(corrupt_batch_x, self.W1,
                                                   self.b1)
                hidden_out, cache2 = sigmoid_forward(hidden_in)
                reconstruct_in, cache3 = affine_forward(
                    hidden_out, self.W2, self.b2)

                batch_loss, dscore = cross_entropy_loss(
                    reconstruct_in, batch_x)
                reg_loss = regularization * 0.5 * np.sum(self.W1 * self.W1)  # scalar L2 penalty
                loss = batch_loss + reg_loss
                Loss.append(loss)
                """back propagation"""
                grad_W2, grad_b2, grad_hidden_out = affine_backward(
                    dscore, cache3)
                grad_hidden_in = sigmoid_backward(grad_hidden_out, cache2)
                grad_W1, grad_b1, _ = affine_backward(grad_hidden_in, cache1)
                """update parameters"""
                self.W1 -= lr * (grad_W1 + grad_W2.T +
                                 regularization * self.W1)
                self.b1 -= lr * (grad_b1)
                self.b2 -= lr * (grad_b2)

            mean_loss = np.mean(Loss)
            learning_curve_list.append(mean_loss)
            print("average loss is: %f, at epoch: %d" % (mean_loss, i))
def autoencoder(dimensions=[784, 512, 256, 64]):
    """Build a deep denoising autoencoder w/ tied weights.

    Parameters
    ----------
    dimensions : list, optional
        The number of neurons for each layer of the autoencoder.

    Returns
    -------
    model : dict
        'x' : Tensor
            Input placeholder to the network
        'z' : Tensor
            Inner-most latent representation
        'y' : Tensor
            Output reconstruction of the input
        'cost' : Tensor
            Overall cost to use for training
    """
    # input to the network
    x = tf.placeholder(tf.float32, [None, dimensions[0]], name='x')
    current_input = corrupt(x)
    # Build the encoder
    encoder = []
    for layer_i, n_output in enumerate(dimensions[1:]):
        n_input = int(current_input.get_shape()[1])
        W = tf.Variable(
            tf.random_uniform([n_input, n_output], -1.0 / math.sqrt(n_input),
                              1.0 / math.sqrt(n_input)))
        b = tf.Variable(tf.zeros([n_output]))
        encoder.append(W)
        output = tf.nn.tanh(tf.matmul(current_input, W) + b)
        current_input = output
    # latent representation
    z = current_input
    encoder.reverse()
    # Build the decoder using the same weights
    for layer_i, n_output in enumerate(dimensions[:-1][::-1]):
        W = tf.transpose(encoder[layer_i])
        b = tf.Variable(tf.zeros([n_output]))
        output = tf.nn.tanh(tf.matmul(current_input, W) + b)
        current_input = output
    # now have the reconstruction through the network
    y = current_input
    # cost function measures pixel-wise difference
    cost = tf.sqrt(tf.reduce_mean(tf.square(y - x)))
    return {'x': x, 'z': z, 'y': y, 'cost': cost}
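A hedged usage sketch for the builder above (TF 1.x graph mode): it first defines the corrupt helper the function assumes, then trains with Adam on a random stand-in batch; swap in real flattened images in practice.

import math
import numpy as np
import tensorflow as tf

def corrupt(x):
    # Multiply each element by 0 or 1, chosen uniformly at random.
    return tf.multiply(x, tf.cast(
        tf.random_uniform(shape=tf.shape(x), minval=0, maxval=2,
                          dtype=tf.int32), tf.float32))

ae = autoencoder(dimensions=[784, 512, 256, 64])
optimizer = tf.train.AdamOptimizer(0.001).minimize(ae['cost'])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(10):
        batch = np.random.rand(50, 784).astype(np.float32)  # stand-in data
        _, cost = sess.run([optimizer, ae['cost']],
                           feed_dict={ae['x']: batch})
        print(epoch, cost)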
Example 3
def train_step(test_xs):
    batch_size = flags.batch_size
    # batch_size = 50
    n_epochs = flags.num_epoch
    mask = np.random.binomial(
        1, 1 - flags.corrupt_prob,
        (int(np.round(batch_size * flags.validation)) + 1, 225))
    # print(mask[:5])
    for epoch_i in range(n_epochs):
        # print(dataset_train.shape[1] // batch_size)
        datasets = utils.loadDataset(batch_size=batch_size,
                                     max=flags.max,
                                     dataset_dir=flags.datasetPath)
        f = 0
        for dataset in datasets:
            dataset_train, dataset_test = partition(dataset, shuffle=False)
            mean_img = np.mean(dataset_train, axis=1)
            dataset_train = np.array(
                [img - mean_img for img in dataset_train.T])
            dataset_train = dataset_train.T
            dataset_train_, dataset_train = corrupt(dataset_train, mask=mask)
            _, score, step, summaries = sess.run(
                [train_op, ae.score, global_step, train_summary_op],
                feed_dict={
                    ae.x: dataset_train,
                    ae.x_: dataset_train_
                })
            current_step = tf.train.global_step(sess, global_step)
            if current_step % 100 == 0:
                print("epoch:{} step:{} score:{}".format(
                    epoch_i, step, score))
            train_summary_writer.add_summary(summaries, step)
            if current_step % 1000 == 0:
                path = saver.save(sess,
                                  checkpoint_prefix,
                                  global_step=current_step)
                print("Saved model checkpoint to {}\n".format(path))
        # score, step, summaries, output, W = sess.run(
        #     [ae.score, global_step, dev_summary_op, ae.output, ae.encoder],
        #     feed_dict={ae.x: test_xs, ae.x_: test_xs})
        # print("evaluation:\nscore:{}".format(score))
    test_xs = np.asarray(test_xs)
    print("Testxs : " + str(test_xs.shape))
    return test_xs
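train_step relies on globals configured elsewhere (flags, sess, ae, train_op, global_step, saver and the summary writers) and on a corrupt helper that returns the corrupted batch together with the original. A minimal sketch of such a helper, assuming the precomputed binary mask built above:

import numpy as np

def corrupt(x, mask):
    # Apply a fixed binary mask row by row; the mask width must match the
    # feature dimension of x (225 in the snippet above).
    corrupted = x * mask[:x.shape[0]]
    return corrupted, x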
Example 5
def VAE(input_shape=[None, 784],
        n_filters=[64, 64, 64],
        filter_sizes=[4, 4, 4],
        n_hidden=32,
        n_code=2,
        activation=tf.nn.tanh,
        dropout=False,
        denoising=False,
        convolutional=False,
        variational=False,
        on_cloud=0):
    """(Variational) (Convolutional) (Denoising) Autoencoder.

    Uses tied weights.

    Parameters
    ----------
    input_shape : list, optional
        Shape of the input to the network. e.g. for MNIST: [None, 784].
    n_filters : list, optional
        Number of filters for each layer.
        If convolutional=True, each entry gives the number of output
        filters to create for the corresponding convolutional layer.
        If convolutional=False, each entry gives the number of neurons
        for the corresponding fully connected layer.
    filter_sizes : list, optional
        Only applied when convolutional=True.  This refers to the ksize (height
        and width) of each convolutional layer.
    n_hidden : int, optional
        Only applied when variational=True.  This refers to the first fully
        connected layer prior to the variational embedding, directly after
        the encoding.  After the variational embedding, another fully connected
        layer is created with the same size prior to decoding.  Set to 0 to
        not use an additional hidden layer.
    n_code : int, optional
        Only applied when variational=True.  This refers to the number of
        latent Gaussians to sample for creating the inner most encoding.
    activation : function, optional
        Activation function to apply to each layer, e.g. tf.nn.relu
    dropout : bool, optional
        Whether or not to apply dropout.  If using dropout, you must feed a
        value for 'keep_prob', as returned in the dictionary.  1.0 means no
        dropout is used.  0.0 means every connection is dropped.  Sensible
        values are between 0.5 and 0.8.
    denoising : bool, optional
        Whether or not to apply denoising.  If using denoising, you must feed
        a value for 'corrupt_prob', as returned in the dictionary.  0.0 means
        no corruption is used.  1.0 means every feature is corrupted (see the
        mixing expression below).  Sensible values are between 0.2 and 0.5.
    convolutional : bool, optional
        Whether or not to use a convolutional network; otherwise a fully
        connected network will be created.  This affects the meaning of the
        n_filters parameter.
    variational : bool, optional
        Whether or not to create a variational embedding layer.  After the
        encoding, this creates a fully connected layer (if `n_hidden` is
        greater than 0), then a multivariate Gaussian sampling layer, then
        another fully connected layer.  The size of the fully connected
        layers is determined by `n_hidden`, and the size of the sampling
        layer by `n_code`.

    Returns
    -------
    model : dict
        {
            'cost': Tensor to optimize.
            'Ws': All weights of the encoder.
            'x': Input Placeholder
            'z': Inner most encoding Tensor (latent features)
            'y': Reconstruction of the Decoder
            'keep_prob': Amount to keep when using Dropout
            'corrupt_prob': Amount to corrupt when using Denoising
            'train': Placeholder; set to True when training (used by batch normalization).
        }
    """
    # network input / placeholders for train (bn) and dropout
    x = tf.placeholder(tf.float32, input_shape, 'x')
    phase_train = tf.placeholder(tf.bool, name='phase_train')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    corrupt_prob = tf.placeholder(tf.float32, [1])

    # apply noise if denoising
    x_ = (utils.corrupt(x) * corrupt_prob + x *
          (1 - corrupt_prob)) if denoising else x

    # 2d -> 4d if convolution
    x_tensor = utils.to_tensor(x_) if convolutional else x_
    current_input = x_tensor

    Ws = []
    shapes = []

    # Build the encoder
    for layer_i, n_output in enumerate(n_filters):
        with tf.variable_scope('encoder/{}'.format(layer_i)):
            shapes.append(current_input.get_shape().as_list())
            if convolutional:
                h, W = utils.conv2d(x=current_input,
                                    n_output=n_output,
                                    k_h=filter_sizes[layer_i],
                                    k_w=filter_sizes[layer_i])
            else:
                h, W = utils.linear(x=current_input, n_output=n_output)
            h = activation(batch_norm(h, phase_train, 'bn' + str(layer_i)))
            if dropout:
                h = tf.nn.dropout(h, keep_prob)
            Ws.append(W)
            current_input = h

    shapes.append(current_input.get_shape().as_list())

    with tf.variable_scope('variational'):
        if variational:
            dims = current_input.get_shape().as_list()
            flattened = utils.flatten(current_input)

            if n_hidden:
                h = utils.linear(flattened, n_hidden, name='W_fc')[0]
                h = activation(batch_norm(h, phase_train, 'fc/bn'))
                if dropout:
                    h = tf.nn.dropout(h, keep_prob)
            else:
                h = flattened

            z_mu = utils.linear(h, n_code, name='mu')[0]
            z_log_sigma = 0.5 * utils.linear(h, n_code, name='log_sigma')[0]

            # Sample from noise distribution p(eps) ~ N(0, 1)
            epsilon = tf.random_normal(tf.stack([tf.shape(x)[0], n_code]))

            # Sample from posterior
            z = z_mu + tf.multiply(epsilon, tf.exp(z_log_sigma))

            if n_hidden:
                h = utils.linear(z, n_hidden, name='fc_t')[0]
                h = activation(batch_norm(h, phase_train, 'fc_t/bn'))
                if dropout:
                    h = tf.nn.dropout(h, keep_prob)
            else:
                h = z

            size = dims[1] * dims[2] * dims[3] if convolutional else dims[1]
            h = utils.linear(h, size, name='fc_t2')[0]
            current_input = activation(batch_norm(h, phase_train, 'fc_t2/bn'))
            if dropout:
                current_input = tf.nn.dropout(current_input, keep_prob)

            if convolutional:
                current_input = tf.reshape(
                    current_input,
                    tf.stack([
                        tf.shape(current_input)[0], dims[1], dims[2], dims[3]
                    ]))
        else:
            z = current_input

    shapes.reverse()
    # copy before reversing so the caller's list (and the mutable default
    # argument) is not modified across calls
    n_filters = list(n_filters)
    n_filters.reverse()
    Ws.reverse()

    n_filters += [input_shape[-1]]

    # %%
    # Decoding layers
    for layer_i, n_output in enumerate(n_filters[1:]):
        with tf.variable_scope('decoder/{}'.format(layer_i)):
            shape = shapes[layer_i + 1]
            if convolutional:
                h, W = utils.deconv2d(x=current_input,
                                      n_output_h=shape[1],
                                      n_output_w=shape[2],
                                      n_output_ch=shape[3],
                                      n_input_ch=shapes[layer_i][3],
                                      k_h=filter_sizes[layer_i],
                                      k_w=filter_sizes[layer_i])
            else:
                h, W = utils.linear(x=current_input, n_output=n_output)
            h = activation(batch_norm(h, phase_train, 'dec/bn' + str(layer_i)))
            if dropout:
                h = tf.nn.dropout(h, keep_prob)
            current_input = h

    y = current_input
    x_flat = utils.flatten(x)
    y_flat = utils.flatten(y)

    # l2 loss
    loss_x = tf.reduce_sum(tf.squared_difference(x_flat, y_flat), 1)

    if variational:
        # variational lower bound, kl-divergence
        loss_z = -0.5 * tf.reduce_sum(
            1.0 + 2.0 * z_log_sigma - tf.square(z_mu) -
            tf.exp(2.0 * z_log_sigma), 1)

        # add l2 loss
        cost = tf.reduce_mean(loss_x + loss_z)
    else:
        # just optimize l2 loss
        cost = tf.reduce_mean(loss_x)

    return {
        'cost': cost,
        'Ws': Ws,
        'x': x,
        'z': z,
        'y': y,
        'keep_prob': keep_prob,
        'corrupt_prob': corrupt_prob,
        'train': phase_train
    }
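A hedged usage sketch for the model dictionary above: build the graph and run one training step, feeding the three placeholders the docstring describes. The utils module and batch_norm come from the surrounding codebase, and the batch here is a random stand-in:

import numpy as np
import tensorflow as tf

model = VAE(input_shape=[None, 784], convolutional=True,
            variational=True, dropout=True, denoising=True)
optimizer = tf.train.AdamOptimizer(0.001).minimize(model['cost'])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch = np.random.rand(50, 784).astype(np.float32)
    _, cost = sess.run([optimizer, model['cost']], feed_dict={
        model['x']: batch,
        model['train']: True,          # batch norm in training mode
        model['keep_prob']: 0.8,       # dropout keep probability
        model['corrupt_prob']: [0.3],  # mixing weight toward corrupted input
    })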
def autoencoder(dimensions=[784, 512, 256, 64]):
    """Build a deep denoising autoencoder w/ tied weights.

    Parameters
    ----------
    dimensions : list, optional
        The number of neurons for each layer of the autoencoder.

    Returns
    -------
    model : dict
        'x' : Tensor
            Input placeholder to the network
        'z' : Tensor
            Inner-most latent representation
        'y' : Tensor
            Output reconstruction of the input
        'corrupt_prob' : Tensor
            Degree of corruption to apply (0.0 = clean, 1.0 = fully corrupted)
        'cost' : Tensor
            Overall cost to use for training
    """
    # input to the network
    x = tf.placeholder(tf.float32, [None, dimensions[0]], name='x')

    # Probability that we will corrupt input.
    # This is the essence of the denoising autoencoder, and is pretty
    # basic.  We'll feed forward a noisy input, allowing our network
    # to generalize better, possibly, to occlusions of what we're
    # really interested in.  But to measure accuracy, we'll still
    # enforce a training signal which measures the original image's
    # reconstruction cost.
    #
    # We'll change this to 1 during training
    # but when we're ready for testing/production ready environments,
    # we'll put it back to 0.
    corrupt_prob = tf.placeholder(tf.float32, [1])
    current_input = corrupt(x) * corrupt_prob + x * (1 - corrupt_prob)

    # Build the encoder
    encoder = []
    for layer_i, n_output in enumerate(dimensions[1:]):
        n_input = int(current_input.get_shape()[1])
        W = tf.Variable(
            tf.random_uniform([n_input, n_output], -1.0 / math.sqrt(n_input),
                              1.0 / math.sqrt(n_input)))
        b = tf.Variable(tf.zeros([n_output]))
        encoder.append(W)
        output = tf.nn.tanh(tf.matmul(current_input, W) + b)
        current_input = output
    # latent representation
    z = current_input
    encoder.reverse()
    # Build the decoder using the same weights
    for layer_i, n_output in enumerate(dimensions[:-1][::-1]):
        W = tf.transpose(encoder[layer_i])
        b = tf.Variable(tf.zeros([n_output]))
        output = tf.nn.tanh(tf.matmul(current_input, W) + b)
        current_input = output
    # now have the reconstruction through the network
    y = current_input
    # cost function measures pixel-wise difference
    cost = tf.sqrt(tf.reduce_mean(tf.square(y - x)))
    return {'x': x, 'z': z, 'y': y, 'corrupt_prob': corrupt_prob, 'cost': cost}
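As the long comment above explains, corrupt_prob acts as a train/test switch. A hedged sketch of feeding it, assuming a graph, session, optimizer and batch set up as in the earlier usage examples:

# training: fully corrupt the input
sess.run(optimizer, feed_dict={ae['x']: batch,
                               ae['corrupt_prob']: [1.0]})

# testing/production: feed clean input
recon = sess.run(ae['y'], feed_dict={ae['x']: batch,
                                     ae['corrupt_prob']: [0.0]})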
Example 9
def train_mode(gen,
               dis,
               trainLoader,
               useNoise=False,
               beta1=0.5,
               c=0.01,
               k=1,
               WGAN=False):
    ####### Define optimizer #######
    genOptimizer = optim.Adam(gen.parameters(),
                              lr=opts.lr,
                              betas=(beta1, 0.999))
    disOptimizer = optim.Adam(dis.parameters(),
                              lr=opts.lr,
                              betas=(beta1, 0.999))

    if gen.useCUDA:
        torch.cuda.set_device(opts.gpuNo)
        gen.cuda()
        dis.cuda()

    ####### Create a new folder to save results and model info #######
    exDir = make_new_folder(opts.outDir)
    print('Outputs will be saved to:', exDir)
    save_input_args(exDir, opts)

    #noise level
    noiseSigma = np.logspace(np.log2(0.5),
                             np.log2(0.001),
                             opts.maxEpochs,
                             base=2)

    ####### Start Training #######
    losses = {'gen': [], 'dis': []}
    for e in range(opts.maxEpochs):
        dis.train()
        gen.train()

        epochLoss_gen = 0
        epochLoss_dis = 0

        noiseLevel = float(noiseSigma[e])

        T = time()
        for i, data in enumerate(trainLoader, 0):

            for _ in range(k):
                # add a small amount of corruption to the data
                xReal = Variable(data[0])
                if gen.useCUDA:
                    xReal = xReal.cuda()

                if useNoise:
                    xReal = corrupt(xReal, noiseLevel)  #add a little noise

                ####### Calculate discriminator loss #######
                noSamples = xReal.size(0)

                xFake = gen.sample_x(noSamples)
                if useNoise:
                    xFake = corrupt(xFake, noiseLevel)  #add a little noise
                pReal_D = dis.forward(xReal)
                pFake_D = dis.forward(xFake.detach())

                real = dis.ones(xReal.size(0))
                fake = dis.zeros(xFake.size(0))

                if WGAN:
                    disLoss = pFake_D.mean() - pReal_D.mean()
                else:
                    disLoss = opts.pi * F.binary_cross_entropy(pReal_D, real) + \
                      (1 - opts.pi) * F.binary_cross_entropy(pFake_D, fake)

                ####### Do DIS updates #######
                disOptimizer.zero_grad()
                disLoss.backward()
                disOptimizer.step()

                #### clip DIS weights #### YM
                if WGAN:
                    for p in dis.parameters():
                        p.data.clamp_(-c, c)

                losses['dis'].append(disLoss.item())

            ####### Calculate generator loss #######
            xFake_ = gen.sample_x(noSamples)
            if useNoise:
                xFake_ = corrupt(xFake_, noiseLevel)  #add a little noise
            pFake_G = dis.forward(xFake_)

            if WGAN:
                genLoss = -pFake_G.mean()
            else:
                genLoss = F.binary_cross_entropy(pFake_G, real)

            ####### Do GEN updates #######
            genOptimizer.zero_grad()
            genLoss.backward()
            genOptimizer.step()

            losses['gen'].append(genLoss.item())

            ####### Print info #######
            if i % 100 == 1:
                print('[%d, %d] gen: %.5f, dis: %.5f, time: %.2f'
                      % (e, i, genLoss.item(), disLoss.item(), time() - T))

        ####### Tests #######
        gen.eval()
        print('Outputs will be saved to:', exDir)
        #save some samples
        samples = gen.sample_x(49)
        save_image(samples.data,
                   join(exDir, 'epoch' + str(e) + '.png'),
                   normalize=True)

        #plot
        plot_losses(losses, exDir, epochs=e + 1)

        ####### Save params #######
        gen.save_params(exDir)
        dis.save_params(exDir)

    return gen, dis
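train_mode depends on a module-level opts namespace (lr, gpuNo, outDir, maxEpochs, pi) and on generator/discriminator classes exposing sample_x, ones, zeros, useCUDA and save_params, none of which are shown here. A hedged sketch of the call site, with every name below an assumption rather than something taken from the source:

import torch
from torch.utils.data import DataLoader

trainLoader = DataLoader(dataset, batch_size=64, shuffle=True)  # dataset assumed
gen = Generator()        # hypothetical model classes from the same repo
dis = Discriminator()
gen, dis = train_mode(gen, dis, trainLoader, useNoise=True, k=1, WGAN=False)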
Example 10
def VAE(input_shape=[None, 784],
        n_components_encoder=200,
        n_components_decoder=200,
        n_hidden=20,
        continuous=False,
        denoising=False,
        debug=False):
    # %%
    # Input placeholder
    if debug:
        input_shape = [50, 784]
        x = tf.Variable(np.zeros((input_shape), dtype=np.float32))
    else:
        x = tf.placeholder(tf.float32, input_shape)

    print('* Input')
    print('X:', x.get_shape().as_list())

    # %%
    # Optionally apply noise
    if denoising:
        print('* Denoising')
        x_noise = corrupt(x)
    else:
        x_noise = x

    if continuous:
        # softplus keeps the decoder pre-activations smooth and positive,
        # which suits real-valued data
        activation = lambda x: tf.log(1 + tf.exp(x))
    else:
        activation = lambda x: tf.tanh(x)

    dims = x_noise.get_shape().as_list()
    n_features = dims[1]

    print('* Encoder')
    W_enc = weight_variable([n_features, n_components_encoder])
    b_enc = bias_variable([n_components_encoder])
    h_enc = activation(tf.matmul(x_noise, W_enc) + b_enc)
    print('in:',
          x_noise.get_shape().as_list(), 'W_enc:',
          W_enc.get_shape().as_list(), 'b_enc:',
          b_enc.get_shape().as_list(), 'h_enc:',
          h_enc.get_shape().as_list())

    print('* Variational Autoencoder')
    W_mu = weight_variable([n_components_encoder, n_hidden])
    b_mu = bias_variable([n_hidden])

    W_log_sigma = weight_variable([n_components_encoder, n_hidden])
    b_log_sigma = bias_variable([n_hidden])

    z_mu = tf.matmul(h_enc, W_mu) + b_mu
    z_log_sigma = 0.5 * (tf.matmul(h_enc, W_log_sigma) + b_log_sigma)
    print('in:',
          h_enc.get_shape().as_list(), 'W_mu:',
          W_mu.get_shape().as_list(), 'b_mu:',
          b_mu.get_shape().as_list(), 'z_mu:',
          z_mu.get_shape().as_list())
    print('in:',
          h_enc.get_shape().as_list(), 'W_log_sigma:',
          W_log_sigma.get_shape().as_list(), 'b_log_sigma:',
          b_log_sigma.get_shape().as_list(), 'z_log_sigma:',
          z_log_sigma.get_shape().as_list())
    # %%
    # Sample from noise distribution p(eps) ~ N(0, 1)
    if debug:
        epsilon = tf.random_normal([dims[0], n_hidden])
    else:
        epsilon = tf.random_normal(tf.stack([tf.shape(x)[0], n_hidden]))
    print('epsilon:', epsilon.get_shape().as_list())

    # Sample from posterior
    z = z_mu + tf.exp(z_log_sigma) * epsilon
    print('z:', z.get_shape().as_list())

    print('* Decoder')
    W_dec = weight_variable([n_hidden, n_components_decoder])
    b_dec = bias_variable([n_components_decoder])
    h_dec = activation(tf.matmul(z, W_dec) + b_dec)
    print('in:',
          z.get_shape().as_list(), 'W_dec:',
          W_dec.get_shape().as_list(), 'b_dec:',
          b_dec.get_shape().as_list(), 'h_dec:',
          h_dec.get_shape().as_list())

    W_mu_dec = weight_variable([n_components_decoder, n_features])
    b_mu_dec = bias_variable([n_features])
    y = tf.nn.sigmoid(tf.matmul(h_dec, W_mu_dec) + b_mu_dec)
    print('in:',
          z.get_shape().as_list(), 'W_mu_dec:',
          W_mu_dec.get_shape().as_list(), 'b_mu_dec:',
          b_mu_dec.get_shape().as_list(), 'y:',
          y.get_shape().as_list())

    W_log_sigma_dec = weight_variable([n_components_decoder, n_features])
    b_log_sigma_dec = bias_variable([n_features])
    y_log_sigma = 0.5 * (tf.matmul(h_dec, W_log_sigma_dec) + b_log_sigma_dec)
    print('in:',
          z.get_shape().as_list(), 'W_log_sigma_dec:',
          W_log_sigma_dec.get_shape().as_list(), 'b_log_sigma_dec:',
          b_log_sigma_dec.get_shape().as_list(), 'y_log_sigma:',
          y_log_sigma.get_shape().as_list())

    # p(x|z)
    if continuous:
        log_px_given_z = tf.reduce_sum(
            -(0.5 * tf.log(2.0 * np.pi) + y_log_sigma) -
            0.5 * tf.square((x - y) / tf.exp(y_log_sigma)))
    else:
        # small epsilon keeps the logs finite when y saturates at 0 or 1
        log_px_given_z = tf.reduce_sum(x * tf.log(y + 1e-10) +
                                       (1 - x) * tf.log(1 - y + 1e-10))

    # d_kl(q(z|x)||p(z))
    # Appendix B: 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    kl_div = 0.5 * tf.reduce_sum(1.0 + 2.0 * z_log_sigma - tf.square(z_mu) -
                                 tf.exp(2.0 * z_log_sigma))

    print('* Output')
    print('Y:', y.get_shape().as_list())

    loss = -(log_px_given_z + kl_div)

    return {'cost': loss, 'x': x, 'z': z, 'y': y}
Example 11
def VAE(input_shape=[None, 784],
        n_filters=[],
        filter_sizes=[],
        n_hidden=512,
        n_code=64,
        activation=tf.nn.relu,
        denoising=False,
        convolutional=False,
        debug=False):
    # %%
    # Input placeholder
    if debug:
        input_shape = [50, 784]
        x = tf.Variable(np.zeros((input_shape), dtype=np.float32))
    else:
        x = tf.placeholder(tf.float32, input_shape)

    # %%
    # Optionally apply denoising autoencoder
    if denoising:
        x_noise = corrupt(x)
    else:
        x_noise = x

    # %%
    # ensure 2-d is converted to square tensor.
    if convolutional:
        if len(x.get_shape()) == 2:
            x_dim = np.sqrt(x_noise.get_shape().as_list()[1])
            if x_dim != int(x_dim):
                raise ValueError('Unsupported input dimensions')
            x_dim = int(x_dim)
            x_tensor = tf.reshape(x_noise, [-1, x_dim, x_dim, 1])
        elif len(x_noise.get_shape()) == 4:
            x_tensor = x_noise
        else:
            raise ValueError('Unsupported input dimensions')
    else:
        x_tensor = x
    current_input = x_tensor

    print('* Input')
    print('X:', current_input.get_shape().as_list())
    # %%
    # Build the encoder
    shapes = []
    print('* Encoder')
    for layer_i, n_input in enumerate(n_filters[:-1]):
        n_output = n_filters[layer_i + 1]
        shapes.append(current_input.get_shape().as_list())
        if convolutional:
            n_input = shapes[-1][3]
            W = weight_variable([
                filter_sizes[layer_i], filter_sizes[layer_i], n_input, n_output
            ])
            b = bias_variable([n_output])
            output = activation(
                tf.add(
                    tf.nn.conv2d(current_input,
                                 W,
                                 strides=[1, 2, 2, 1],
                                 padding='SAME'), b))
        else:
            W = weight_variable([n_input, n_output])
            b = bias_variable([n_output])
            output = activation(tf.matmul(current_input, W) + b)
        print('in:',
              current_input.get_shape().as_list(), 'W:',
              W.get_shape().as_list(), 'b:',
              b.get_shape().as_list(), 'out:',
              output.get_shape().as_list())
        current_input = output

    dims = current_input.get_shape().as_list()
    if convolutional:
        # %%
        # Flatten and build latent layer as means and standard deviations
        size = (dims[1] * dims[2] * dims[3])
        if debug:
            flattened = tf.reshape(current_input, [dims[0], size])
        else:
            flattened = tf.reshape(current_input,
                                   tf.stack([tf.shape(x)[0], size]))
    else:
        size = dims[1]
        flattened = current_input

    print('* Reshape')
    print(current_input.get_shape().as_list(), '->',
          flattened.get_shape().as_list())

    print('* FC Layer')
    W_fc = weight_variable([size, n_hidden])
    b_fc = bias_variable([n_hidden])
    h = tf.nn.tanh(tf.matmul(flattened, W_fc) + b_fc)
    print('in:',
          current_input.get_shape().as_list(), 'W_fc:',
          W_fc.get_shape().as_list(), 'b_fc:',
          b_fc.get_shape().as_list(), 'h:',
          h.get_shape().as_list())

    print('* Variational Autoencoder')
    W_mu = weight_variable([n_hidden, n_code])
    b_mu = bias_variable([n_code])

    W_sigma = weight_variable([n_hidden, n_code])
    b_sigma = bias_variable([n_code])

    mu = tf.matmul(h, W_mu) + b_mu
    log_sigma = tf.multiply(0.5, tf.matmul(h, W_sigma) + b_sigma)
    print('in:',
          h.get_shape().as_list(), 'W_mu:',
          W_mu.get_shape().as_list(), 'b_mu:',
          b_mu.get_shape().as_list(), 'mu:',
          mu.get_shape().as_list())
    print('in:',
          h.get_shape().as_list(), 'W_sigma:',
          W_sigma.get_shape().as_list(), 'b_sigma:',
          b_sigma.get_shape().as_list(), 'log_sigma:',
          log_sigma.get_shape().as_list())
    # %%
    # Sample from noise distribution p(eps) ~ N(0, 1)
    if debug:
        epsilon = tf.random_normal([dims[0], n_code])
    else:
        epsilon = tf.random_normal(tf.stack([tf.shape(x)[0], n_code]))
    print('epsilon:', epsilon.get_shape().as_list())

    # Sample from posterior
    z = mu + tf.multiply(epsilon, tf.exp(log_sigma))
    print('z:', z.get_shape().as_list())

    print('* Decoder')
    W_dec = weight_variable([n_code, n_hidden])
    b_dec = bias_variable([n_hidden])
    h_dec = tf.nn.relu(tf.matmul(z, W_dec) + b_dec)
    print('in:',
          z.get_shape().as_list(), 'W_dec:',
          W_dec.get_shape().as_list(), 'b_dec:',
          b_dec.get_shape().as_list(), 'h_dec:',
          h_dec.get_shape().as_list())

    W_fc_t = weight_variable([n_hidden, size])
    b_fc_t = bias_variable([size])
    h_fc_dec = tf.nn.relu(tf.matmul(h_dec, W_fc_t) + b_fc_t)
    print('in:',
          h_dec.get_shape().as_list(), 'W_fc_t:',
          W_fc_t.get_shape().as_list(), 'b_fc_t:',
          b_fc_t.get_shape().as_list(), 'h_fc_dec:',
          h_fc_dec.get_shape().as_list())

    if convolutional:
        if debug:
            h_tensor = tf.reshape(h_fc_dec,
                                  [dims[0], dims[1], dims[2], dims[3]])
        else:
            h_tensor = tf.reshape(
                h_fc_dec, tf.stack([tf.shape(x)[0], dims[1], dims[2], dims[3]]))
    else:
        h_tensor = h_fc_dec

    shapes.reverse()
    # copy before reversing so the caller's n_filters list is not mutated
    n_filters = list(n_filters)
    n_filters.reverse()

    print('* Reshape')
    print(h_fc_dec.get_shape().as_list(), '->', h_tensor.get_shape().as_list())

    ## %%
    ## Decoding layers
    current_input = h_tensor
    for layer_i, n_output in enumerate(n_filters[:-1][::-1]):
        n_input = n_filters[layer_i]
        n_output = n_filters[layer_i + 1]
        shape = shapes[layer_i]
        if convolutional:
            W = weight_variable([
                filter_sizes[layer_i], filter_sizes[layer_i], n_output, n_input
            ])
            b = bias_variable([n_output])
            if debug:
                output = activation(
                    tf.add(
                        tf.nn.conv2d_transpose(current_input,
                                               W,
                                               shape,
                                               strides=[1, 2, 2, 1],
                                               padding='SAME'), b))
            else:
                output = activation(
                    tf.add(
                        tf.nn.conv2d_transpose(current_input,
                                               W,
                                               tf.stack([
                                                   tf.shape(x)[0], shape[1],
                                                   shape[2], shape[3]
                                               ]),
                                               strides=[1, 2, 2, 1],
                                               padding='SAME'), b))
        else:
            W = weight_variable([n_input, n_output])
            b = bias_variable([n_output])
            output = activation(tf.matmul(current_input, W) + b)
        print('in:',
              current_input.get_shape().as_list(), 'W:',
              W.get_shape().as_list(), 'b:',
              b.get_shape().as_list(), 'out:',
              output.get_shape().as_list())
        current_input = output

    # %%
    # Now have the reconstruction through the network
    y_tensor = current_input
    y = tf.reshape(y_tensor, tf.stack([tf.shape(x)[0], input_shape[1]]))

    print('* Output')
    print('Y:', y_tensor.get_shape().as_list())

    # %%
    # Log Prior: D_KL(q(z|x)||p(z))
    # Equation 10
    prior_loss = 0.5 * tf.reduce_sum(1.0 + 2.0 * log_sigma - tf.pow(mu, 2.0) -
                                     tf.exp(2.0 * log_sigma))

    # Reconstruction Cost
    recon_loss = tf.reduce_sum(tf.abs(y_tensor - x_tensor))

    # Total cost
    loss = recon_loss - prior_loss

    # log_px_given_z = normal2(x, mu, log_sigma)
    # loss = (log_pz + log_px_given_z - log_qz_given_x).sum()

    return {'cost': loss, 'x': x, 'z': z, 'y': y}
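A hedged sketch of exercising this graph in debug mode, where the input is fixed to a [50, 784] variable so every shape prints statically; weight_variable, bias_variable and corrupt are assumed from the same codebase:

import numpy as np
import tensorflow as tf

model = VAE(n_filters=[1, 16, 32], filter_sizes=[4, 4],
            convolutional=True, debug=True)
optimizer = tf.train.AdamOptimizer(0.001).minimize(model['cost'])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, cost = sess.run([optimizer, model['cost']])
    print('cost:', cost)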
Example 12
    def train(self,
              x,
              epochs=15,
              lr=0.01,
              batch_size=20,
              corruption_level=0.3,
              regularization=0):
        n_batch = x.shape[0] // batch_size  # integer division
        corrupt_x = corrupt(x,
                            corruption_level)  # add noise to the original data
        learning_curve_list = []

        for i in range(epochs):
            Loss = []
            if i == 0:
                start_time = time.perf_counter()
            for j in range(n_batch):
                batch_x = x[j * batch_size:(j + 1) *
                            batch_size]  # get minibatch of original data
                corrupt_batch_x = corrupt_x[
                    j * batch_size:(j + 1) *
                    batch_size]  # get minibatch of corrupted data

                hidden_in, cache1 = affine_forward(corrupt_batch_x, self.W1,
                                                   self.b1)
                hidden_out, cache2 = sigmoid_forward(hidden_in)
                reconstruct_in, cache3 = affine_forward(
                    hidden_out, self.W2, self.b2)

                batch_loss, dscore = cross_entropy_loss(
                    reconstruct_in, batch_x)
                reg_loss = regularization * 0.5 * np.sum(self.W1 * self.W1)  # scalar L2 penalty
                loss = batch_loss + reg_loss
                Loss.append(loss)
                """back propagation"""
                grad_W2, grad_b2, grad_hidden_out = affine_backward(
                    dscore, cache3)
                grad_hidden_in = sigmoid_backward(grad_hidden_out, cache2)
                grad_W1, grad_b1, _ = affine_backward(grad_hidden_in, cache1)
                """update parameters"""
                self.W1 -= lr * (grad_W1 + grad_W2.T +
                                 regularization * self.W1)
                self.b1 -= lr * (grad_b1)
                self.b2 -= lr * (grad_b2)

            mean_loss = np.mean(Loss)
            learning_curve_list.append(mean_loss)
            print "average loss is: %f, at epoch: %d" % (mean_loss, i)
            '''visualize weight'''
            if i % 10 == 0:
                cmap = mpl.cm.gray_r
                norm = mpl.colors.Normalize(vmin=0)

                rand_index = randint(0, x.shape[0] - 1)  # random.randint is inclusive
                plt.subplot(1, 3, 1)
                plt.imshow(x[rand_index].reshape(28, 28), cmap=cmap)
                plt.subplot(1, 3, 2)
                plt.imshow(corrupt_x[rand_index].reshape(28, 28), cmap=cmap)
                hidden_random = sigmoid(corrupt_x[rand_index].dot(self.W1) +
                                        self.b1)
                recons_random = sigmoid(hidden_random.dot(self.W2) + self.b2)
                plt.subplot(1, 3, 3)
                plt.imshow(recons_random.reshape(28, 28), cmap=cmap)
                plt.show()

                for w_i in range(100):  # fresh name: reusing i would clobber the epoch counter
                    plt.subplot(10, 10, w_i + 1)  # subplot indices start at 1
                    plt.axis('off')
                    plt.imshow(self.W1.T[w_i, :].reshape(28, 28), cmap=cmap)
                plt.show()
            if i == 0:
                stop_time = time.perf_counter()
                print("one epoch takes %.1f minutes!" %
                      ((stop_time - start_time) / 60.0))

        plt.plot(learning_curve_list)
        plt.show()
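The method above leans on several module-level imports and helpers; a plausible header, with every name inferred from usage rather than taken from the source:

import time
from random import randint

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np

# helpers used above and defined elsewhere in the repository:
# corrupt, affine_forward, sigmoid_forward, affine_backward,
# sigmoid_backward, cross_entropy_loss, sigmoid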
def autoencoder(input_shape=[None, 784],
                n_filters=[1, 10, 10, 10],
                filter_sizes=[3, 3, 3, 3],
                corruption=False):
    """Build a deep denoising autoencoder w/ tied weights.

    Parameters
    ----------
    input_shape : list, optional
        Shape of the input to the network, e.g. [None, 784] for flattened
        28x28 images.
    n_filters : list, optional
        Number of filters for each layer; the first entry is the number of
        input channels.
    filter_sizes : list, optional
        Kernel size (height and width) for each convolutional layer.

    Returns
    -------
    model : dict
        'x' : Tensor
            Input placeholder to the network
        'z' : Tensor
            Inner-most latent representation
        'y' : Tensor
            Output reconstruction of the input
        'cost' : Tensor
            Overall cost to use for training

    Raises
    ------
    ValueError
        If the input is neither a batch of flattened square images nor a
        4-d tensor.
    """
    # %%
    # input to the network
    x = tf.placeholder(tf.float32, input_shape, name='x')

    # %%
    # Optionally apply denoising autoencoder
    if corruption:
        x_noise = corrupt(x)
    else:
        x_noise = x

    # %%
    # ensure 2-d is converted to square tensor.
    if len(x.get_shape()) == 2:
        x_dim = np.sqrt(x_noise.get_shape().as_list()[1])
        if x_dim != int(x_dim):
            raise ValueError('Unsupported input dimensions')
        x_dim = int(x_dim)
        x_tensor = tf.reshape(x_noise, [-1, x_dim, x_dim, n_filters[0]])
    elif len(x_noise.get_shape()) == 4:
        x_tensor = x_noise
    else:
        raise ValueError('Unsupported input dimensions')
    current_input = x_tensor

    # %%
    # Build the encoder
    encoder = []
    shapes = []
    for layer_i, n_output in enumerate(n_filters[1:]):
        n_input = current_input.get_shape().as_list()[3]
        shapes.append(current_input.get_shape().as_list())
        W = tf.Variable(
            tf.random_uniform([
                filter_sizes[layer_i], filter_sizes[layer_i], n_input, n_output
            ], -1.0 / math.sqrt(n_input), 1.0 / math.sqrt(n_input)))
        b = tf.Variable(tf.zeros([n_output]))
        encoder.append(W)
        output = lrelu(
            tf.add(
                tf.nn.conv2d(current_input,
                             W,
                             strides=[1, 2, 2, 1],
                             padding='SAME'), b))
        current_input = output

    # %%
    # store the latent representation
    z = current_input
    encoder.reverse()
    shapes.reverse()

    # %%
    # Build the decoder using the same weights
    for layer_i, shape in enumerate(shapes):
        W = encoder[layer_i]
        b = tf.Variable(tf.zeros([W.get_shape().as_list()[2]]))
        output = lrelu(
            tf.add(
                tf.nn.conv2d_transpose(
                    current_input,
                    W,
                    tf.stack([tf.shape(x)[0], shape[1], shape[2], shape[3]]),
                    strides=[1, 2, 2, 1],
                    padding='SAME'), b))
        current_input = output

    # %%
    # now have the reconstruction through the network
    y = current_input
    # cost function measures pixel-wise difference
    cost = tf.reduce_sum(tf.square(y - x_tensor))

    # %%
    return {'x': x, 'z': z, 'y': y, 'cost': cost}
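A hedged usage sketch for this convolutional variant, assuming the lrelu and corrupt helpers from the same codebase; the batch is a random stand-in for flattened 28x28 images:

import numpy as np
import tensorflow as tf

ae = autoencoder(input_shape=[None, 784], corruption=True)
optimizer = tf.train.AdamOptimizer(0.001).minimize(ae['cost'])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(100):
        batch = np.random.rand(50, 784).astype(np.float32)
        _, cost = sess.run([optimizer, ae['cost']],
                           feed_dict={ae['x']: batch})
    print('final cost:', cost)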