def model(x, y, is_training):
    """Build a three-layer batch-normalized conv net with a 10-way softmax.

    Parameters
    ----------
    x : tf.Tensor
        Input batch.  It is reshaped to [-1, 28, 28, 1], so each example is
        expected to hold 784 values (MNIST, per the original comments).
    y : tf.Tensor
        Target distribution per example, compared against the prediction
        with argmax over axis 1 (presumably one-hot labels; confirm with
        the caller).
    is_training : tf.Tensor or bool
        Forwarded unchanged to every `batch_norm` call.

    Returns
    -------
    list
        `[train_step, accuracy]`: the Adam minimization op for the summed
        cross-entropy, and the mean fraction of correct argmax predictions.
    """
    # Convert the flat input vectors to a 4-D tensor: N x W x H x C
    x_tensor = tf.reshape(x, [-1, 28, 28, 1])

    # Batch normalization is applied before each nonlinearity, as proposed
    # in the original batch-norm paper.
    h_1 = lrelu(batch_norm(conv2d(x_tensor, 32, name='conv1'),
                           is_training,
                           scope='bn1'),
                name='lrelu1')
    h_2 = lrelu(batch_norm(conv2d(h_1, 64, name='conv2'),
                           is_training,
                           scope='bn2'),
                name='lrelu2')
    h_3 = lrelu(batch_norm(conv2d(h_2, 64, name='conv3'),
                           is_training,
                           scope='bn3'),
                name='lrelu3')
    h_3_flat = tf.reshape(h_3, [-1, 64 * 4 * 4])
    logits = linear(h_3_flat, 10)
    y_pred = tf.nn.softmax(logits)

    # Loss: same quantity as -sum(y * log(softmax(logits))), but computed
    # from the logits so a probability underflowing to 0 cannot turn the
    # loss (and every gradient) into NaN/inf.
    cross_entropy = tf.reduce_sum(
        tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
    train_step = tf.train.AdamOptimizer().minimize(cross_entropy)

    correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))

    return [train_step, accuracy]
예제 #2
0
def encoder(x,
            phase_train,
            dimensions=[],
            filter_sizes=[],
            convolutional=False,
            activation=tf.nn.relu,
            output_activation=tf.nn.sigmoid,
            reuse=False):
    """Stack conv or fully connected layers on `x` and flatten the result.

    Each layer is `conv2d`/`linear` -> `bn.batch_norm` -> `activation`,
    created inside a variable scope named after the layer index.

    Parameters
    ----------
    x : tf.Tensor
        Network input.
    phase_train : tf.Tensor
        Passed as `phase_train` to every `bn.batch_norm` call.
    dimensions : list, optional
        Output size of each layer.  When `convolutional` is False the first
        entry is consumed as the width `x` is reshaped to, and only the
        remaining entries create layers.
    filter_sizes : list, optional
        Kernel size per layer, used for both height and width.  Only read
        when `convolutional` is True.
    convolutional : bool, optional
        Use `conv2d` layers (after `to_tensor(x)`) instead of `linear`.
    activation : fn, optional
        Applied to the batch-normalized output of every layer.
    output_activation : fn or None, optional
        Applied to the flattened output; skipped when None.
    reuse : bool, optional
        Forwarded to the variable scopes and to the layer helpers.

    Returns
    -------
    tf.Tensor
        Flattened output of the last layer, after `output_activation`.
    """
    if convolutional:
        # to_tensor is expected to turn the 2-D input into a 4-D one
        net = to_tensor(x)
    else:
        net = tf.reshape(tensor=x, shape=[-1, dimensions[0]])
        dimensions = dimensions[1:]

    for idx, n_units in enumerate(dimensions):
        with tf.variable_scope(str(idx), reuse=reuse):
            if convolutional:
                ksize = filter_sizes[idx]
                pre_act, _ = conv2d(x=net,
                                    n_output=n_units,
                                    k_h=ksize,
                                    k_w=ksize,
                                    padding='SAME',
                                    reuse=reuse)
            else:
                # fully connected layer
                pre_act, _ = linear(x=net, n_output=n_units, reuse=reuse)

            # normalize first, then apply the nonlinearity
            net = activation(
                bn.batch_norm(x=pre_act,
                              phase_train=phase_train,
                              name='bn',
                              reuse=reuse))

    flattened = flatten(net, name='flatten', reuse=reuse)
    return (flattened if output_activation is None
            else output_activation(flattened))
예제 #3
0
def decoder(z,
            phase_train,
            dimensions=[],
            channels=[],
            filter_sizes=[],
            convolutional=False,
            activation=tf.nn.relu,
            output_activation=tf.nn.tanh,
            reuse=None):
    """Decode `z` through a stack of deconv or fully connected layers.

    Parameters
    ----------
    z : tf.Tensor
        Input to the decoder.
    phase_train : tf.Tensor
        Passed as `phase_train` to every `bn.batch_norm` call.
    dimensions : list, optional
        Per-layer output sizes.  With `convolutional=True` each entry is
        indexed as (height, width) and the first entry sizes the initial
        fully connected projection; otherwise each entry is a unit count.
    channels : list, optional
        Per-layer output channels, only read when `convolutional` is True.
        The first entry belongs to the initial projection.
    filter_sizes : list, optional
        Per-layer kernel size (height and width), only read when
        `convolutional` is True.  The first entry is dropped together with
        the first entries of `dimensions` and `channels`.
    convolutional : bool, optional
        Use `deconv2d` layers instead of `linear` layers.
    activation : fn, optional
        Applied after batch norm on every layer except the last.
    output_activation : fn or None, optional
        Applied to the last layer's output; skipped when None.
    reuse : bool, optional
        Forwarded to the variable scopes and to the layer helpers.

    Returns
    -------
    tf.Tensor
        Output of the final layer, after `output_activation`.
    """
    if not convolutional:
        net = z
    else:
        with tf.variable_scope('fc', reuse=reuse):
            # Project z and reshape it into the first feature map.
            projected, _ = linear(
                x=z,
                n_output=channels[0] * dimensions[0][0] * dimensions[0][1],
                reuse=reuse)
            feature_map = tf.reshape(
                projected,
                [-1, dimensions[0][0], dimensions[0][1], channels[0]])
            normalized = bn.batch_norm(
                name='bn', x=feature_map, phase_train=phase_train,
                reuse=reuse)
            net = activation(features=normalized)

        # The first entries were consumed by the projection above.
        dimensions = dimensions[1:]
        channels = channels[1:]
        filter_sizes = filter_sizes[1:]

    for idx, target in enumerate(dimensions):
        with tf.variable_scope(str(idx), reuse=reuse):
            if convolutional:
                pre_act, _ = deconv2d(x=net,
                                      n_output_h=target[0],
                                      n_output_w=target[1],
                                      n_output_ch=channels[idx],
                                      k_h=filter_sizes[idx],
                                      k_w=filter_sizes[idx],
                                      padding='SAME',
                                      reuse=reuse)
            else:
                pre_act, _ = linear(x=net, n_output=target, reuse=reuse)

            # Batch norm + activation on every layer except the last one,
            # whose raw output goes to `output_activation`.
            if idx < len(dimensions) - 1:
                net = activation(
                    bn.batch_norm(x=pre_act,
                                  phase_train=phase_train,
                                  name='bn',
                                  reuse=reuse))
            else:
                net = pre_act

    return net if output_activation is None else output_activation(net)
예제 #4
0
def VAE(input_shape=[None, 784],
        output_shape=[None, 784],
        n_filters=[64, 64, 64],
        filter_sizes=[4, 4, 4],
        n_hidden=32,
        n_code=2,
        activation=tf.nn.tanh,
        dropout=False,
        denoising=False,
        convolutional=False,
        variational=False,
        softmax=False,
        classifier='alexnet_v2'):
    """(Variational) (Convolutional) (Denoising) Autoencoder with an
    optional classification head.

    The encoder weights are collected in `Ws`, but the decoder builds its
    own layers; weights are not tied.  None of the list arguments is
    modified by this function.

    Parameters
    ----------
    input_shape : list, optional
        Shape of the input placeholder `x`, e.g. for MNIST: [None, 784].
    output_shape : list, optional
        Shape of the target placeholder `t` that the reconstruction is
        compared against in the l2 loss.
    n_filters : list, optional
        Output size of each encoder layer.
        If convolutional=True, the number of output filters per layer.
        If convolutional=False, the number of neurons per fully connected
        layer.
    filter_sizes : list, optional
        Only applied when convolutional=True.  This refers to the ksize (height
        and width) of each convolutional layer.
    n_hidden : int, optional
        Only applied when variational=True.  This refers to the first fully
        connected layer prior to the variational embedding, directly after
        the encoding.  After the variational embedding, another fully connected
        layer is created with the same size prior to decoding.  Set to 0 to
        not use an additional hidden layer.
    n_code : int, optional
        Only applied when variational=True.  This refers to the number of
        latent Gaussians to sample for creating the inner most encoding.
    activation : function, optional
        Activation function to apply to each layer, e.g. tf.nn.relu
    dropout : bool, optional
        Whether or not to apply dropout.  If using dropout, you must feed a
        value for 'keep_prob', as returned in the dictionary.  1.0 means no
        dropout is used.
    denoising : bool, optional
        Whether or not to apply denoising.  If using denoising, you must feed
        a value for 'corrupt_rec', as returned in the dictionary.  The
        network input is `corrupt(x) * corrupt_rec + x * (1 - corrupt_rec)`,
        so 0.0 means no corruption and 1.0 means the fully corrupted input.
    convolutional : bool, optional
        Whether or not to use a convolutional network or else a fully connected
        network will be created.  This affects the n_filters parameter's
        meaning.
    variational : bool, optional
        Whether or not to create a variational embedding layer.  This will
        create a fully connected layer after the encoding, if `n_hidden` is
        greater than 0, then will create a multivariate gaussian sampling
        layer, then another fully connected layer.  The size of the fully
        connected layers are determined by `n_hidden`, and the size of the
        sampling layer is determined by `n_code`.
    softmax : bool, optional
        Whether to build the classification network on top of the input
        concatenated with the reconstruction (concatenation is on axis 3,
        so this path needs 4-D tensors, i.e. convolutional=True).
    classifier : str, optional
        Which classification network to build when softmax=True.  One of
        'squeezenet', 'zigzagnet', 'alexnet_v2', 'inception_v1',
        'inception_v2', 'inception_v3'.

    Returns
    -------
    model : dict
        {
            'cost_vae': Reconstruction (+ KL) cost to optimize.
            'cost_s': Classification cost (0 when softmax=False).
            'loss_x': l2 reconstruction loss.
            'loss_z': KL-divergence loss (0 when variational=False).
            'Ws': Encoder weights, last layer first (plus the upsampling
                  deconv weight when softmax and convolutional).
            'x': Input placeholder
            't': Reconstruction target placeholder
            'label': Integer class label placeholder
            'label_onehot': One-hot encoding of 'label' (13 classes)
            'predictions': Classifier output
            'z': Inner most encoding Tensor (latent features)
            'y': Reconstruction of the Decoder
            'acc': Top-1 correctness per example (0 when softmax=False)
            'keep_prob': Amount to keep when using Dropout
            'corrupt_rec': Corruption mix for the reconstruction input
            'corrupt_cls': Corruption mix for the classifier input
            'var_prob': Variance of 'y' over variance of the input
                        (0 when softmax=False)
            'train': Set to True when training/Applies to Batch Normalization.
            'merged': All summaries merged for TensorBoard.
        }

    Raises
    ------
    ValueError
        If softmax=True and `classifier` is not one of the supported names.
    """
    # Work on a private copy: the decoder section below reverses and extends
    # this list, which would otherwise alter the caller's list and, for the
    # default argument, change the network built by every later call.
    n_filters = list(n_filters)

    # network input / placeholders for train (bn) and dropout
    x = tf.placeholder(tf.float32, input_shape, 'x')
    t = tf.placeholder(tf.float32, output_shape, 't')
    label = tf.placeholder(tf.int32, [None], 'label')
    phase_train = tf.placeholder(tf.bool, name='phase_train')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    corrupt_rec = tf.placeholder(tf.float32, name='corrupt_rec')
    corrupt_cls = tf.placeholder(tf.float32, name='corrupt_cls')

    # input of the reconstruction network
    # (phase_train is a placeholder and therefore never None, so only
    # `denoising` decides whether the input is corrupted)
    current_input1 = utils.corrupt(x)*corrupt_rec + x*(1-corrupt_rec) \
        if denoising else x
    current_input1.set_shape(x.get_shape())
    # 2d -> 4d if convolution
    current_input1 = utils.to_tensor(current_input1) \
        if convolutional else current_input1

    Ws = []
    shapes = []

    # Build the encoder
    for layer_i, n_output in enumerate(n_filters):
        with tf.variable_scope('encoder/{}'.format(layer_i)):
            shapes.append(current_input1.get_shape().as_list())
            if convolutional:
                h, W = utils.conv2d(x=current_input1,
                                    n_output=n_output,
                                    k_h=filter_sizes[layer_i],
                                    k_w=filter_sizes[layer_i])
            else:
                h, W = utils.linear(x=current_input1, n_output=n_output)
            h = activation(batch_norm(h, phase_train, 'bn' + str(layer_i)))
            if dropout:
                h = tf.nn.dropout(h, keep_prob)
            Ws.append(W)
            current_input1 = h

    shapes.append(current_input1.get_shape().as_list())

    with tf.variable_scope('variational'):
        if variational:
            dims = current_input1.get_shape().as_list()
            flattened = utils.flatten(current_input1)

            if n_hidden:
                h = utils.linear(flattened, n_hidden, name='W_fc')[0]
                h = activation(batch_norm(h, phase_train, 'fc/bn'))
                if dropout:
                    h = tf.nn.dropout(h, keep_prob)
            else:
                h = flattened

            z_mu = utils.linear(h, n_code, name='mu')[0]
            z_log_sigma = 0.5 * utils.linear(h, n_code, name='log_sigma')[0]

            # Sample from noise distribution p(eps) ~ N(0, 1)
            epsilon = tf.random_normal(tf.stack([tf.shape(x)[0], n_code]))

            # Sample from posterior
            z = z_mu + tf.multiply(epsilon, tf.exp(z_log_sigma))

            if n_hidden:
                h = utils.linear(z, n_hidden, name='fc_t')[0]
                h = activation(batch_norm(h, phase_train, 'fc_t/bn'))
                if dropout:
                    h = tf.nn.dropout(h, keep_prob)
            else:
                h = z

            # Project back to the size of the last encoder output.
            size = dims[1] * dims[2] * dims[3] if convolutional else dims[1]
            h = utils.linear(h, size, name='fc_t2')[0]
            current_input1 = activation(batch_norm(h, phase_train, 'fc_t2/bn'))
            if dropout:
                current_input1 = tf.nn.dropout(current_input1, keep_prob)

            if convolutional:
                current_input1 = tf.reshape(
                    current_input1,
                    tf.stack([
                        tf.shape(current_input1)[0], dims[1], dims[2], dims[3]
                    ]))
        else:
            z = current_input1

    # The decoder mirrors the encoder, so walk everything back to front.
    shapes.reverse()
    n_filters.reverse()
    Ws.reverse()

    n_filters += [input_shape[-1]]

    # %%
    # Decoding layers
    for layer_i, n_output in enumerate(n_filters[1:]):
        with tf.variable_scope('decoder/{}'.format(layer_i)):
            shape = shapes[layer_i + 1]
            if convolutional:
                h, _ = utils.deconv2d(x=current_input1,
                                      n_output_h=shape[1],
                                      n_output_w=shape[2],
                                      n_output_ch=shape[3],
                                      n_input_ch=shapes[layer_i][3],
                                      k_h=filter_sizes[layer_i],
                                      k_w=filter_sizes[layer_i])
            else:
                h, _ = utils.linear(x=current_input1, n_output=n_output)
            h = activation(batch_norm(h, phase_train, 'dec/bn' + str(layer_i)))
            if dropout:
                h = tf.nn.dropout(h, keep_prob)
            current_input1 = h

    y = current_input1
    t_flat = utils.flatten(t)
    y_flat = utils.flatten(y)

    # l2 loss
    loss_x = tf.reduce_mean(
        tf.reduce_sum(tf.squared_difference(t_flat, y_flat), 1))
    loss_z = 0

    if variational:
        # Variational lower bound, kl-divergence
        loss_z = tf.reduce_mean(-0.5 * tf.reduce_sum(
            1.0 + 2.0 * z_log_sigma - tf.square(z_mu) -
            tf.exp(2.0 * z_log_sigma), 1))

        # Add l2 loss
        cost_vae = tf.reduce_mean(loss_x + loss_z)
    else:
        # Just optimize l2 loss
        cost_vae = tf.reduce_mean(loss_x)

    # Classification network (TensorFlow slim models) on top of the input
    # concatenated with the reconstruction.
    if softmax:
        axis = list(range(len(x.get_shape())))
        # NOTE(review): `phase_train` is a tf.placeholder, so the Python
        # test `phase_train is True` is always False: the moments are always
        # taken from `x`, and the classifier input below is never corrupted.
        # Switching on the fed value would need tf.cond; behaviour is kept
        # as-is here -- confirm the intent before changing it.
        _, variance1 = tf.nn.moments(t, axis) \
            if (phase_train is True) else tf.nn.moments(x, axis)
        _, variance2 = tf.nn.moments(y, axis)
        var_prob = variance2 / variance1

        # Input of the classification network
        current_input2 = utils.corrupt(x)*corrupt_cls + \
            x*(1-corrupt_cls) \
            if (denoising and phase_train is True) else x
        current_input2.set_shape(x.get_shape())
        current_input2 = utils.to_tensor(current_input2) \
            if convolutional else current_input2

        y_concat = tf.concat([current_input2, y], 3)
        with tf.variable_scope('deconv/concat'):
            if convolutional:
                # Upsample to twice the spatial size of the reconstruction
                # network's input before classifying
                # (112->224 for alexNet and 150->300 for inception v3 and v4).
                # Height comes from axis 1 and width from axis 2.
                y_concat, W = utils.deconv2d(
                    x=y_concat,
                    n_output_h=y_concat.get_shape()[1] * 2,
                    n_output_w=y_concat.get_shape()[2] * 2,
                    n_output_ch=y_concat.get_shape()[3],
                    n_input_ch=y_concat.get_shape()[3],
                    k_h=3,
                    k_w=3)
                Ws.append(W)

        # The following are optional networks for classification network
        if classifier == 'squeezenet':
            predictions, _ = squeezenet.squeezenet(y_concat, num_classes=13)
        elif classifier == 'zigzagnet':
            predictions, _ = squeezenet.zigzagnet(y_concat, num_classes=13)
        elif classifier == 'alexnet_v2':
            predictions, _ = alexnet.alexnet_v2(y_concat, num_classes=13)
        elif classifier == 'inception_v1':
            predictions, _ = inception.inception_v1(y_concat, num_classes=13)
        elif classifier == 'inception_v2':
            predictions, _ = inception.inception_v2(y_concat, num_classes=13)
        elif classifier == 'inception_v3':
            predictions, _ = inception.inception_v3(y_concat, num_classes=13)
        else:
            # Fail with a clear message instead of an unbound `predictions`.
            raise ValueError(
                'Unknown classifier: {!r}'.format(classifier))

        label_onehot = tf.one_hot(label, 13, axis=-1, dtype=tf.int32)
        cost_s = tf.losses.softmax_cross_entropy(label_onehot, predictions)
        cost_s = tf.reduce_mean(cost_s)
        acc = tf.nn.in_top_k(predictions, label, 1)
    else:
        predictions = tf.one_hot(label, 13, 1, 0)
        label_onehot = tf.one_hot(label, 13, 1, 0)
        cost_s = 0
        acc = 0
        # Keep the summary and the returned dict well-defined without the
        # classification head.
        var_prob = 0
    # Using Summaries for Tensorboard
    tf.summary.scalar('cost_vae', cost_vae)
    tf.summary.scalar('cost_s', cost_s)
    tf.summary.scalar('loss_x', loss_x)
    tf.summary.scalar('loss_z', loss_z)
    tf.summary.scalar('corrupt_rec', corrupt_rec)
    tf.summary.scalar('corrupt_cls', corrupt_cls)
    tf.summary.scalar('var_prob', var_prob)
    merged = tf.summary.merge_all()

    return {
        'cost_vae': cost_vae,
        'cost_s': cost_s,
        'loss_x': loss_x,
        'loss_z': loss_z,
        'Ws': Ws,
        'x': x,
        't': t,
        'label': label,
        'label_onehot': label_onehot,
        'predictions': predictions,
        'z': z,
        'y': y,
        'acc': acc,
        'keep_prob': keep_prob,
        'corrupt_rec': corrupt_rec,
        'corrupt_cls': corrupt_cls,
        'var_prob': var_prob,
        'train': phase_train,
        'merged': merged
    }
def VAE(input_shape=[None, 784],
        n_filters=[64, 64, 64],
        filter_sizes=[4, 4, 4],
        n_hidden=32,
        n_code=2,
        activation=tf.nn.tanh,
        dropout=False,
        denoising=False,
        convolutional=False,
        variational=False):
    """(Variational) (Convolutional) (Denoising) Autoencoder.

    The encoder weights are collected in `Ws`, but the decoder builds its
    own layers; weights are not tied.  None of the list arguments is
    modified by this function.

    Parameters
    ----------
    input_shape : list, optional
        Shape of the input to the network. e.g. for MNIST: [None, 784].
    n_filters : list, optional
        Output size of each encoder layer.
        If convolutional=True, the number of output filters per layer.
        If convolutional=False, the number of neurons per fully connected
        layer.
    filter_sizes : list, optional
        Only applied when convolutional=True.  This refers to the ksize (height
        and width) of each convolutional layer.
    n_hidden : int, optional
        Only applied when variational=True.  This refers to the first fully
        connected layer prior to the variational embedding, directly after
        the encoding.  After the variational embedding, another fully connected
        layer is created with the same size prior to decoding.  Set to 0 to
        not use an additional hidden layer.
    n_code : int, optional
        Only applied when variational=True.  This refers to the number of
        latent Gaussians to sample for creating the inner most encoding.
    activation : function, optional
        Activation function to apply to each layer, e.g. tf.nn.relu
    dropout : bool, optional
        Whether or not to apply dropout.  If using dropout, you must feed a
        value for 'keep_prob', as returned in the dictionary.  1.0 means no
        dropout is used.
    denoising : bool, optional
        Whether or not to apply denoising.  If using denoising, you must feed
        a value for 'corrupt_prob', as returned in the dictionary.  The
        network input is `corrupt(x) * corrupt_prob + x * (1 - corrupt_prob)`,
        so 0.0 means no corruption and 1.0 means the fully corrupted input.
    convolutional : bool, optional
        Whether or not to use a convolutional network or else a fully connected
        network will be created.  This affects the n_filters parameter's
        meaning.
    variational : bool, optional
        Whether or not to create a variational embedding layer.  This will
        create a fully connected layer after the encoding, if `n_hidden` is
        greater than 0, then will create a multivariate gaussian sampling
        layer, then another fully connected layer.  The size of the fully
        connected layers are determined by `n_hidden`, and the size of the
        sampling layer is determined by `n_code`.

    Returns
    -------
    model : dict
        {
            'cost': Tensor to optimize.
            'Ws': All weights of the encoder, last layer first.
            'x': Input Placeholder
            'z': Inner most encoding Tensor (latent features)
            'y': Reconstruction of the Decoder
            'keep_prob': Amount to keep when using Dropout
            'corrupt_prob': Amount to corrupt when using Denoising
            'train': Set to True when training/Applies to Batch Normalization.
        }
    """
    # Work on a private copy: the decoder section below reverses and extends
    # this list, which would otherwise alter the caller's list and, for the
    # default argument, change the network built by every later call.
    n_filters = list(n_filters)

    # network input / placeholders for train (bn) and dropout
    x = tf.placeholder(tf.float32, input_shape, 'x')
    phase_train = tf.placeholder(tf.bool, name='phase_train')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    corrupt_prob = tf.placeholder(tf.float32, [1])

    # apply noise if denoising
    x_ = (utils.corrupt(x) * corrupt_prob + x *
          (1 - corrupt_prob)) if denoising else x

    # 2d -> 4d if convolution
    x_tensor = utils.to_tensor(x_) if convolutional else x_
    current_input = x_tensor

    Ws = []
    shapes = []

    # Build the encoder
    for layer_i, n_output in enumerate(n_filters):
        with tf.variable_scope('encoder/{}'.format(layer_i)):
            shapes.append(current_input.get_shape().as_list())
            if convolutional:
                h, W = utils.conv2d(x=current_input,
                                    n_output=n_output,
                                    k_h=filter_sizes[layer_i],
                                    k_w=filter_sizes[layer_i])
            else:
                h, W = utils.linear(x=current_input, n_output=n_output)
            h = activation(batch_norm(h, phase_train, 'bn' + str(layer_i)))
            if dropout:
                h = tf.nn.dropout(h, keep_prob)
            Ws.append(W)
            current_input = h

    shapes.append(current_input.get_shape().as_list())

    with tf.variable_scope('variational'):
        if variational:
            dims = current_input.get_shape().as_list()
            flattened = utils.flatten(current_input)

            if n_hidden:
                h = utils.linear(flattened, n_hidden, name='W_fc')[0]
                h = activation(batch_norm(h, phase_train, 'fc/bn'))
                if dropout:
                    h = tf.nn.dropout(h, keep_prob)
            else:
                h = flattened

            z_mu = utils.linear(h, n_code, name='mu')[0]
            z_log_sigma = 0.5 * utils.linear(h, n_code, name='log_sigma')[0]

            # Sample from noise distribution p(eps) ~ N(0, 1)
            epsilon = tf.random_normal(tf.stack([tf.shape(x)[0], n_code]))

            # Sample from posterior
            z = z_mu + tf.multiply(epsilon, tf.exp(z_log_sigma))

            if n_hidden:
                h = utils.linear(z, n_hidden, name='fc_t')[0]
                h = activation(batch_norm(h, phase_train, 'fc_t/bn'))
                if dropout:
                    h = tf.nn.dropout(h, keep_prob)
            else:
                h = z

            # Project back to the size of the last encoder output.
            size = dims[1] * dims[2] * dims[3] if convolutional else dims[1]
            h = utils.linear(h, size, name='fc_t2')[0]
            current_input = activation(batch_norm(h, phase_train, 'fc_t2/bn'))
            if dropout:
                current_input = tf.nn.dropout(current_input, keep_prob)

            if convolutional:
                current_input = tf.reshape(
                    current_input,
                    tf.stack([
                        tf.shape(current_input)[0], dims[1], dims[2], dims[3]
                    ]))
        else:
            z = current_input

    # The decoder mirrors the encoder, so walk everything back to front.
    shapes.reverse()
    n_filters.reverse()
    Ws.reverse()

    n_filters += [input_shape[-1]]

    # %%
    # Decoding layers
    for layer_i, n_output in enumerate(n_filters[1:]):
        with tf.variable_scope('decoder/{}'.format(layer_i)):
            shape = shapes[layer_i + 1]
            if convolutional:
                h, _ = utils.deconv2d(x=current_input,
                                      n_output_h=shape[1],
                                      n_output_w=shape[2],
                                      n_output_ch=shape[3],
                                      n_input_ch=shapes[layer_i][3],
                                      k_h=filter_sizes[layer_i],
                                      k_w=filter_sizes[layer_i])
            else:
                h, _ = utils.linear(x=current_input, n_output=n_output)
            h = activation(batch_norm(h, phase_train, 'dec/bn' + str(layer_i)))
            if dropout:
                h = tf.nn.dropout(h, keep_prob)
            current_input = h

    y = current_input
    # The reconstruction is compared against the clean input `x`, not the
    # corrupted `x_`.
    x_flat = utils.flatten(x)
    y_flat = utils.flatten(y)

    # l2 loss
    loss_x = tf.reduce_sum(tf.squared_difference(x_flat, y_flat), 1)

    if variational:
        # variational lower bound, kl-divergence
        loss_z = -0.5 * tf.reduce_sum(
            1.0 + 2.0 * z_log_sigma - tf.square(z_mu) -
            tf.exp(2.0 * z_log_sigma), 1)

        # add l2 loss
        cost = tf.reduce_mean(loss_x + loss_z)
    else:
        # just optimize l2 loss
        cost = tf.reduce_mean(loss_x)

    return {
        'cost': cost,
        'Ws': Ws,
        'x': x,
        'z': z,
        'y': y,
        'keep_prob': keep_prob,
        'corrupt_prob': corrupt_prob,
        'train': phase_train
    }
예제 #6
0
def decoder(z,
            phase_train,
            dimensions=[],
            channels=[],
            filter_sizes=[],
            convolutional=False,
            activation=tf.nn.relu,
            output_activation=tf.nn.tanh,
            reuse=None):
    """Decode `z` through the layers described by `dimensions`.

    Unlike `encoder`, the convolutional variant also needs the number of
    output channels of every layer.

    Parameters
    ----------
    z : tf.Tensor
        Input to the decoder network, e.g. tf.Placeholder or tf.Variable
    phase_train : tf.Placeholder
        Train-mode switch, passed as `phase_train` to every
        `bn.batch_norm` call.
    dimensions : list, optional
        convolutional=False: number of neurons of each layer.
        convolutional=True: one (height, width) entry per layer; the first
        entry sizes the initial fully connected projection.
    channels : list, optional
        Only read when convolutional=True: number of output channels of
        each layer, the first entry belonging to the initial projection.
    filter_sizes : list, optional
        Only read when convolutional=True: kernel size (height and width)
        of each layer.  The first entry is dropped together with the first
        entries of `dimensions` and `channels`.
    convolutional : bool, optional
        Whether or not to use (de)convolutional layers.
    activation : fn, optional
        Activation applied after batch norm on all but the last layer,
        e.g. tf.nn.relu
    output_activation : fn, optional
        Activation applied to the last layer's output; None skips it.
    reuse : bool, optional
        For each layer's variable scope, whether to reuse existing variables.

    Returns
    -------
    h : tf.Tensor
        Output tensor of the decoder
    """
    if not convolutional:
        layer_in = z
    else:
        with tf.variable_scope('fc', reuse=reuse):
            # Fully connected projection, reshaped into the first feature map.
            fc_out, _ = linear(
                x=z,
                n_output=channels[0] * dimensions[0][0] * dimensions[0][1],
                reuse=reuse)
            fc_map = tf.reshape(
                fc_out,
                [-1, dimensions[0][0], dimensions[0][1], channels[0]])
            fc_norm = bn.batch_norm(
                name='bn', x=fc_map, phase_train=phase_train, reuse=reuse)
            layer_in = activation(features=fc_norm)

        # The first entries were consumed by the projection above.
        dimensions = dimensions[1:]
        channels = channels[1:]
        filter_sizes = filter_sizes[1:]

    for layer_i, n_output in enumerate(dimensions):
        is_last = not (layer_i < len(dimensions) - 1)
        with tf.variable_scope(str(layer_i), reuse=reuse):
            if convolutional:
                layer_out, _ = deconv2d(x=current_input_or(layer_in),
                                        n_output_h=n_output[0],
                                        n_output_w=n_output[1],
                                        n_output_ch=channels[layer_i],
                                        k_h=filter_sizes[layer_i],
                                        k_w=filter_sizes[layer_i],
                                        padding='SAME',
                                        reuse=reuse) \
                    if False else deconv2d(x=layer_in,
                                           n_output_h=n_output[0],
                                           n_output_w=n_output[1],
                                           n_output_ch=channels[layer_i],
                                           k_h=filter_sizes[layer_i],
                                           k_w=filter_sizes[layer_i],
                                           padding='SAME',
                                           reuse=reuse)
            else:
                layer_out, _ = linear(
                    x=layer_in, n_output=n_output, reuse=reuse)

            # The last layer is left un-normalized and un-activated here.
            if not is_last:
                layer_out = activation(
                    bn.batch_norm(x=layer_out,
                                  phase_train=phase_train,
                                  name='bn',
                                  reuse=reuse))
        layer_in = layer_out

    if output_activation is not None:
        return output_activation(layer_in)
    return layer_in
예제 #7
0
def encoder(x, phase_train, dimensions=[], filter_sizes=[],
            convolutional=False, activation=tf.nn.relu,
            output_activation=tf.nn.sigmoid, reuse=False):
    """Encode `x` through the layers described by `dimensions`.

    Each layer is `conv2d`/`linear` followed by `bn.batch_norm` and
    `activation`; the last layer's output is flattened.

    Parameters
    ----------
    x : tf.Tensor
        Input to the encoder network, e.g. tf.Placeholder or tf.Variable
    phase_train : tf.Placeholder
        Train-mode switch, passed as `phase_train` to every
        `bn.batch_norm` call.
    dimensions : list, optional
        convolutional=True: number of filters of each layer.
        convolutional=False: the first entry is the width `x` is reshaped
        to, the remaining entries are the number of neurons of each layer.
    filter_sizes : list, optional
        Only read when convolutional=True: kernel size (height and width)
        of each layer, e.g. [3, 3, 3, 3] for four layers of 3 x 3 kernels.
    convolutional : bool, optional
        Whether or not to use convolutional layers.
    activation : fn, optional
        Activation applied after batch norm in every layer, e.g. tf.nn.relu
    output_activation : fn, optional
        Activation applied to the flattened output; None skips it.
    reuse : bool, optional
        For each layer's variable scope, whether to reuse existing variables.

    Returns
    -------
    h : tf.Tensor
        Output tensor of the encoder
    """
    if convolutional:
        # to_tensor is expected to turn a 2-D input into a 4-D tensor
        layer_in = to_tensor(x)
    else:
        layer_in = tf.reshape(tensor=x, shape=[-1, dimensions[0]])
        dimensions = dimensions[1:]

    for layer_i, n_output in enumerate(dimensions):
        with tf.variable_scope(str(layer_i), reuse=reuse):
            if not convolutional:
                pre_norm, _ = linear(
                    x=layer_in, n_output=n_output, reuse=reuse)
            else:
                kernel = filter_sizes[layer_i]
                pre_norm, _ = conv2d(x=layer_in,
                                     n_output=n_output,
                                     k_h=kernel,
                                     k_w=kernel,
                                     padding='SAME',
                                     reuse=reuse)
            normalized = bn.batch_norm(x=pre_norm,
                                       phase_train=phase_train,
                                       name='bn',
                                       reuse=reuse)
            layer_in = activation(normalized)

    flat = flatten(layer_in, name='flatten', reuse=reuse)

    if output_activation is not None:
        return output_activation(flat)
    return flat
예제 #8
0
파일: gan.py 프로젝트: meftaul/kadenze-cadl
def decoder(z,
            phase_train,
            dimensions=[],
            channels=[],
            filter_sizes=[],
            convolutional=False,
            activation=tf.nn.relu,
            output_activation=tf.nn.tanh,
            reuse=None):
    """Decoder network maps the code `z` through layers defined by dimensions.

    In contrast with `encoder`, the convolutional mode requires information
    on the spatial size and the number of output channels of each layer.
    Otherwise, it is mostly the same.

    Parameters
    ----------
    z : tf.Tensor
        Input to the decoder network, e.g. tf.Placeholder or tf.Variable
    phase_train : tf.Placeholder
        Placeholder defining whether the network is in train mode or not.
        It is forwarded to every `bn.batch_norm` call.
    dimensions : list, optional
        convolutional=False: list of the number of neurons in each layer,
        e.g. [100, 100, 100, 100] for a 4-layer deep network.
        convolutional=True: list of (height, width) pairs, one per layer.
        The first pair is the spatial size that `z` is projected and
        reshaped to by the initial fully connected layer; the remaining
        pairs are the output sizes of the transposed convolutions.
    channels : list, optional
        Only used when convolutional=True.  `channels[0]` is the channel
        count of the reshaped fully connected output, the remaining entries
        are the output channels of each transposed convolution.
    filter_sizes : list, optional
        Only used when convolutional=True.  Kernel size (used for both
        height and width) per layer.  The first entry lines up with the
        fully connected projection and is discarded.
    convolutional : bool, optional
        Whether or not to use convolutional layers.
    activation : fn, optional
        Function for applying an activation, e.g. tf.nn.relu.  It is always
        called with the tensor as its single positional argument.
    output_activation : fn, optional
        Function for applying an activation on the last layer, e.g.
        tf.nn.tanh.  Pass None to return the last layer's raw output.
    reuse : bool, optional
        For each layer's variable scope, whether to reuse existing variables.

    Returns
    -------
    h : tf.Tensor
        Output tensor of the decoder
    """

    if convolutional:
        # Project the code to a (height, width, channels) feature map.
        with tf.variable_scope('fc', reuse=reuse):
            z1, W = linear(x=z,
                           n_output=channels[0] * dimensions[0][0] *
                           dimensions[0][1],
                           reuse=reuse)
            rsz = tf.reshape(
                z1, [-1, dimensions[0][0], dimensions[0][1], channels[0]])
            # Call the activation positionally, exactly like the layer loop
            # below, so activations whose first parameter is not named
            # `features` work in this branch too.
            current_input = activation(bn.batch_norm(
                name='bn', x=rsz, phase_train=phase_train, reuse=reuse))

        # The first entry of each list was consumed by the projection above.
        dimensions = dimensions[1:]
        channels = channels[1:]
        filter_sizes = filter_sizes[1:]
    else:
        current_input = z

    for layer_i, n_output in enumerate(dimensions):
        with tf.variable_scope(str(layer_i), reuse=reuse):

            if convolutional:
                h, W = deconv2d(x=current_input,
                                n_output_h=n_output[0],
                                n_output_w=n_output[1],
                                n_output_ch=channels[layer_i],
                                k_h=filter_sizes[layer_i],
                                k_w=filter_sizes[layer_i],
                                padding='SAME',
                                reuse=reuse)
            else:
                h, W = linear(x=current_input, n_output=n_output, reuse=reuse)

            # Every layer but the last gets batch norm + activation; the
            # last layer is left raw so only `output_activation` shapes it.
            if layer_i < len(dimensions) - 1:
                norm = bn.batch_norm(x=h,
                                     phase_train=phase_train,
                                     name='bn',
                                     reuse=reuse)
                output = activation(norm)
            else:
                output = h
        current_input = output

    if output_activation is None:
        return current_input
    else:
        return output_activation(current_input)
예제 #9
0
파일: gan.py 프로젝트: meftaul/kadenze-cadl
def encoder(x,
            phase_train,
            dimensions=[],
            filter_sizes=[],
            convolutional=False,
            activation=tf.nn.relu,
            output_activation=tf.nn.sigmoid,
            reuse=False):
    """Build an encoder that pushes `x` through the layers in `dimensions`.

    Each layer is a convolution or a fully connected layer, followed by
    batch normalization and `activation`.  The result of the last layer is
    flattened and optionally passed through `output_activation`.

    Parameters
    ----------
    x : tf.Tensor
        Input to the encoder network, e.g. tf.Placeholder or tf.Variable
    phase_train : tf.Placeholder
        Placeholder defining whether the network is in train mode or not.
        It is forwarded to every `bn.batch_norm` call.
    dimensions : list, optional
        convolutional=True: number of filters of each layer.
        convolutional=False: the first entry is the width `x` is reshaped
        to, the remaining entries are the number of neurons of each layer.
    filter_sizes : list, optional
        Kernel size (used for both height and width) of each layer when
        convolutional=True, e.g. [3, 3, 3, 3] for four layers of 3 x 3
        kernels.
    convolutional : bool, optional
        Whether or not to use convolutional layers.
    activation : fn, optional
        Function for applying an activation, e.g. tf.nn.relu
    output_activation : fn, optional
        Function applied to the flattened output, e.g. tf.nn.sigmoid.
        Pass None to return the flattened output unchanged.
    reuse : bool, optional
        For each layer's variable scope, whether to reuse existing variables.

    Returns
    -------
    h : tf.Tensor
        Output tensor of the encoder
    """
    # %%
    if not convolutional:
        # The leading entry is the input width, not a layer size.
        net = tf.reshape(tensor=x, shape=[-1, dimensions[0]])
        layer_sizes = dimensions[1:]
    else:
        # presumably turns a 2-d input into a 4-d image tensor -- see
        # `to_tensor` for the exact contract.
        net = to_tensor(x)
        layer_sizes = dimensions

    for idx, width in enumerate(layer_sizes):
        with tf.variable_scope(str(idx), reuse=reuse):
            if not convolutional:
                pre_act, _ = linear(x=net, n_output=width, reuse=reuse)
            else:
                ksize = filter_sizes[idx]
                pre_act, _ = conv2d(x=net,
                                    n_output=width,
                                    k_h=ksize,
                                    k_w=ksize,
                                    padding='SAME',
                                    reuse=reuse)
            normalized = bn.batch_norm(x=pre_act,
                                       phase_train=phase_train,
                                       name='bn',
                                       reuse=reuse)
            net = activation(normalized)

    flat = flatten(net, name='flatten', reuse=reuse)
    return flat if output_activation is None else output_activation(flat)
예제 #10
0
# %% Placeholder flagging whether the graph runs in training mode; it is
# passed to every batch_norm call below.
is_training = tf.placeholder(tf.bool, name='is_training')

# %% We'll convert our MNIST vector data to a 4-D tensor:
# N x W x H x C
x_tensor = tf.reshape(x, [-1, 28, 28, 1])

# %% We'll use a new method called  batch normalization.
# This process attempts to "reduce internal covariate shift"
# which is a fancy way of saying that it will normalize updates for each
# batch using a smoothed version of the batch mean and variance
# The original paper proposes using this before any nonlinearities
h_1 = lrelu(batch_norm(conv2d(x_tensor, 32, name='conv1'),
                       is_training,
                       scope='bn1'),
            name='lrelu1')
h_2 = lrelu(batch_norm(conv2d(h_1, 64, name='conv2'), is_training,
                       scope='bn2'),
            name='lrelu2')
h_3 = lrelu(batch_norm(conv2d(h_2, 64, name='conv3'), is_training,
                       scope='bn3'),
            name='lrelu3')
h_3_flat = tf.reshape(h_3, [-1, 64 * 4 * 4])
h_4 = linear(h_3_flat, 10)
y_pred = tf.nn.softmax(h_4)

# %% Define loss/eval/training functions
# Compute the cross entropy from the logits (h_4) instead of taking
# tf.log of the softmax output: log(y_pred) becomes -inf as soon as a
# probability underflows to 0, which turns the loss into NaN/inf.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=h_4))
train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
예제 #11
0
# %% We add a new type of placeholder to denote when we are training.
# This will be used to change the way we compute the network during
# training/testing.
is_training = tf.placeholder(tf.bool, name='is_training')

# %% We'll convert our MNIST vector data to a 4-D tensor:
# N x W x H x C
x_tensor = tf.reshape(x, [-1, 28, 28, 1])

# %% We'll use a new method called  batch normalization.
# This process attempts to "reduce internal covariate shift"
# which is a fancy way of saying that it will normalize updates for each
# batch using a smoothed version of the batch mean and variance
# The original paper proposes using this before any nonlinearities
h_1 = lrelu(batch_norm(conv2d(x_tensor, 32, name='conv1'),
                       is_training, scope='bn1'), name='lrelu1')
h_2 = lrelu(batch_norm(conv2d(h_1, 64, name='conv2'),
                       is_training, scope='bn2'), name='lrelu2')
h_3 = lrelu(batch_norm(conv2d(h_2, 64, name='conv3'),
                       is_training, scope='bn3'), name='lrelu3')
h_3_flat = tf.reshape(h_3, [-1, 64 * 4 * 4])
h_4 = linear(h_3_flat, 10)
y_pred = tf.nn.softmax(h_4)

# %% Define loss/eval/training functions
# Compute the cross entropy from the logits (h_4) instead of taking
# tf.log of the softmax output: log(y_pred) becomes -inf as soon as a
# probability underflows to 0, which turns the loss into NaN/inf.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=h_4))
train_step = tf.train.AdamOptimizer().minimize(cross_entropy)

# Accuracy: fraction of rows whose predicted class matches the label.
correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
예제 #12
0
파일: vae.py 프로젝트: Liubinggunzu/CADL
def VAE(input_shape=[None, 784],
        n_filters=[64, 64, 64],
        filter_sizes=[4, 4, 4],
        n_hidden=32,
        n_code=2,
        activation=tf.nn.tanh,
        dropout=False,
        denoising=False,
        convolutional=False,
        variational=False):
    """(Variational) (Convolutional) (Denoising) Autoencoder.

    NOTE(review): this summary used to say "Uses tied weights", but the
    decoder below creates its layers through `utils.linear` /
    `utils.deconv2d` without passing the encoder weights; `Ws` is only
    collected and returned.

    Parameters
    ----------
    input_shape : list, optional
        Shape of the input to the network. e.g. for MNIST: [None, 784].
    n_filters : list, optional
        Number of filters for each layer.
        If convolutional=True, this refers to the total number of output
        filters to create for each layer, with each layer's number of output
        filters as a list.
        If convolutional=False, then this refers to the total number of neurons
        for each layer in a fully connected network.
        The list is copied internally and never modified.
    filter_sizes : list, optional
        Only applied when convolutional=True.  This refers to the ksize (height
        and width) of each convolutional layer.
    n_hidden : int, optional
        Only applied when variational=True.  This refers to the first fully
        connected layer prior to the variational embedding, directly after
        the encoding.  After the variational embedding, another fully connected
        layer is created with the same size prior to decoding.  Set to 0 to
        not use an additional hidden layer.
    n_code : int, optional
        Only applied when variational=True.  This refers to the number of
        latent Gaussians to sample for creating the inner most encoding.
    activation : function, optional
        Activation function to apply to each layer, e.g. tf.nn.relu
    dropout : bool, optional
        Whether or not to apply dropout.  If using dropout, you must feed a
        value for 'keep_prob', as returned in the dictionary.  1.0 means no
        dropout is used.  0.0 means every connection is dropped.  Sensible
        values are between 0.5-0.8.
    denoising : bool, optional
        Whether or not to apply denoising.  If using denoising, you must feed a
        value for 'corrupt_prob' (shape [1]), as returned in the dictionary.
        The network input is
        `utils.corrupt(x) * corrupt_prob + x * (1 - corrupt_prob)`, so 0.0
        leaves the input untouched and 1.0 feeds only the corrupted input.
    convolutional : bool, optional
        Whether or not to use a convolutional network or else a fully connected
        network will be created.  This effects the n_filters parameter's
        meaning.
    variational : bool, optional
        Whether or not to create a variational embedding layer.  This will
        create a fully connected layer after the encoding, if `n_hidden` is
        greater than 0, then will create a multivariate gaussian sampling
        layer, then another fully connected layer.  The size of the fully
        connected layers are determined by `n_hidden`, and the size of the
        sampling layer is determined by `n_code`.

    Returns
    -------
    model : dict
        {
            'cost': Tensor to optimize.
            'Ws': All weights of the encoder, in reversed layer order.
            'x': Input Placeholder
            'z': Inner most encoding Tensor (latent features)
            'y': Reconstruction of the Decoder
            'keep_prob': Amount to keep when using Dropout
            'corrupt_prob': Amount to corrupt when using Denoising
            'train': Set to True when training/Applies to Batch Normalization.
        }
    """
    # Work on a private copy: the list is reversed and extended below, which
    # would otherwise modify the shared default argument (so a second call
    # would build a different network) or the caller's own list.
    n_filters = list(n_filters)

    # network input / placeholders for train (bn) and dropout
    x = tf.placeholder(tf.float32, input_shape, 'x')
    phase_train = tf.placeholder(tf.bool, name='phase_train')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    corrupt_prob = tf.placeholder(tf.float32, [1])

    # apply noise if denoising
    if denoising:
        x_ = utils.corrupt(x) * corrupt_prob + x * (1 - corrupt_prob)
    else:
        x_ = x

    # 2d -> 4d if convolution
    x_tensor = utils.to_tensor(x_) if convolutional else x_
    current_input = x_tensor

    Ws = []
    shapes = []

    # Build the encoder
    for layer_i, n_output in enumerate(n_filters):
        with tf.variable_scope('encoder/{}'.format(layer_i)):
            # Remember each layer's input shape so the decoder can mirror it.
            shapes.append(current_input.get_shape().as_list())
            if convolutional:
                h, W = utils.conv2d(x=current_input,
                                    n_output=n_output,
                                    k_h=filter_sizes[layer_i],
                                    k_w=filter_sizes[layer_i])
            else:
                h, W = utils.linear(x=current_input,
                                    n_output=n_output)
            h = activation(batch_norm(h, phase_train, 'bn' + str(layer_i)))
            if dropout:
                h = tf.nn.dropout(h, keep_prob)
            Ws.append(W)
            current_input = h

    shapes.append(current_input.get_shape().as_list())

    with tf.variable_scope('variational'):
        if variational:
            dims = current_input.get_shape().as_list()
            flattened = utils.flatten(current_input)

            if n_hidden:
                h = utils.linear(flattened, n_hidden, name='W_fc')[0]
                h = activation(batch_norm(h, phase_train, 'fc/bn'))
                if dropout:
                    h = tf.nn.dropout(h, keep_prob)
            else:
                h = flattened

            z_mu = utils.linear(h, n_code, name='mu')[0]
            z_log_sigma = 0.5 * utils.linear(h, n_code, name='log_sigma')[0]

            # Sample from noise distribution p(eps) ~ N(0, 1)
            epsilon = tf.random_normal(
                tf.stack([tf.shape(x)[0], n_code]))

            # Sample from posterior
            z = z_mu + tf.multiply(epsilon, tf.exp(z_log_sigma))

            if n_hidden:
                h = utils.linear(z, n_hidden, name='fc_t')[0]
                h = activation(batch_norm(h, phase_train, 'fc_t/bn'))
                if dropout:
                    h = tf.nn.dropout(h, keep_prob)
            else:
                h = z

            # Project back to the size of the encoder output.
            size = dims[1] * dims[2] * dims[3] if convolutional else dims[1]
            h = utils.linear(h, size, name='fc_t2')[0]
            current_input = activation(batch_norm(h, phase_train, 'fc_t2/bn'))
            if dropout:
                current_input = tf.nn.dropout(current_input, keep_prob)

            if convolutional:
                current_input = tf.reshape(
                    current_input, tf.stack([
                        tf.shape(current_input)[0],
                        dims[1],
                        dims[2],
                        dims[3]]))
        else:
            z = current_input

    # Mirror the encoder for decoding (operates on the local copies only).
    shapes.reverse()
    n_filters.reverse()
    Ws.reverse()

    n_filters += [input_shape[-1]]

    # %%
    # Decoding layers
    for layer_i, n_output in enumerate(n_filters[1:]):
        with tf.variable_scope('decoder/{}'.format(layer_i)):
            shape = shapes[layer_i + 1]
            if convolutional:
                h, W = utils.deconv2d(x=current_input,
                                      n_output_h=shape[1],
                                      n_output_w=shape[2],
                                      n_output_ch=shape[3],
                                      n_input_ch=shapes[layer_i][3],
                                      k_h=filter_sizes[layer_i],
                                      k_w=filter_sizes[layer_i])
            else:
                h, W = utils.linear(x=current_input,
                                    n_output=n_output)
            h = activation(batch_norm(h, phase_train, 'dec/bn' + str(layer_i)))
            if dropout:
                h = tf.nn.dropout(h, keep_prob)
            current_input = h

    y = current_input
    x_flat = utils.flatten(x)
    y_flat = utils.flatten(y)

    # l2 loss
    loss_x = tf.reduce_sum(tf.squared_difference(x_flat, y_flat), 1)

    if variational:
        # variational lower bound, kl-divergence
        loss_z = -0.5 * tf.reduce_sum(
            1.0 + 2.0 * z_log_sigma -
            tf.square(z_mu) - tf.exp(2.0 * z_log_sigma), 1)

        # add l2 loss
        cost = tf.reduce_mean(loss_x + loss_z)
    else:
        # just optimize l2 loss
        cost = tf.reduce_mean(loss_x)

    return {'cost': cost, 'Ws': Ws,
            'x': x, 'z': z, 'y': y,
            'keep_prob': keep_prob,
            'corrupt_prob': corrupt_prob,
            'train': phase_train}
예제 #13
0
def inference(x, y, keepProb, is_training, batch_size, learning_rate):
    """Build a 5-conv / 3-dense network with its L2 loss and training op.

    Args:
      x: Input tensor fed to the first convolution; the first kernel is
        [11, 11, 3, 48], so the input must have 3 channels.
      y: Target tensor; the loss is the L2 loss of `y - output`, where the
        output layer has 20 units.
      keepProb: Keep probability passed to `tf.nn.dropout` after both
        hidden dense layers.
      is_training: Flag passed to `batch_norm` after conv1 and conv2.
      batch_size: Number of rows used when flattening the last pooling
        output to `[batch_size, dim]`.
      learning_rate: Learning rate of the RMSProp optimizer.

    Returns:
      Tuple `(train_op, softmax_linear, loss)`: the RMSProp minimize op,
      the output of the final linear layer and the L2 loss tensor.

    """

    with tf.variable_scope('conv1') as scope:
        kernel = tf.Variable(tf.random_normal([11, 11, 3, 48], stddev=1e-4),
                             name='weights')
        conv = tf.nn.conv2d(x, kernel, [1, 4, 4, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.001, dtype=tf.float32, shape=[48]),
                             name='biases')

        bias = tf.nn.bias_add(conv, biases)
        conv1 = batch_norm(lrelu(bias, name=scope.name),
                           is_training,
                           scope='bn1')

    # pool1
    pool1 = tf.nn.max_pool(conv1,
                           ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1],
                           padding='SAME',
                           name='pool1')

    # conv2
    with tf.variable_scope('conv2') as scope:
        kernel = tf.Variable(tf.random_normal([5, 5, 48, 128], stddev=1e-4),
                             name='weights')
        conv = tf.nn.conv2d(pool1, kernel, [1, 1, 1, 1],
                            padding='SAME')  #use_cudnn_on_gpu=False,
        biases = tf.Variable(tf.constant(0.001, dtype=tf.float32, shape=[128]),
                             name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv2 = batch_norm(lrelu(bias, name=scope.name),
                           is_training,
                           scope='bn2')

    # pool2
    pool2 = tf.nn.max_pool(conv2,
                           ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1],
                           padding='SAME',
                           name='pool2')

    # conv3
    with tf.variable_scope('conv3') as scope:
        kernel = tf.Variable(tf.random_normal([3, 3, 128, 192], stddev=1e-2),
                             name='weights')
        conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.001, dtype=tf.float32, shape=[192]),
                             name='biases')

        bias = tf.nn.bias_add(conv, biases)
        conv3 = lrelu(bias, name=scope.name)

    # conv4
    with tf.variable_scope('conv4') as scope:
        kernel = tf.Variable(tf.random_normal([3, 3, 192, 192], stddev=1e-2),
                             name='weights')
        conv = tf.nn.conv2d(conv3, kernel, [1, 1, 1, 1],
                            padding='SAME')  #use_cudnn_on_gpu=False,
        biases = tf.Variable(tf.constant(0.001, dtype=tf.float32, shape=[192]),
                             name='biases')

        bias = tf.nn.bias_add(conv, biases)
        conv4 = lrelu(bias, name=scope.name)

    # conv5
    with tf.variable_scope('conv5') as scope:
        kernel = tf.Variable(tf.random_normal([5, 5, 192, 128], stddev=1e-2),
                             name='weights')
        conv = tf.nn.conv2d(conv4, kernel, [1, 1, 1, 1],
                            padding='SAME')  #use_cudnn_on_gpu=False,
        biases = tf.Variable(tf.constant(0.001, dtype=tf.float32, shape=[128]),
                             name='biases')

        bias = tf.nn.bias_add(conv, biases)
        conv5 = lrelu(bias, name=scope.name)

        pool3 = tf.nn.max_pool(conv5,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME',
                               name='pool3')

    # local1
    with tf.variable_scope('local1'):
        # Move everything into depth so we can perform a single matrix multiply.
        dim = 1
        for d in pool3.get_shape()[1:].as_list():
            dim *= d
        reshape = tf.reshape(pool3, [batch_size, dim])

        # Scale the initial weights by fan-in + fan-out.  The parentheses
        # matter: `1 / dim + 4096` evaluates to roughly 4096 and gives a
        # stddev of about 64.  The float literal avoids integer division
        # under Python 2.
        weights = tf.Variable(tf.random_normal(
            [dim, 4096], stddev=np.sqrt(1.0 / (dim + 4096))),
                              name='weights')
        biases = tf.Variable(tf.constant(0.0001,
                                         dtype=tf.float32,
                                         shape=[4096]),
                             name='biases')
        local1 = tf.nn.relu_layer(reshape, weights, biases)

        local1 = tf.nn.dropout(local1, keepProb)

    # local2
    with tf.variable_scope('local2'):
        weights = tf.Variable(tf.random_normal([4096, 4096],
                                               stddev=np.sqrt(1 / 4096.0)),
                              name='weights')
        biases = tf.Variable(tf.constant(0.0001,
                                         dtype=tf.float32,
                                         shape=[4096]),
                             name='biases')

        local2 = tf.nn.relu_layer(local1, weights, biases)

        local2 = tf.nn.dropout(local2, keepProb)

    # Final linear layer; despite its name no softmax is applied here.
    with tf.variable_scope('softmax_linear'):
        weights = tf.Variable(tf.random_normal([4096, 20],
                                               stddev=np.sqrt(1 / 4096.0)),
                              name='weights')
        biases = tf.Variable(tf.constant(0.01, dtype=tf.float32, shape=[20]),
                             name='biases')
        softmax_linear = tf.nn.xw_plus_b(local2, weights, biases)

    with tf.name_scope("loss"):
        loss = tf.nn.l2_loss((y - softmax_linear), name="L2_loss")

    with tf.name_scope("train_step"):
        train_op = tf.train.RMSPropOptimizer(learning_rate).minimize(loss)

    return train_op, softmax_linear, loss
예제 #14
0
# N x W x H x C

x_tensor = tf.reshape(x, [-1, 1, 26, 1])  # FOR MFCC-26 dim
#x_tensor = tf.reshape(x, [-1, 1, 40, 1])  # FOR CONVAE

# %% We'll use a new method called  batch normalization.
# This process attempts to "reduce internal covariate shift"
# which is a fancy way of saying that it will normalize updates for each
# batch using a smoothed version of the batch mean and variance
# The original paper proposes using this before any nonlinearities

h_1 = lrelu(batch_norm(conv2d(x_tensor,
                              32,
                              name='conv1',
                              stride_h=1,
                              k_h=1,
                              k_w=3,
                              pool_size=[1, 1, 2, 1],
                              pool_stride=[1, 1, 1, 1]),
                       phase_train=is_training,
                       scope='bn1'),
            name='lrelu1')

h_2 = lrelu(batch_norm(conv2d(h_1,
                              64,
                              name='conv2',
                              stride_h=1,
                              k_h=1,
                              k_w=3,
                              pool_size=[1, 1, 2, 1],
                              pool_stride=[1, 1, 1, 1]),
                       phase_train=is_training,
import tensorflow as tf
from libs.batch_norm import batch_norm
from libs.activations import lrelu
from libs.connections import conv2d, linear
from libs.datasets import MNIST


# %% Setup input to the network and true output label.  These are
# simply placeholders which we'll fill in later.
mnist = MNIST()
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])
x_tensor = tf.reshape(x, [-1, 28, 28, 1])

# %% Define the network:
# One batch-norm object per convolution; each is applied to the
# convolution output before the leaky ReLU.
bn1 = batch_norm(-1, name='bn1')
bn2 = batch_norm(-1, name='bn2')
bn3 = batch_norm(-1, name='bn3')
h_1 = lrelu(bn1(conv2d(x_tensor, 32, name='conv1')), name='lrelu1')
h_2 = lrelu(bn2(conv2d(h_1, 64, name='conv2')), name='lrelu2')
h_3 = lrelu(bn3(conv2d(h_2, 64, name='conv3')), name='lrelu3')
h_3_flat = tf.reshape(h_3, [-1, 64 * 4 * 4])
h_4 = linear(h_3_flat, 10)
y_pred = tf.nn.softmax(h_4)

# %% Define loss/eval/training functions
# Compute the cross entropy from the logits (h_4) instead of taking
# tf.log of the softmax output: log(y_pred) becomes -inf as soon as a
# probability underflows to 0, which turns the loss into NaN/inf.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=h_4))
train_step = tf.train.AdamOptimizer().minimize(cross_entropy)

# Accuracy: fraction of rows whose predicted class matches the label.
correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))