Example #1
def build_optimizer(logits, y):

    with tf.name_scope('loss'):
        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=y, logits=logits)
        loss = tf.reduce_mean(xentropy, name='loss')

    with tf.name_scope('train'):
        learning_rate = 0.0001
        lprint('learning_rate =', learning_rate)
        #optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.99, beta2=0.999, amsgrad=True)
        #optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
        optimizer = AMSGrad(learning_rate=learning_rate, beta1=0.9, beta2=0.99, epsilon=1e-8)
        lprint('optimizer = ', optimizer)
        training_op = optimizer.minimize(loss)
    return loss, training_op
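The function above only builds the graph; the excerpt never actually runs it. The sketch below shows one way the returned loss and training_op might be driven in a TF1 session. It is illustrative only: the placeholder names x and y, the layer sizes, and the random batches are assumptions, and it presumes the project's custom AMSGrad class (and lprint helper) used by build_optimizer are importable as in the original code.

# Assumed usage sketch for build_optimizer (not part of the original example).
import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=(None, 10), name='x')   # 10 input features (assumed)
y = tf.placeholder(tf.int64, shape=(None,), name='y')        # integer class labels
logits = tf.layers.dense(x, 3, name='logits')                # 3 classes (assumed)

loss, training_op = build_optimizer(logits, y)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(100):
        batch_x = np.random.randn(32, 10).astype(np.float32)  # toy batches stand in for real data
        batch_y = np.random.randint(0, 3, size=32)
        _, loss_val = sess.run([training_op, loss],
                               feed_dict={x: batch_x, y: batch_y})
    print('final loss:', loss_val)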
Example #2
    def __init__(self, input_size_a, input_size):
        super(Extra_Attention_MLP_tf, self).__init__()
        print("init NN model...")

        self.saver = None
        self.sess = tf.Session()

        self.input_size = input_size
        self.input_size_a = input_size_a
        self.X = tf.placeholder(tf.float32, shape=(None, input_size), name='x')
        self.X_A = tf.placeholder(tf.float32, shape=(None, input_size_a), name='x_a')
        self.Y = tf.placeholder(tf.float32, shape=(None, 3), name='y')

        with tf.variable_scope('NN'):
            self.y_pred = self._build_net(self.X_A, self.X, scope='eval_net', trainable=True)
            self.supervised_loss = tf.reduce_mean(tf.square(self.Y - self.y_pred))

            self.supervised_optimizer = tf.train.AdamOptimizer(learning_rate=0.01)  # define optimizer (learning rate worth tuning)
            # self.supervised_train_op = self.supervised_optimizer.minimize(self.supervised_loss)  # minimize loss
            # self.supervised_train_op = AdaBoundOptimizer(learning_rate=0.01, final_lr=0.1,
            #                                             beta1=0.9, beta2=0.999, amsbound=False).minimize(self.supervised_loss)
            self.supervised_train_op = AMSGrad(learning_rate=0.01, beta1=0.9, beta2=0.99,
                                               epsilon=1e-8).minimize(self.supervised_loss)
            #self.supervised_train_op = tf.train.GradientDescentOptimizer(0.01).minimize(self.supervised_loss)

        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver()
        self.graph = tf.get_default_graph()
Example #3
class OptimizerVGC(object):
    def __init__(self, preds, labels, preds_X, labels_X, model, num_nodes,
                 pos_weight, norm):
        preds_sub = preds
        labels_sub = labels
        preds_X_sub = preds_X
        labels_X_sub = labels_X

        # Define the structure recovery loss
        self.cost = norm * tf.reduce_mean(
            tf.nn.weighted_cross_entropy_with_logits(
                logits=preds_sub, targets=labels_sub, pos_weight=pos_weight))
        # Define the attribute (content) recovery loss
        self.cost += norm * tf.reduce_mean(
            tf.nn.weighted_cross_entropy_with_logits(logits=preds_X_sub,
                                                     targets=labels_X_sub,
                                                     pos_weight=pos_weight))
        # Define the cat_loss (second item in our paper) and the kl_loss (third item in our paper)
        z_mean = K.expand_dims(model.z_mean, 1)
        z_log_var = K.expand_dims(model.z_log_std, 1)
        y = model.y
        z_prior_mean = model.z_prior_mean

        kl_loss = -0.5 * (z_log_var - K.square(z_prior_mean))
        kl_loss = K.mean(K.batch_dot(K.expand_dims(y, 1), kl_loss),
                         0) / num_nodes
        cat_loss = K.mean(y * K.log(y + K.epsilon()), 0) / num_nodes

        # Final loss
        self.cost = K.sum(self.cost) + K.sum(kl_loss) + K.sum(cat_loss)

        self.cost = tf.clip_by_value(self.cost, 1e-10, 1e100)

        self.optimizer = AMSGrad(learning_rate=FLAGS.learning_rate,
                                 beta1=0.9,
                                 beta2=0.99,
                                 epsilon=1e-8)
        self.opt_op = self.optimizer.minimize(self.cost)

        self.grads_vars = self.optimizer.compute_gradients(self.cost)
Example #4
entropy_cost = tf.reduce_mean(  # assignment target assumed from its use in Total_Cost below
    tf.nn.sigmoid_cross_entropy_with_logits(labels=masked_y,
                                            logits=logits))

# Total Cost Function
with tf.name_scope('Total_Cost'):
    cost = ms_cost
    #cost = 0.1*ms_cost + entropy_cost
    #cost = entropy_cost
    #cost = ms_cost - 10.0*ssim_cost
    cost_2 = ms_cost + 10.0 * entropy_cost
    cost_3 = ms_cost + 10.0 * entropy_cost - 10.0 * ssim_cost

# Run Adam Optimizer to minimize cost
with tf.name_scope('Optimizer'):
    if use_AMSGrad:
        optimizer = AMSGrad(learning_rate=learning_rate,
                            epsilon=1e-06).minimize(cost)
        optimizer_2 = AMSGrad(learning_rate=learning_rate,
                              epsilon=1e-06).minimize(cost_2)
        optimizer_3 = AMSGrad(learning_rate=learning_rate,
                              epsilon=1e-06).minimize(cost_3)
    else:
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                           epsilon=1e-06).minimize(cost)
        optimizer_2 = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                             epsilon=1e-06).minimize(cost_2)
        optimizer_3 = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                             epsilon=1e-06).minimize(cost_3)

# Define summary of cost for log file
tf.summary.scalar("MS_Loss", ms_cost)
#tf.summary.scalar("SSIM_Loss", ssim_cost)
Example #5
history = get_history_layer(history_input, keep_prob, ht=state_fw)

context = tf.concat([I, history], axis=1)
print('ss', context)
pred = tf.matmul(context, weights['out']) + biases['out']
preds = tf.nn.softmax(pred)
#optimizer
latent_loss = 0.5 * tf.reduce_sum(
    tf.exp(z_stddev) - 1. - z_stddev + tf.square(z_mean), 1)
latent_loss = tf.reduce_mean(latent_loss)
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=next_x, logits=pred))
T_COST = cost + 0.01 * latent_loss
optimizer_class = AMSGrad(learning_rate=it_learning_rate,
                          beta1=0.9,
                          beta2=0.99,
                          epsilon=1e-8).minimize(T_COST)
pred_mask = tf.argmax(pred, 1)  # predicted class index for each example


def eos_sentence_batch(sentence_batch, eos_in):
    return [sentence + [eos_in] for sentence in sentence_batch]


initial = tf.global_variables_initializer()


def compute():
    print('start train')
    all_vars = tf.trainable_variables()
    for v in all_vars[9:17]:
Example #6
def g_optimizer(self, *args, **kwargs):
    return AMSGrad(*args, **kwargs)


def build_faae_harness(image_input: tf.Tensor,
                       noise: tf.Tensor,
                       generator: tf.keras.Model,
                       discriminator: tf.keras.Model,
                       encoder: tf.keras.Model,
                       generator_learning_rate=1e-4,
                       discriminator_learning_rate=2e-4,
                       reconstruction_learning_rate=5e-5,
                       noise_format: str = 'SPHERE',
                       adversarial_training: str = 'WASSERSTEIN',
                       no_trainer: bool = False,
                       summarize_activations: bool = False):
    image_size = image_input.shape.as_list()[1]
    print("Flipped Adversarial Auto-Encoder: {}x{} images".format(
        image_size, image_size))

    def _generator_fn(z):
        return generator([z], training=True)

    def _encoder_fn(x):
        return encoder([x], training=True)

    def _discriminator_fn(x, z):
        return discriminator([x, z], training=True)

    gan_model = aegan_model(
        _generator_fn,
        _discriminator_fn,
        _encoder_fn,
        image_input,
        noise,
        generator_scope='Generator',
        discriminator_scope='Discriminator',
        encoder_scope='Encoder',
        check_shapes=True)  # set to False for 2-level architectures

    sampled_x = gan_model.generated_data
    image_grid_summary(sampled_x, grid_size=3, name='generated_data')
    if summarize_activations:
        tf.contrib.layers.summarize_activations()
    tf.contrib.layers.summarize_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    # summarize encoded Z
    with tf.variable_scope(gan_model.encoder_scope):
        tf.summary.histogram('encoded_z', gan_model.encoder_gen_outputs)

    gan_loss = gan_loss_by_name(gan_model,
                                adversarial_training,
                                add_summaries=True)

    # add auto-encoder reconstruction loss
    rec_loss = code_autoencoder_mse_cosine(gan_model.generator_inputs,
                                           gan_model.encoder_gen_outputs,
                                           1e-3,
                                           add_summary=True)

    if no_trainer:
        train_ops = None
    else:
        train_ops = aegan_train_ops(
            gan_model,
            gan_loss,
            rec_loss,
            generator_optimizer=AMSGrad(generator_learning_rate,
                                        beta1=0.5,
                                        beta2=0.999),
            discriminator_optimizer=AMSGrad(discriminator_learning_rate,
                                            beta1=0.5,
                                            beta2=0.999),
            reconstruction_optimizer=AMSGrad(reconstruction_learning_rate,
                                             beta1=0.5,
                                             beta2=0.999),
            summarize_gradients=True)

    return (gan_model, gan_loss, rec_loss, train_ops)
Example #8
def nn(xae_model):
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    mean_img = np.mean(mnist.train.images, axis=0)
    
    # Parameters
    learning_rate = 0.01
    num_steps = 10000
    batch_size = 1280
    test_size = 1000
    display_step = 1000
    
    # Network Parameters
    n_hidden_1 = 64 # 1st layer number of neurons
    n_hidden_2 = 64 # 2nd layer number of neurons
    num_input = 144 # size of the autoencoder code fed to this classifier
    num_classes = 3 # only digit classes 0-2 are used
    
    # tf Graph input
    X = tf.placeholder("float", [None, num_input])
    Y = tf.placeholder("float", [None, num_classes])
    
    # Store layers weight & bias
    weights = {
        'h1': tf.Variable(tf.random_normal([num_input, n_hidden_1])),
        'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
        'out': tf.Variable(tf.random_normal([n_hidden_2, num_classes]))
    }
    biases = {
        'b1': tf.Variable(tf.random_normal([n_hidden_1])),
        'b2': tf.Variable(tf.random_normal([n_hidden_2])),
        'out': tf.Variable(tf.random_normal([num_classes]))
    }
    
    
    # Create model
    def neural_net(x):
        # Hidden fully connected layer with 64 neurons
        layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
        # Hidden fully connected layer with 64 neurons
        layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
        # Output fully connected layer with a neuron for each class
        out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
        return out_layer

    
    # Construct model
    logits = neural_net(X)
    
    # Define loss and optimizer
    loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits, labels=Y))
    optimizer = AMSGrad(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_op)
    
    # Evaluate model (with test logits, for dropout to be disabled)
    correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    prediction = tf.argmax(logits, 1)
    
    # Initialize the variables (i.e. assign their default value)
    init = tf.global_variables_initializer()
    
    # Start training
    with tf.Session() as sess:
    
        # Run the initializer
        sess.run(init)
    
        for step in range(1, num_steps+1):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            
            batch_x = np.array([img - mean_img for img in batch_x[np.where(np.any(np.array([
                batch_y[:, 0], 
                batch_y[:, 1],
                batch_y[:, 2], 
                # batch_ys[:, 3],
                # batch_ys[:, 4], 
                # batch_ys[:, 5],
                # batch_ys[:, 6], 
                # batch_ys[:, 7],
                # batch_ys[:, 8], 
                # batch_ys[:, 9],
            ]) == 1, axis=0))]])
 
            batch_x = sess.run(xae_model['z'], feed_dict={xae_model['x']: batch_x})
            
            batch_y = np.array([label[0:3] for label in batch_y[np.where(np.any(np.array([
                batch_y[:, 0], 
                batch_y[:, 1],
                batch_y[:, 2], 
                # batch_ys[:, 3],
                # batch_ys[:, 4], 
                # batch_ys[:, 5],
                # batch_ys[:, 6], 
                # batch_ys[:, 7],
                # batch_ys[:, 8], 
                # batch_ys[:, 9],
            ]) == 1, axis=0))]])
            
            
            
            # Run optimization op (backprop)
            sess.run(train_op, feed_dict={X: batch_x[0:128], Y: batch_y[0:128]})
            if step % display_step == 0 or step == 1:
                # Calculate batch loss and accuracy
                loss, acc = sess.run([loss_op, accuracy], feed_dict={X: batch_x,
                                                                     Y: batch_y})
                print("Step " + str(step) + ", Minibatch Loss= " + \
                      "{:.4f}".format(loss) + ", Training Accuracy= " + \
                      "{:.3f}".format(acc))
    
        print("Optimization Finished!")
    

        



        test_x, test_y = mnist.train.next_batch(test_size)
            
        test_x = np.array([img - mean_img for img in test_x[np.where(np.any(np.array([
            test_y[:, 0], 
            test_y[:, 1],
            test_y[:, 2], 
            # batch_ys[:, 3],
            # batch_ys[:, 4], 
            # batch_ys[:, 5],
            # batch_ys[:, 6], 
            # batch_ys[:, 7],
            # batch_ys[:, 8], 
            # batch_ys[:, 9],
        ]) == 1, axis=0))]])
        
        test_x = sess.run(xae_model['z'], feed_dict={xae_model['x']: test_x})
        
        test_y = np.array([label[0:3] for label in test_y[np.where(np.any(np.array([
            test_y[:, 0], 
            test_y[:, 1],
            test_y[:, 2], 
            # batch_ys[:, 3],
            # batch_ys[:, 4], 
            # batch_ys[:, 5],
            # batch_ys[:, 6], 
            # batch_ys[:, 7],
            # batch_ys[:, 8], 
            # batch_ys[:, 9],
        ]) == 1, axis=0))]])
           
        # Calculate accuracy for MNIST test images
        print("Testing Accuracy:", \
            sess.run(accuracy, feed_dict={X: test_x,
                                          Y: test_y}))
        
        pred_y = sess.run(prediction, feed_dict={X: test_x})
        true_y = np.argmax(test_y, axis=1)
        
        score = f1_score(true_y, pred_y, average='weighted')
        print("f1 score:", \
            score)
        
    
    return score    
Example #9
def test_mnist():
    '''Test the eXclusive autoencoder using MNIST.'''
    # %%
    # load MNIST as before
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    mean_img = np.mean(mnist.train.images, axis=0)
 
    ae = eXclusiveAutoencoder(
        input_dimensions = 784,
        layers = [
            {
                'n_channels': 144,
                'reconstructive_regularizer': 1.0, 
                'weight_decay': 1.0, 
                'sparse_regularizer': 1.0, 
                'sparsity_level': 0.05,
                'exclusive_regularizer': 1.0,
                'exclusive_type': 'logcosh',
                'exclusive_logcosh_scale': 10.0,
                'corrupt_prob': 1.0,
                'tied_weight': True,
                'encode':'sigmoid', 'decode':'linear',
                'pathways': [
                    # range(0, 144),
                    range(0, 96),
                    range(48, 144),
                ],
            },                                                                                                 
        ],
        
        init_encoder_weight = None,
        init_decoder_weight = None,
        init_encoder_bias = None,
        init_decoder_bias = None,
    )
 
    # %%
    learning_rate = 0.01
    n_reload_per_epochs = 10
    n_display_per_epochs = 10000
    batch_size = 2000
    n_epochs = 100000
     
     
    optimizer_list = []  
     
    for layer_i in range(1):
        optimizer_list.append(AMSGrad(learning_rate).minimize(ae['layerwise_cost'][layer_i]['total'], var_list=[
                ae['encoder_weight'][layer_i],
                ae['encoder_bias'][layer_i],
                # ae['decoder_weights'][layer_i],
                # ae['decoder_biases'][layer_i],
        ]))
         
    # optimizer_full = tf.train.AdamOptimizer(learning_rate).minimize(ae['cost']['total'])
    
    optimizer_list.append(AMSGrad(learning_rate).minimize(ae['cost']['total']))
     
    # %%
    # We create a session to use the graph
    sess = tf.Session()
    writer = tf.summary.FileWriter('logs', sess.graph)
    sess.run(tf.global_variables_initializer())
 
    # %%
    # Fit all training data
         
    for optimizer_i, optimizer in enumerate(optimizer_list):
        for epoch_i in range(n_epochs): 
            if (epoch_i) % n_reload_per_epochs == 0:
                batch_xs, batch_ys = mnist.train.next_batch(batch_size)
                
                
                batch_x0 = np.array([img - mean_img for img in batch_xs[np.where(np.any(np.array([
                    batch_ys[:, 0], 
                    # batch_ys[:, 1],
                    # batch_ys[:, 2], 
                    # batch_ys[:, 3],
                    # batch_ys[:, 4], 
                    # batch_ys[:, 5],
                    # batch_ys[:, 6], 
                    # batch_ys[:, 7],
                    # batch_ys[:, 8], 
                    # batch_ys[:, 9],
                ]) == 1, axis=0))]])
                
                batch_x1 = np.array([img - mean_img for img in batch_xs[np.where(np.any(np.array([
                    # batch_ys[:, 0], 
                    batch_ys[:, 1],
                    # batch_ys[:, 2], 
                    # batch_ys[:, 3],
                    # batch_ys[:, 4], 
                    # batch_ys[:, 5],
                    # batch_ys[:, 6], 
                    # batch_ys[:, 7],
                    # batch_ys[:, 8], 
                    # batch_ys[:, 9],
                ]) == 1, axis=0))]])
                
                batch_x2 = np.array([img - mean_img for img in batch_xs[np.where(np.any(np.array([
                    # batch_ys[:, 0], 
                    # batch_ys[:, 1],
                    batch_ys[:, 2], 
                    # batch_ys[:, 3],
                    # batch_ys[:, 4], 
                    # batch_ys[:, 5],
                    # batch_ys[:, 6], 
                    # batch_ys[:, 7],
                    # batch_ys[:, 8], 
                    # batch_ys[:, 9],
                ]) == 1, axis=0))]])                
                
                min_batch_size_x01 = np.min((batch_x0.shape[0], batch_x1.shape[0]))                
                batch_x01 = 0.5*(batch_x0[:min_batch_size_x01]+batch_x1[:min_batch_size_x01])
                
                min_batch_size_x012 = np.min((batch_x0.shape[0], batch_x1.shape[0], batch_x2.shape[0]))                
                batch_x012 = 0.333*(batch_x0[:min_batch_size_x012]+batch_x1[:min_batch_size_x012]+batch_x2[:min_batch_size_x012])
                 
                batch_x1 = np.array([img - mean_img for img in batch_xs[np.where(np.any(np.array([
                    # batch_ys[:, 0], 
                    batch_ys[:, 1],
                    # batch_ys[:, 2], 
                    # batch_ys[:, 3],
                    # batch_ys[:, 4], 
                    # batch_ys[:, 5],
                    # batch_ys[:, 6], 
                    # batch_ys[:, 7],
                    # batch_ys[:, 8], 
                    # batch_ys[:, 9],
                ]) == 1, axis=0))]])
                 
                batch_x2 = np.array([img - mean_img for img in batch_xs[np.where(np.any(np.array([
                    # batch_ys[:, 0], 
                    # batch_ys[:, 1],
                    batch_ys[:, 2], 
                    # batch_ys[:, 3],
                    # batch_ys[:, 4], 
                    # batch_ys[:, 5],
                    # batch_ys[:, 6], 
                    # batch_ys[:, 7],
                    # batch_ys[:, 8], 
                    # batch_ys[:, 9],
                ]) == 1, axis=0))]])
                 
                min_batch_size_x12 = np.min((batch_x1.shape[0], batch_x2.shape[0]))
                 
                batch_x12 = 0.5*(batch_x1[:min_batch_size_x12]+batch_x2[:min_batch_size_x12])

                train = []
                # train.append(batch_x012)
                train.append(batch_x01)
                train.append(batch_x12)
                
#                 train.append(np.array([img - mean_img for img in batch_xs[np.where(np.any(np.array([
#                     batch_ys[:, 0], 
#                     batch_ys[:, 1],
#                     # batch_ys[:, 2], 
#                     # batch_ys[:, 3],
#                     # batch_ys[:, 4], 
#                     # batch_ys[:, 5],
#                     # batch_ys[:, 6], 
#                     # batch_ys[:, 7],
#                     # batch_ys[:, 8], 
#                     # batch_ys[:, 9],
#                 ]) == 1, axis=0))]]))
#                 train.append(np.array([img - mean_img for img in batch_xs[np.where(np.any(np.array([
#                     # batch_ys[:, 0], 
#                     batch_ys[:, 1],
#                     batch_ys[:, 2], 
#                     # batch_ys[:, 3],
#                     # batch_ys[:, 4], 
#                     # batch_ys[:, 5],
#                     # batch_ys[:, 6], 
#                     # batch_ys[:, 7],
#                     # batch_ys[:, 8], 
#                     # batch_ys[:, 9],
#                 ]) == 1, axis=0))]]))
                     
            feed_dict = {ae['training_x'][0]: train[0], ae['training_x'][1]: train[1]}
            sess.run(optimizer, feed_dict=feed_dict)

            if (epoch_i+1) % n_display_per_epochs == 0:
                # Layerwise optimizers report their own cost; the final optimizer reports the full cost.
                if optimizer is not optimizer_list[-1]:
                    cost = ae['layerwise_cost'][optimizer_i]
                    layer_label = optimizer_i + 1
                else:
                    cost = ae['cost']
                    layer_label = 'full'

                (cost_total, cost_reconstruction_error, cost_sparsity,
                 cost_exclusivity, cost_weight_decay) = sess.run(
                    [cost['total'], cost['reconstruction_error'], cost['sparsity'],
                     cost['exclusivity'], cost['weight_decay']],
                    feed_dict=feed_dict)

                print('layer:', layer_label, ', epoch:', epoch_i+1,
                      ', total cost:', cost_total,
                      ', recon error:', cost_reconstruction_error,
                      ', sparsity:', cost_sparsity,
                      ', weight decay:', cost_weight_decay,
                      ', exclusivity: ', cost_exclusivity)

                           
                n_examples = 5120
                test_xs, test_ys = mnist.test.next_batch(n_examples)  
                
                test_xs = np.array([img - mean_img for img in test_xs[np.where(np.any(np.array([
                    test_ys[:, 0], 
                    test_ys[:, 1], 
                    test_ys[:, 2],
                    # test_ys[:, 3], 
                    # test_ys[:, 4], 
                    # test_ys[:, 5],
                    # test_ys[:, 6], 
                    # test_ys[:, 7], 
                    # test_ys[:, 8],
                    # test_ys[:, 9],
                ]) == 1, axis=0))][:144]])
                
                if optimizer is not optimizer_list[-1]:
                    recon = sess.run(ae['layerwise_y'][layer_i], feed_dict={ae['x']: test_xs})
                else:
                    recon = sess.run(ae['y'], feed_dict={ae['x']: test_xs})
                   
                weights = sess.run(ae['encoder_weight'][0])
                # weights = np.transpose(weights, axes=(3,0,1,2))
                
                # display_network(batch_x012[:144].transpose(), filename='mnist_batch_01.png')
                display_network(batch_x01[:144].transpose(), filename='mnist_batch_01.png')
                display_network(batch_x12[:144].transpose(), filename='mnist_batch_12.png')
                display_network(test_xs.transpose(), filename='mnist_test.png')
                display_network(recon.reshape((144,784)).transpose(), filename='mnist_results.png')
                display_network(weights, filename='mnist_weights.png')                             

                  
    writer.close()
    
    return ae
Example #10
    def __init__(self, is_training, config, input_):
        self._is_training = is_training
        self._input = input_
        self._rnn_params = None
        self._cell = None
        self.batch_size = input_.batch_size
        self.num_steps = input_.num_steps
        size = config.hidden_size
        vocab_size = config.vocab_size

        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, size],
                                        dtype=data_type())
            inputs = tf.nn.embedding_lookup(embedding, input_.input_data)
            print(inputs)

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        output, state = self._build_rnn_graph(inputs, config, is_training)

        softmax_w = tf.get_variable("softmax_w", [size, vocab_size],
                                    dtype=data_type())
        softmax_b = tf.get_variable("softmax_b", [vocab_size],
                                    dtype=data_type())
        logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
        # Reshape logits to be a 3-D tensor for sequence loss
        logits = tf.reshape(logits,
                            [self.batch_size, self.num_steps, vocab_size])

        # Use the contrib sequence loss and average over the batches
        loss = tf.contrib.seq2seq.sequence_loss(
            logits,
            input_.targets,
            tf.ones([self.batch_size, self.num_steps], dtype=data_type()),
            average_across_timesteps=False,
            average_across_batch=True)

        # Update the cost
        self._cost = tf.reduce_sum(loss)
        #self._reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES), name='reg')#TODO addition
        self._reg = 0.5 * new_rnn.get_co_loss(
            tf.get_collection(
                tf.GraphKeys.GLOBAL_VARIABLES)[1]) + 0.5 * new_rnn.get_co_loss(
                    tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)[3])

        self._final_state = state

        if not is_training:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(self._cost + self._reg, tvars), config.max_grad_norm)
        if optimizerType == "ADAM":
            optimizer = tf.train.AdamOptimizer()
        elif optimizerType == "AMS":
            optimizer = AMSGrad()
        else:
            optimizer = tf.train.GradientDescentOptimizer(self._lr)  #TODO
        #optimizer = tf.train.AdamOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(
            zip(grads, tvars),
            global_step=tf.train.get_or_create_global_step())

        self._new_lr = tf.placeholder(tf.float32,
                                      shape=[],
                                      name="new_learning_rate")
        self._lr_update = tf.assign(self._lr, self._new_lr)
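The _new_lr placeholder and _lr_update assign op at the end follow the usual TF1 pattern for changing a learning rate from outside the graph. A hypothetical helper method in the same style is sketched below; the name assign_lr and its arguments are assumptions rather than part of the original class, and note that only the GradientDescentOptimizer branch above actually reads self._lr.

    def assign_lr(self, session, lr_value):
        # Assumed helper: push a new learning-rate value into the graph variable.
        # Only the GradientDescentOptimizer branch uses self._lr; the ADAM and
        # AMS branches were constructed with their own default rates.
        session.run(self._lr_update, feed_dict={self._new_lr: lr_value})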
Example #11
    def __init__(self, preds, labels, preds_X, labels_X, model, num_nodes,
                 pos_weight, norm, d_real, d_fake):
        preds_sub = preds
        labels_sub = labels
        preds_X_sub = preds_X
        labels_X_sub = labels_X

        # Discriminator loss
        dc_loss_real = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.ones_like(d_real), logits=d_real))
        dc_loss_fake = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.zeros_like(d_fake), logits=d_fake))
        self.dc_loss = dc_loss_fake + dc_loss_real

        # Generator loss
        self.generator_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.ones_like(d_fake), logits=d_fake))
        # Define the structure recovery loss
        self.cost = norm * tf.reduce_mean(
            tf.nn.weighted_cross_entropy_with_logits(
                logits=preds_sub, targets=labels_sub, pos_weight=pos_weight))
        # Define the attribute (content) recovery loss
        self.cost += norm * tf.reduce_mean(
            tf.nn.weighted_cross_entropy_with_logits(logits=preds_X_sub,
                                                     targets=labels_X_sub,
                                                     pos_weight=pos_weight))
        # Define the cat_loss (second item in our paper) and the kl_loss (third item in our paper)
        z_mean = K.expand_dims(model.z_mean, 1)
        z_log_var = K.expand_dims(model.z_log_std, 1)
        y = model.y
        z_prior_mean = model.z_prior_mean

        kl_loss = -0.5 * (z_log_var - K.square(z_prior_mean))
        kl_loss = K.mean(K.batch_dot(K.expand_dims(y, 1), kl_loss),
                         0) / num_nodes
        cat_loss = K.mean(y * K.log(y + K.epsilon()), 0) / num_nodes

        # Final loss
        self.cost = K.sum(self.cost) + K.sum(kl_loss) + K.sum(cat_loss)

        self.generator_loss = self.generator_loss + self.cost  # add to keep the same as AE

        all_variables = tf.trainable_variables()
        dc_var = [var for var in all_variables if 'dc_' in var.op.name]
        en_var = [var for var in all_variables if 'e_' in var.op.name]

        with tf.variable_scope(tf.get_variable_scope(), reuse=False):
            self.discriminator_optimizer = tf.train.AdamOptimizer(
                learning_rate=FLAGS.discriminator_learning_rate,
                beta1=0.9,
                name='adam1').minimize(
                    self.dc_loss,
                    var_list=dc_var)  #minimize(dc_loss_real, var_list=dc_var)

            self.generator_optimizer = tf.train.AdamOptimizer(
                learning_rate=FLAGS.discriminator_learning_rate,
                beta1=0.9,
                name='adam2').minimize(self.generator_loss, var_list=en_var)
        self.cost = tf.clip_by_value(self.cost, 1e-10, 1e100)

        self.optimizer = AMSGrad(learning_rate=FLAGS.learning_rate,
                                 beta1=0.9,
                                 beta2=0.99,
                                 epsilon=1e-8)
        self.opt_op = self.optimizer.minimize(self.cost)

        self.grads_vars = self.optimizer.compute_gradients(self.cost)
Example #12
from __future__ import print_function

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop
#from keras.optimizers import adam
import tensorflow as tf

# In[5]:

from AMSGrad import AMSGrad

train_op = AMSGrad(learning_rate=0.01, beta1=0.9, beta2=0.99,
                   epsilon=1e-8).minimize(loss)

# In[6]:

batch_size = 128
num_classes = 10
epochs = 20

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255