Ejemplo n.º 1
0
    def finetune_parameters(self, output_dim, epochs=1, batch_method="random"):
        """Fine-tune the network on the MNIST training set.

        Reads MNIST from "data/MNIST" with one-hot labels, pairs every training
        image with its label, optionally shuffles the pairs, splits them with
        `utilities.gen_batches` using `FLAGS.batch_size`, and passes the result to
        `finetune_parameters_gen`. Call this method after pretraining is complete.

        :param output_dim: An int, the number of classes in the target classification problem. Ex: 10 for MNIST.
        :param epochs: An int, the number of iterations to tune through the entire dataset.
            NOTE(review): this value is accepted but not forwarded by this method -- confirm
            whether `finetune_parameters_gen` should receive it.
        :param batch_method: A string. 'random' shuffles the (image, label) pairs before batching;
            any other value keeps the dataset order.
        :return: Whatever `finetune_parameters_gen` returns for the prepared data.
        """
        data = input_data.read_data_sets("data/MNIST", one_hot=True)

        # Materialise the pairs explicitly: np.random.shuffle needs a mutable
        # sequence, and zip() only returns a list on Python 2.
        pairs = list(zip(data.train.images, data.train.labels))
        if batch_method == "random":
            np.random.shuffle(pairs)

        xy_train = list(utilities.gen_batches(pairs, FLAGS.batch_size))

        return self.finetune_parameters_gen(xy_train_gen=xy_train,
                                            output_dim=output_dim)
Ejemplo n.º 2
0
 def pre_train_network(self):
     """Pretrain every hidden layer, one after another, on the MNIST training images.

     MNIST is read once from "data/MNIST". For each index into
     `self.hidden_layers` the training images are (when
     `FLAGS.batch_method == "random"`) shuffled in place, split with
     `utilities.gen_batches` using `FLAGS.batch_size`, and passed to
     `self.pre_train_layer` together with the layer index.
     """
     print('Starting to pretrain autoencoder network.')
     mnist = input_data.read_data_sets("data/MNIST", one_hot=True)
     layer_count = len(self.hidden_layers)
     for depth in range(layer_count):
         images = mnist.train.images
         if FLAGS.batch_method == "random":
             # Shuffles the dataset's image array itself, so the new order
             # carries over to the following layers.
             np.random.shuffle(images)
         batches = list(utilities.gen_batches(images, FLAGS.batch_size))
         self.pre_train_layer(depth, batches)
     print('Finished pretraining of autoencoder network.')
Ejemplo n.º 3
0
if __name__ == '__main__':
    # Script entry point: build a GBRBM for 28x28 inputs, load the training
    # array, open a TensorFlow session and prepare shuffled mini-batches.
    IMG_SIZE = 28
    # The first argument is the flattened image size (28 * 28 = 784). The
    # meaning of 500, cdk and epoch is defined by GBRBM, which is not visible
    # here (presumably hidden units, CD-k steps and epochs -- TODO confirm).
    gbrbm = GBRBM(IMG_SIZE * IMG_SIZE, 500, cdk=30, epoch=300)
    o_train_set_x = np.load('../theano_rbm/data/origin_target_train_28.npy')
    # print type(o_train_set_x), o_train_set_x.shape, np.max(o_train_set_x), np.min(o_train_set_x)
    # o_train_set_x = np.load('../theano_rbm/data/face_train_dataset_19.npy')

    sess = tf.Session()
    # NOTE(review): the summaries are merged and the variables initialised
    # before gbrbm.update_parameter() / gbrbm.energy_function() are called
    # below; anything those calls add to the graph afterwards would not be
    # covered -- confirm this ordering is intended.
    summary = tf.merge_all_summaries()
    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)
    sess.run(tf.initialize_all_variables())

    # Shuffle the loaded array in place along its first axis, then split it
    # into batches of gbrbm.batch_size (cast to int for gen_batches).
    np.random.shuffle(o_train_set_x)
    batches = [
        _ for _ in utilities.gen_batches(o_train_set_x, int(gbrbm.batch_size))
    ]
    # Handle returned by the model for its parameter update; the commented
    # line below is the alternative non-adaptive variant.
    updates = gbrbm.update_parameter()
    # updates = gbrbm.no_adaptive_update_parameter()

    # Bookkeeping for later code that is not visible in this snippet.
    anneal_counter = 0
    base_lrate = gbrbm.gradient_lr

    # Only the third value returned by energy_function is kept.
    _, _, energy_sum = gbrbm.energy_function(gbrbm.input_img, gbrbm.W, gbrbm.v,
                                             gbrbm.h, gbrbm.sigma)

    # Disabled debugging code: run `updates` on the first batch and print the
    # returned energies and cost.
    # print updates
    # energy0_sum_rcon, energy0_sum_origin, energy_sum, energy_sum2, now_cost = sess.run(updates, feed_dict={gbrbm.input_img: batches[0]})
    # print energy0_sum_rcon, energy0_sum_origin, energy_sum, energy_sum2
    # print now_cost, type(now_cost)
    def finetune_parameters_gen(self, xy_train_gen, output_dim, epochs):
        """Fine-tune the encoder stack and the softmax layer on labelled data.

        Builds a classification graph on top of the fully encoded input
        (`self.get_encoded_input(x, depth=-1)`), minimises the softmax
        cross-entropy with Adam under an exponentially decaying learning rate,
        and writes summaries to `FLAGS.log_dir + "/train/finetune"`.

        `self.W` and `self.b` are not created here; they must already exist on
        the instance when this method is called.

        :param xy_train_gen: A mutable sequence whose batches unpack into (x, y) pairs, with y in
            one-hot form. It is shuffled IN PLACE at the start of every epoch and then split with
            `utilities.gen_batches`, so a one-shot generator is not suitable.
        :param output_dim: An int, the number of target classes (width of the `y_actual` placeholder).
        :param epochs: An int, the number of passes over `xy_train_gen`.
        :return: A dict with one "layer<N>_weights" entry per hidden layer (N is 1-based), plus
            "weights", "biases", "layer1_pop_mean" and "layer1_pop_var".
        """
        sess = self.sess
        summary_list = []
        batch_s = tf.Variable(0, trainable=False)
        learning_rate = tf.train.exponential_decay(
            0.0001,  # Base learning rate.
            batch_s,  # Global step; passed to minimize() below.
            30000,  # Decay step.
            0.95,  # Decay rate.
            staircase=True)

        print("Starting to fine tune parameters of network.")
        with tf.name_scope("finetuning"):
            with tf.name_scope("inputs"):
                x = tf.placeholder(tf.float32,
                                   shape=[None, self.input_dim],
                                   name="raw_input")
                with tf.name_scope("fully_encoded"):
                    x_encoded = self.get_encoded_input(
                        x, depth=-1)  # Full depth encoding

            # Note on W below: The difference between self.output_dim and output_dim is that the
            # former is the output dimension of the autoencoder stack, which is the dimension of
            # the new feature space. The latter is the dimension of the y value space for
            # classification. Ex: If the output should be binary, then the output_dim = 2.
            with tf.name_scope("outputs"):
                y_logits = tf.matmul(x_encoded, self.W) + self.b
                with tf.name_scope("predicted"):
                    y_pred = tf.nn.softmax(y_logits, name="y_pred")
                with tf.name_scope("actual"):
                    y_actual = tf.placeholder(tf.float32,
                                              shape=[None, output_dim],
                                              name="y_actual")

            trainable_vars = self.get_all_variables(
                additional_layer=[self.W, self.b])
            trainable_weights = self.get_all_variables_weights(
                additional_layer=[self.W])

            # The weight-decay term is built but is not part of the loss that
            # is optimised below (loss = cross_entropy).
            with tf.name_scope('weights_norm'):
                weights_norm = tf.reduce_sum(input_tensor=tf.reduce_mean(
                    tf.pack([
                        FLAGS.deacy_factor * tf.nn.l2_loss(weight)
                        for weight in trainable_weights
                    ])),
                                             name='weights_norm')

            with tf.name_scope("cross_entropy"):
                # Keyword arguments keep logits/labels from being swapped if
                # the positional order of this API differs between versions.
                cross_entropy = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(
                        logits=y_logits, labels=y_actual))
                loss = cross_entropy
                attach_scalar_summary(cross_entropy,
                                      "cross_entropy",
                                      summ_list=summary_list)
                attach_scalar_summary(learning_rate,
                                      'finetune_lr',
                                      summ_list=summary_list)

            with tf.name_scope("train_step"):
                train_step = tf.train.AdamOptimizer(
                    learning_rate=learning_rate).minimize(
                        loss, var_list=trainable_vars, global_step=batch_s)

            with tf.name_scope("evaluation"):
                correct_prediction = tf.equal(tf.argmax(y_pred, 1),
                                              tf.argmax(y_actual, 1))
                accuracy = tf.reduce_mean(
                    tf.cast(correct_prediction, tf.float32))
                attach_scalar_summary(accuracy,
                                      "finetune_accuracy",
                                      summ_list=summary_list)

            # NOTE(review): this runs the initializer of every variable in the
            # graph, not only the ones created above -- confirm that resetting
            # previously trained variables is intended.
            sess.run(tf.initialize_all_variables())

            # Merge summaries and get a summary writer
            merged = tf.merge_summary(summary_list)
            train_writer = tf.train.SummaryWriter(
                FLAGS.log_dir + "/train/finetune", sess.graph)

            step = 0
            for _ in range(epochs):
                np.random.shuffle(xy_train_gen)
                train_data_batchs = list(
                    utilities.gen_batches(xy_train_gen, FLAGS.batch_size))
                for batch in train_data_batchs:
                    batch_xs, batch_ys = zip(*batch)
                    feed = {x: batch_xs, y_actual: batch_ys}

                    if step % self.print_step == 0:
                        # One pre-formatted string prints the same way under
                        # the print statement and the print function (a
                        # two-argument print() shows a tuple on Python 2).
                        print("Step %s, batch accuracy: %s" %
                              (step, sess.run(accuracy, feed_dict=feed)))

                    # For debugging predicted y values
                    if step % (self.print_step * 10) == 0:
                        first_pred = sess.run(y_pred,
                                              feed_dict={x: batch_xs})[0]
                        print("Predicted y-value: %s" % (first_pred,))
                        print("Actual y-value: %s" % (batch_ys[0],))

                    if step % FLAGS.log_step == 0:
                        summary = sess.run(merged, feed_dict=feed)
                        train_writer.add_summary(summary, global_step=step)

                    # For debugging, stop this epoch's batch loop early.
                    if FLAGS.debug and step > 5:
                        break

                    sess.run(train_step, feed_dict=feed)
                    step += 1
            print("Completed fine-tuning of parameters.")

            # One entry per hidden layer instead of a fixed three, so stacks
            # of other depths work; keys are unchanged for a 3-layer stack.
            tuned_params = {}
            for index, layer in enumerate(self.hidden_layers):
                tuned_params["layer%d_weights" % (index + 1)] = sess.run(
                    layer.get_weight_variable())
            tuned_params["weights"] = sess.run(self.W)
            tuned_params["biases"] = sess.run(self.b)
            # NOTE(review): the keys say "layer1" but the values come from
            # hidden_layers[1] (the second layer) -- confirm which is meant.
            tuned_params["layer1_pop_mean"] = sess.run(
                self.hidden_layers[1].pop_mean)
            tuned_params["layer1_pop_var"] = sess.run(
                self.hidden_layers[1].pop_var)
            return tuned_params
    def pre_train_layer(self, depth, data, epoch):
        """Pretrain a single hidden layer as a denoising autoencoder.

        The raw input is encoded up to `depth` with `self.get_encoded_input`,
        corrupted with `corrupt(..., corruption_level=self.noise)`, passed
        through the layer's encoder, dropout (keep_prob=0.5) and decoder, and
        the layer is trained with Adam to minimise
        `self.get_l2_loss(x_latent, decoded)` plus an L2 penalty on the layer's
        weights scaled by `FLAGS.deacy_factor`. Only this layer's weights,
        biases and decode biases are optimised. Summaries are written to
        "model/<layer name>".

        Side effect: sets `self.pretrain_lr` to 0.01 on every call.

        :param depth: An int, index of the layer in `self.hidden_layers`.
        :param data: A mutable sequence of training inputs. It is shuffled IN PLACE at the start
            of every epoch and split with `utilities.gen_batches`; each resulting batch is fed to
            a placeholder of shape [None, self.input_dim].
        :param epoch: An int, the number of passes over `data`.
        """
        self.pretrain_lr = 0.01
        sess = self.sess
        # Single-argument form prints the same text under the print statement
        # and the print function.
        print('Starting to pretrain layer %d.' % depth)
        hidden_layer = self.hidden_layers[depth]
        summary_list = []
        with tf.name_scope(hidden_layer.name):
            with tf.name_scope("x_values"):
                x_original = tf.placeholder(tf.float32,
                                            shape=[None, self.input_dim])
                x_latent = self.get_encoded_input(x_original, depth)
                x_corrupt = corrupt(x_latent, corruption_level=self.noise)

            with tf.name_scope("encoded_and_decoded"):
                encoded = hidden_layer.encode(x_corrupt)
                encoded = tf.nn.dropout(encoded, keep_prob=0.5)
                decoded = hidden_layer.decode(encoded)
                attach_variable_summaries(hidden_layer.get_weight_variable(),
                                          "weights",
                                          summ_list=summary_list)

            with tf.name_scope('weights_norm'):
                weights_norm = tf.reduce_sum(
                    input_tensor=FLAGS.deacy_factor *
                    tf.nn.l2_loss(hidden_layer.get_weight_variable()),
                    name='weights_norm')

            # Reconstruction loss
            with tf.name_scope("reconstruction_loss"):
                val_loss = self.get_l2_loss(x_latent, decoded)
                loss = val_loss + weights_norm
                attach_scalar_summary(loss,
                                      "%s_loss" % 'l2_loss',
                                      summ_list=summary_list)

            # Built once, outside the training loop: creating this op inside
            # the loop would add a new node to the graph on every logging step.
            encoded_mean = tf.reduce_mean(encoded)

            trainable_vars = [
                hidden_layer.weights, hidden_layer.biases,
                hidden_layer.decode_biases
            ]

            # Only optimize variables for this layer ("greedy")
            with tf.name_scope("train_step"):
                train_op = tf.train.AdamOptimizer(
                    learning_rate=self.pretrain_lr).minimize(
                        loss, var_list=trainable_vars)

            # NOTE(review): this runs the initializer of every variable in the
            # graph, including layers trained by earlier calls -- confirm that
            # is intended.
            sess.run(tf.initialize_all_variables())

            # Merge summaries and get a summary writer
            merged = tf.merge_summary(summary_list)
            pretrain_writer = tf.train.SummaryWriter(
                "model/" + hidden_layer.name, sess.graph)

            step = 0
            for _ in range(epoch):
                np.random.shuffle(data)
                batches = list(utilities.gen_batches(data, FLAGS.batch_size))
                for batch_x_original in batches:
                    feed = {x_original: batch_x_original}
                    sess.run(train_op, feed_dict=feed)

                    if step % self.print_step == 0:
                        loss_value, encoded_mean_value = sess.run(
                            [loss, encoded_mean], feed_dict=feed)
                        # The value labelled "weights_mean" is the mean of the
                        # encoded activations; the label is kept for log
                        # compatibility.
                        print("Step %s, batch %s loss = %s, weights_mean=%s" %
                              (step, 'l2_loss', loss_value,
                               encoded_mean_value))

                    if step % FLAGS.log_step == 0:
                        summary = sess.run(merged, feed_dict=feed)
                        pretrain_writer.add_summary(summary, global_step=step)

                    # Break for debugging purposes
                    if FLAGS.debug and step > 5:
                        break
                    step += 1
            print(
                "Finished pretraining of layer %d. Updated layer weights and biases."
                % depth)
Ejemplo n.º 6
0
    # loss = auto_encoder.loss_corss_entropy(output)
    optimize = auto_encoder.train(loss)
    auto_encoder.summary_parameter(loss)

    sess = tf.Session()
    init = tf.initialize_all_variables()
    sess.run(init)

    writer = tf.train.SummaryWriter('model', sess.graph)
    summary = tf.merge_all_summaries()

    x_corrupted = _corrupt_input(o_train_set_x)
    shuff = zip(o_train_set_x, x_corrupted)
    for step in range(FLAGS.epochs):
        np.random.shuffle(shuff)
        batches = [_ for _ in utilities.gen_batches(shuff, FLAGS.batch_size)]
        start_time = time.time()
        for batch in batches:
            x_batch, x_corr_batch = zip(*batch)
            _, loss_value, summary_val, output_val = sess.run(
                [optimize, loss, summary, output],
                feed_dict={
                    auto_encoder.input: x_corr_batch,
                    auto_encoder.input_with_out_noise: x_batch
                })
            writer.add_summary(summary_val)
        duration = time.time() - start_time
        # Write the summaries and print an overview fairly often.
        if step % 100 == 0:
            # Print status to stdout.
            print('Step %d: loss = %.2f (%.3f sec)   %.2f ' %
Ejemplo n.º 7
0
    def finetune_parameters_gen(self, xy_train_gen, output_dim, epochs):
        """Fine-tune the encoder stack together with a new softmax layer.

        Creates the softmax variables `self.W` (via
        `weight_variable(self.output_dim, output_dim, ...)`) and `self.b`,
        stacks them on the fully encoded input
        (`self.get_encoded_input(x, depth=-1)`), and minimises the softmax
        cross-entropy with Adam at `self.finetune_lr`. Summaries are written to
        `FLAGS.log_dir + "/train/finetune"`.

        :param xy_train_gen: A mutable sequence whose batches unpack into (x, y) pairs, with y in
            one-hot form. It is shuffled IN PLACE at the start of every epoch and then split with
            `utilities.gen_batches`, so a one-shot generator is not suitable.
        :param output_dim: An int, the number of target classes (width of the `y_actual` placeholder).
        :param epochs: An int, the number of passes over `xy_train_gen`.
        :return: A dict with one "layer<N>_weights" entry per hidden layer (N is 1-based), plus
            "weights" and "biases" for the softmax layer.
        """
        sess = self.sess
        summary_list = []

        print("Starting to fine tune parameters of network.")
        with tf.name_scope("finetuning"):
            with tf.name_scope("inputs"):
                x = tf.placeholder(tf.float32,
                                   shape=[None, self.input_dim],
                                   name="raw_input")
                with tf.name_scope("fully_encoded"):
                    x_encoded = self.get_encoded_input(
                        x, depth=-1)  # Full depth encoding

            # Note on W below: The difference between self.output_dim and output_dim is that the
            # former is the output dimension of the autoencoder stack, which is the dimension of
            # the new feature space. The latter is the dimension of the y value space for
            # classification. Ex: If the output should be binary, then the output_dim = 2.
            with tf.name_scope("softmax_variables"):
                self.W = weight_variable(self.output_dim,
                                         output_dim,
                                         name="weights")
                self.b = bias_variable(output_dim,
                                       initial_value=0,
                                       name="biases")
                attach_variable_summaries(self.W,
                                          self.W.name,
                                          summ_list=summary_list)
                attach_variable_summaries(self.b,
                                          self.b.name,
                                          summ_list=summary_list)
            with tf.name_scope("outputs"):
                y_logits = tf.matmul(x_encoded, self.W) + self.b
                with tf.name_scope("predicted"):
                    y_pred = tf.nn.softmax(y_logits, name="y_pred")
                    attach_variable_summaries(y_pred,
                                              y_pred.name,
                                              summ_list=summary_list)
                with tf.name_scope("actual"):
                    y_actual = tf.placeholder(tf.float32,
                                              shape=[None, output_dim],
                                              name="y_actual")
                    attach_variable_summaries(y_actual,
                                              y_actual.name,
                                              summ_list=summary_list)

            with tf.name_scope("cross_entropy"):
                # Keyword arguments keep logits/labels from being swapped if
                # the positional order of this API differs between versions.
                cross_entropy = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(
                        logits=y_logits, labels=y_actual))
                attach_scalar_summary(cross_entropy,
                                      "cross_entropy",
                                      summ_list=summary_list)

            trainable_vars = self.get_all_variables(
                additional_layer=[self.W, self.b])

            with tf.name_scope("train_step"):
                train_step = tf.train.AdamOptimizer(
                    learning_rate=self.finetune_lr).minimize(
                        cross_entropy, var_list=trainable_vars)

            with tf.name_scope("evaluation"):
                correct_prediction = tf.equal(tf.argmax(y_pred, 1),
                                              tf.argmax(y_actual, 1))
                accuracy = tf.reduce_mean(
                    tf.cast(correct_prediction, tf.float32))
                attach_scalar_summary(accuracy,
                                      "finetune_accuracy",
                                      summ_list=summary_list)

            # NOTE(review): this runs the initializer of every variable in the
            # graph, not only the softmax variables created above -- confirm
            # that resetting previously trained variables is intended.
            sess.run(tf.initialize_all_variables())

            # Merge summaries and get a summary writer
            merged = tf.merge_summary(summary_list)
            train_writer = tf.train.SummaryWriter(
                FLAGS.log_dir + "/train/finetune", sess.graph)

            step = 0
            for _ in range(epochs):
                np.random.shuffle(xy_train_gen)
                train_data_batchs = list(
                    utilities.gen_batches(xy_train_gen, FLAGS.batch_size))
                for batch in train_data_batchs:
                    batch_xs, batch_ys = zip(*batch)
                    feed = {x: batch_xs, y_actual: batch_ys}

                    if step % self.print_step == 0:
                        # One pre-formatted string prints the same way under
                        # the print statement and the print function (a
                        # two-argument print() shows a tuple on Python 2).
                        print("Step %s, batch accuracy: %s" %
                              (step, sess.run(accuracy, feed_dict=feed)))

                    # For debugging predicted y values
                    if step % (self.print_step * 10) == 0:
                        first_pred = sess.run(y_pred,
                                              feed_dict={x: batch_xs})[0]
                        print("Predicted y-value: %s" % (first_pred,))
                        print("Actual y-value: %s" % (batch_ys[0],))

                    if step % FLAGS.log_step == 0:
                        summary = sess.run(merged, feed_dict=feed)
                        train_writer.add_summary(summary, global_step=step)

                    # For debugging, stop this epoch's batch loop early.
                    if FLAGS.debug and step > 5:
                        break

                    sess.run(train_step, feed_dict=feed)
                    step += 1
            print("Completed fine-tuning of parameters.")

            # One entry per hidden layer instead of a fixed three, so stacks
            # of other depths work; keys are unchanged for a 3-layer stack.
            tuned_params = {}
            for index, layer in enumerate(self.hidden_layers):
                tuned_params["layer%d_weights" % (index + 1)] = sess.run(
                    layer.get_weight_variable())
            tuned_params["weights"] = sess.run(self.W)
            tuned_params["biases"] = sess.run(self.b)
            return tuned_params
Ejemplo n.º 8
0
            tuned_params = {"weights": sess.run(W), "biases": sess.run(b)}

            return tuned_params


if __name__ == '__main__':
    # The autoencoder does all of its graph work through this session.
    sess = tf.Session()

    # Unconfigured stacked autoencoder with dims 784 -> 256 -> 64 -> 32 and a
    # relu activation for each of the three transitions.
    sda = SDAutoencoder(dims=[784, 256, 64, 32],
                        activations=["relu"] * 3,
                        noise=0.1,
                        sess=sess)

    # The full pipeline is kept disabled; only layer 0 is pretrained below.
    #   sda.pre_train_network()                            # pretrain every layer
    #   sda.finetune_parameters(epochs=10, output_dim=10)  # softmax fine-tuning
    #   sda.write_encoded_input(filepath="data/transformed.csv", x_test_path=FLAGS.x_train_path)

    data = input_data.read_data_sets("data/MNIST", one_hot=True)
    temp_train = data.train.images

    # 100 rounds: reshuffle the images in place, re-batch them, and hand the
    # batches to the first layer's pretraining routine.
    for i in range(100):
        np.random.shuffle(temp_train)
        x_train = list(utilities.gen_batches(temp_train, FLAGS.batch_size))
        sda.pre_train_layer(0, x_train)