Code example #1
    def apply_loss_function(self, global_step):
        # Loss: cross-entropy term plus weighted quantization term
        self.cos_loss = cross_entropy_loss(self.img_last_layer,
                                           self.img_label,
                                           self.alpha,
                                           normed=True,
                                           balanced=True)
        self.q_loss = self.cq_lambda * quantization_loss(self.img_last_layer)
        self.loss = self.cos_loss + self.q_loss

        # The last layer trains with a 10x learning rate
        self.lr = tf.train.exponential_decay(self.learning_rate,
                                             global_step,
                                             self.decay_step,
                                             self.learning_rate_decay_factor,
                                             staircase=True)
        opt = tf.train.MomentumOptimizer(learning_rate=self.lr, momentum=0.9)
        grads_and_vars = opt.compute_gradients(
            self.loss, self.train_layers + self.train_last_layer)
        fcgrad, _ = grads_and_vars[-2]
        fbgrad, _ = grads_and_vars[-1]

        # for debug
        self.grads_and_vars = grads_and_vars
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('ce_loss', self.cos_loss)
        tf.summary.scalar('q_loss', self.q_loss)
        tf.summary.scalar('lr', self.lr)
        self.merged = tf.summary.merge_all()

        if self.finetune_all:
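            # Each odd-indexed gradient (presumably a bias term) is doubled
            # relative to its weight gradient, and the fresh last layer is
            # boosted by 10x/20x so it adapts faster than the fine-tuned
            # earlier layers.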
            return opt.apply_gradients(
                [(grads_and_vars[0][0], self.train_layers[0]),
                 (grads_and_vars[1][0] * 2, self.train_layers[1]),
                 (grads_and_vars[2][0], self.train_layers[2]),
                 (grads_and_vars[3][0] * 2, self.train_layers[3]),
                 (grads_and_vars[4][0], self.train_layers[4]),
                 (grads_and_vars[5][0] * 2, self.train_layers[5]),
                 (grads_and_vars[6][0], self.train_layers[6]),
                 (grads_and_vars[7][0] * 2, self.train_layers[7]),
                 (grads_and_vars[8][0], self.train_layers[8]),
                 (grads_and_vars[9][0] * 2, self.train_layers[9]),
                 (grads_and_vars[10][0], self.train_layers[10]),
                 (grads_and_vars[11][0] * 2, self.train_layers[11]),
                 (grads_and_vars[12][0], self.train_layers[12]),
                 (grads_and_vars[13][0] * 2, self.train_layers[13]),
                 (fcgrad * 10, self.train_last_layer[0]),
                 (fbgrad * 20, self.train_last_layer[1])],
                global_step=global_step)
        else:
            return opt.apply_gradients(
                [(fcgrad * 10, self.train_last_layer[0]),
                 (fbgrad * 20, self.train_last_layer[1])],
                global_step=global_step)
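The per-variable gradient scaling above is a general trick, not tied to TF1. Below is a minimal sketch of the same idea with tf.GradientTape in TF2; the tiny backbone/head model, the random data, and the 10x factor are illustrative assumptions, not part of the original project.

import tensorflow as tf

# Hypothetical two-part model: a "backbone" and a freshly initialized head.
backbone = tf.keras.layers.Dense(16, activation="relu")
head = tf.keras.layers.Dense(2)
opt = tf.keras.optimizers.SGD(learning_rate=1e-3, momentum=0.9)

x = tf.random.normal([8, 4])
y = tf.one_hot(tf.random.uniform([8], maxval=2, dtype=tf.int32), depth=2)

with tf.GradientTape() as tape:
    logits = head(backbone(x))
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))

variables = backbone.trainable_variables + head.trainable_variables
grads = tape.gradient(loss, variables)
n_backbone = len(backbone.trainable_variables)
# Scale the head's gradients by 10, mirroring the 10x last-layer rate above.
scaled = grads[:n_backbone] + [g * 10.0 for g in grads[n_backbone:]]
opt.apply_gradients(zip(scaled, variables))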
Code example #2
def train(model: models.Model,
          optimizer: optimizers.Optimizer,
          train_instances: List[Dict[str, np.ndarray]],
          validation_instances: List[Dict[str, np.ndarray]],
          num_epochs: int,
          batch_size: int,
          serialization_dir: str = None) -> Dict[str, Any]:
    """
    Trains a model on the given training instances as configured and stores
    the relevant files in serialization_dir. Returns the model and some
    important metrics.
    """

    print("\nGenerating Training batches:")
    train_batches = generate_batches(train_instances, batch_size)
    print("Generating Validation batches:")
    validation_batches = generate_batches(validation_instances, batch_size)

    train_batch_labels = [
        batch_inputs.pop("labels") for batch_inputs in train_batches
    ]
    validation_batch_labels = [
        batch_inputs.pop("labels") for batch_inputs in validation_batches
    ]

    tensorboard_logs_path = os.path.join(serialization_dir,
                                         'tensorboard_logs')
    tensorboard_writer = tf.summary.create_file_writer(tensorboard_logs_path)
    best_epoch_validation_accuracy = float("-inf")
    best_epoch_validation_loss = float("inf")
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch}")

        total_training_loss = 0
        total_correct_predictions, total_predictions = 0, 0
        generator_tqdm = tqdm(list(zip(train_batches, train_batch_labels)))
        for index, (batch_inputs, batch_labels) in enumerate(generator_tqdm):
            with tf.GradientTape() as tape:
                logits = model(**batch_inputs, training=True)["logits"]
                loss_value = cross_entropy_loss(logits, batch_labels)
            # Take the gradient outside the tape context so the gradient
            # computation itself is not traced.
            grads = tape.gradient(loss_value, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            total_training_loss += loss_value
            batch_predictions = np.argmax(tf.nn.softmax(logits,
                                                        axis=-1).numpy(),
                                          axis=-1)
            total_correct_predictions += (
                batch_predictions == batch_labels).sum()
            total_predictions += batch_labels.shape[0]
            description = (
                "Average training loss: %.2f Accuracy: %.2f " %
                (total_training_loss /
                 (index + 1), total_correct_predictions / total_predictions))
            generator_tqdm.set_description(description, refresh=False)
        average_training_loss = total_training_loss / len(train_batches)
        training_accuracy = total_correct_predictions / total_predictions

        total_validation_loss = 0
        total_correct_predictions, total_predictions = 0, 0
        generator_tqdm = tqdm(
            list(zip(validation_batches, validation_batch_labels)))
        for index, (batch_inputs, batch_labels) in enumerate(generator_tqdm):
            logits = model(**batch_inputs, training=False)["logits"]
            loss_value = cross_entropy_loss(logits, batch_labels)
            total_validation_loss += loss_value
            batch_predictions = np.argmax(tf.nn.softmax(logits,
                                                        axis=-1).numpy(),
                                          axis=-1)
            total_correct_predictions += (
                batch_predictions == batch_labels).sum()
            total_predictions += batch_labels.shape[0]
            description = (
                "Average validation loss: %.2f Accuracy: %.2f " %
                (total_validation_loss /
                 (index + 1), total_correct_predictions / total_predictions))
            generator_tqdm.set_description(description, refresh=False)
        average_validation_loss = total_validation_loss / len(
            validation_batches)
        validation_accuracy = total_correct_predictions / total_predictions

        if validation_accuracy > best_epoch_validation_accuracy:
            print(
                "Model with best validation accuracy so far: %.2f. Saving the model."
                % (validation_accuracy))
            model.save_weights(
                os.path.join(serialization_dir, 'model.ckpt'))
            best_epoch_validation_loss = average_validation_loss
            best_epoch_validation_accuracy = validation_accuracy

        with tensorboard_writer.as_default():
            tf.summary.scalar("loss/training",
                              average_training_loss,
                              step=epoch)
            tf.summary.scalar("loss/validation",
                              average_validation_loss,
                              step=epoch)
            tf.summary.scalar("accuracy/training",
                              training_accuracy,
                              step=epoch)
            tf.summary.scalar("accuracy/validation",
                              validation_accuracy,
                              step=epoch)
        tensorboard_writer.flush()

    metrics = {
        "training_loss": float(average_training_loss),
        "validation_loss": float(average_validation_loss),
        "training_accuracy": float(training_accuracy),
        "best_epoch_validation_accuracy":
        float(best_epoch_validation_accuracy),
        "best_epoch_validation_loss": float(best_epoch_validation_loss)
    }

    print("Best epoch validation accuracy: %.4f, validation loss: %.4f" %
          (best_epoch_validation_accuracy, best_epoch_validation_loss))

    return {"model": model, "metrics": metrics}
Code example #3
File: xor_problem.py Project: trevormcinroe/msds458
epoch_counter = 0

while epoch_counter < EPOCHS:
    # Grabbing a mini-batch
    X_mb, y_mb = mb.fetch_minibatch()

    # Explicit check to see if we have run out of data
    # If so, increment the epoch and reset the MiniBatcher
    if isinstance(X_mb, bool):
        epoch_counter += 1
        mb.new_epoch()
        X_mb, y_mb = mb.fetch_minibatch()

    output = nn.forward_pass(input=X_mb)
    sm_output = nn.softmax(input=output)
    loss = cross_entropy_loss(y_pred=sm_output, y_actual=y_mb)
    grad = nn.get_gradient(input=X_mb, y_pred=sm_output, y_actual=y_mb)
    adam.update_weights(weights=nn.weights, gradient=grad)
    historical_losses.append(loss)

# Our final prediction...
y_pred = nn.softmax(nn.forward_pass(input=train_samples))
print(f'Trained network predictions: {np.argmax(y_pred, axis=1)}')
print(f'Ground-truth values: {train_labels}')
if np.array_equal(np.argmax(y_pred, axis=1), train_labels):
    print('Congrats, your network has solved the XOR problem!')
else:
    print('Looks like your network is not quite there... Try more epochs.')

# Converting the historical_losses list into a plot...
plt.plot(historical_losses)
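The nn, mb, and loss helpers aren't part of this excerpt. For reference, here is a minimal NumPy sketch of softmax and a cross_entropy_loss with this call signature; it is an assumption about their behavior, not the project's actual code:

import numpy as np

def softmax(input):
    # Subtract the row max for numerical stability.
    z = input - input.max(axis=1, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=1, keepdims=True)

def cross_entropy_loss(y_pred, y_actual):
    # y_pred: (n, classes) probabilities; y_actual: (n,) integer labels.
    n = y_pred.shape[0]
    return float(-np.log(y_pred[np.arange(n), y_actual] + 1e-12).mean())

probs = softmax(np.array([[2.0, 0.5], [0.1, 1.9]]))
print(cross_entropy_loss(y_pred=probs, y_actual=np.array([0, 1])))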
Code example #4
# params
lr = 1e-5
total_updates = 10000
log_interval = 1000

F = Model()
D = Data("../data/mnist/train-images", batch_size=32)

x_train, y_train = D.get_batch()
x_train = tf.reshape(x_train, [-1, 28 * 28])

opt = tf.train.GradientDescentOptimizer(lr)

y_logits, _ = F.inference(x_train)
loss = cross_entropy_loss(logits=y_logits, labels=y_train)

correct_prediction = tf.equal(y_train,
                              tf.cast(tf.argmax(y_logits, 1), dtype=tf.int32))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

back_prop = opt.minimize(loss)

with tf.Session() as sess:
  # TensorFlow bookkeeping: initialize variables and start the input
  # queue-runner threads.
  init = tf.global_variables_initializer()
  coord = tf.train.Coordinator()
  threads = tf.train.start_queue_runners(sess=sess, coord=coord)

  sess.run(init)
  for i in range(1, total_updates + 1):
    # The source is truncated at this sess.run call; a plausible completion
    # runs the training op and logs metrics every log_interval updates.
    _, loss_val, acc_val = sess.run([back_prop, loss, accuracy])
    if i % log_interval == 0:
      print("step %d: loss = %.4f, accuracy = %.4f" % (i, loss_val, acc_val))

  coord.request_stop()
  coord.join(threads)
Code example #5
def train_svhn():
    """Train SVHN data for a number of steps."""
    graph = tf.Graph()
    with graph.as_default():

        # Get images and labels for SVHN.
        train_dataset, test_dataset, train_labels, \
        test_labels, train_lengths, test_lengths = preprocessing.load_svhn()

        # Input data.
        tf_train_dataset = tf.placeholder(tf.float32,
                                          shape=(FLAGS.BATCH_SIZE,
                                                 FLAGS.IM_SIZE, FLAGS.IM_SIZE,
                                                 FLAGS.num_channels))
        tf_train_lengths = tf.placeholder(tf.int32, shape=(FLAGS.BATCH_SIZE,))
        tf_train_labels1 = tf.placeholder(tf.int32, shape=(FLAGS.BATCH_SIZE,))
        tf_train_labels2 = tf.placeholder(tf.int32, shape=(FLAGS.BATCH_SIZE,))
        tf_train_labels3 = tf.placeholder(tf.int32, shape=(FLAGS.BATCH_SIZE,))
        tf_train_labels4 = tf.placeholder(tf.int32, shape=(FLAGS.BATCH_SIZE,))
        tf_train_labels5 = tf.placeholder(tf.int32, shape=(FLAGS.BATCH_SIZE,))
        #tf_test_dataset = tf.constant(test_dataset)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits0, logits1, logits2, logits3, logits4, logits5 = svhn_model(
            tf_train_dataset)
        train_predictions0, train_predictions1, train_predictions2, train_predictions3, train_predictions4, train_predictions5 = [
            tf.nn.softmax(logits0),
            tf.nn.softmax(logits1),
            tf.nn.softmax(logits2),
            tf.nn.softmax(logits3),
            tf.nn.softmax(logits4),
            tf.nn.softmax(logits5)
        ]

        # Calculate loss.
        loss = (cross_entropy_loss(logits0, tf_train_lengths) +
                cross_entropy_loss(logits1, tf_train_labels1) +
                cross_entropy_loss(logits2, tf_train_labels2) +
                cross_entropy_loss(logits3, tf_train_labels3) +
                cross_entropy_loss(logits4, tf_train_labels4) +
                cross_entropy_loss(logits5, tf_train_labels5))

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        global_step = tf.Variable(0, trainable=False)
        learning_rate = tf.train.exponential_decay(1e-3,
                                                   global_step,
                                                   7500,
                                                   0.5,
                                                   staircase=True)
        optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(
            loss, global_step=global_step)

        # The initializer must be created inside this graph so that it
        # covers the variables defined above.
        init = tf.global_variables_initializer()

    with tf.Session(graph=graph) as sess:
        sess.run(init)
        print('Initialized')

        for step in range(FLAGS.MAX_STEPS):
            start_time = time.time()

            offset = (step * FLAGS.BATCH_SIZE) % (train_labels.shape[0] -
                                                  FLAGS.BATCH_SIZE)
            batch_data = train_dataset[offset:(offset +
                                               FLAGS.BATCH_SIZE), :, :, :]
            batch_lengths = train_lengths[offset:(offset + FLAGS.BATCH_SIZE)]
            batch_labels1 = train_labels[offset:(offset + FLAGS.BATCH_SIZE), 0]
            batch_labels2 = train_labels[offset:(offset + FLAGS.BATCH_SIZE), 1]
            batch_labels3 = train_labels[offset:(offset + FLAGS.BATCH_SIZE), 2]
            batch_labels4 = train_labels[offset:(offset + FLAGS.BATCH_SIZE), 3]
            batch_labels5 = train_labels[offset:(offset + FLAGS.BATCH_SIZE), 4]

            feed_dict = {
                tf_train_dataset: batch_data,
                tf_train_lengths: batch_lengths,
                tf_train_labels1: batch_labels1,
                tf_train_labels2: batch_labels2,
                tf_train_labels3: batch_labels3,
                tf_train_labels4: batch_labels4,
                tf_train_labels5: batch_labels5
            }

            _, loss_value, predictions0, predictions1, predictions2, predictions3, predictions4, predictions5 = sess.run(
                [
                    optimizer, loss, train_predictions0, train_predictions1,
                    train_predictions2, train_predictions3, train_predictions4,
                    train_predictions5
                ],
                feed_dict=feed_dict)
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 50 == 0:
                accuracy_batch = ((accuracy(predictions0, batch_lengths) +
                                   accuracy(predictions1, batch_labels1) +
                                   accuracy(predictions2, batch_labels2) +
                                   accuracy(predictions3, batch_labels3) +
                                   accuracy(predictions4, batch_labels4) +
                                   accuracy(predictions5, batch_labels5)) / 6)

                format_str = (
                    '%s: step %d, loss = %.2f, batch accuracy = %.1f%% (%.3f '
                    'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    accuracy_batch, duration))
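The accuracy helper used above isn't part of this excerpt. Given that its outputs are averaged and printed as a percentage, a plausible sketch (an assumption, not the project's code) is:

import numpy as np

def accuracy(predictions, labels):
    # Percent of rows whose argmax class matches the integer label.
    return 100.0 * np.mean(np.argmax(predictions, axis=1) == labels)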
Code example #6
    def compute_loss(self, true_hm, true_wh, true_reg, reg_mask, ind, true_cls):
        hm_loss = loss.focal_loss(self.pred_hm, true_hm) * cfgs.HM_LOSS_WEIGHT
        wh_loss = loss.reg_l1_loss(self.pred_wh, true_wh, ind, reg_mask) * cfgs.WH_LOSS_WEIGHT
        reg_loss = loss.reg_l1_loss(self.pred_reg, true_reg, ind, reg_mask) * cfgs.REG_LOSS_WEIGHT
        cls_loss = loss.cross_entropy_loss(self.pred_cls, true_cls, reg_mask) * cfgs.CLS_LOSS_WEIGHT
        return hm_loss, wh_loss, reg_loss, cls_loss
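Typically the four weighted terms returned above are summed into a single training objective. A short sketch, assuming a model instance exposing compute_loss and batch tensors named as in the signature (both assumptions):

# Hypothetical: combine the four weighted terms into one scalar loss.
hm_loss, wh_loss, reg_loss, cls_loss = model.compute_loss(
    true_hm, true_wh, true_reg, reg_mask, ind, true_cls)
total_loss = hm_loss + wh_loss + reg_loss + cls_loss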