Example #1
0
 def single_step(features, labels):
   """Runs one evaluation step: a forward pass followed by metric updates."""
   # Inference-mode forward pass (no dropout / batch-norm statistic updates).
   _, supervised_head_outputs = model(features, training=False)
   assert supervised_head_outputs is not None
   label_values = labels['labels']
   metrics.update_finetune_metrics_eval(label_top_1_accuracy,
                                        label_top_5_accuracy,
                                        supervised_head_outputs, label_values)
   # Track the weight-decay regularization term as its own eval metric.
   regularization_loss.update_state(
       model_lib.add_weight_decay(model, adjust_per_optimizer=True))
Example #2
0
 def single_step(features, labels):
   """Runs one training step: forward pass, loss accumulation, and update.

   Depending on which model heads are active, accumulates a contrastive loss
   and/or a supervised loss, adds weight decay, updates the corresponding
   metrics, scales the loss by the replica count, and applies gradients.

   Args:
     features: Rank-4 image batch; only the first 3 channels are logged as an
       image summary (extra channels, if any, are ignored for logging).
     labels: Dict containing a 'labels' entry with the supervised targets.
   """
   with tf.GradientTape() as tape:
     # Log summaries on the last step of the training loop to match
     # logging frequency of other scalar summaries.
     #
     # Notes:
     # 1. Summary ops on TPUs get outside compiled so they do not affect
     #    performance.
     # 2. Summaries are recorded only on replica 0. So effectively this
     #    summary would be written once per host when should_record == True.
     # 3. optimizer.iterations is incremented in the call to apply_gradients.
     #    So we use  `iterations + 1` here so that the step number matches
     #    those of scalar summaries.
     # 4. We intentionally run the summary op before the actual model
     #    training so that it can run in parallel.
     should_record = tf.equal((optimizer.iterations + 1) % steps_per_loop, 0)
     with tf.summary.record_if(should_record):
       # Only log augmented images for the first tower.
       tf.summary.image(
           'image', features[:, :, :, :3], step=optimizer.iterations + 1)
     projection_head_outputs, supervised_head_outputs = model(
         features, training=True)
     # Accumulate whichever head losses are present; `loss` stays None until
     # the first head contributes.
     loss = None
     if projection_head_outputs is not None:
       outputs = projection_head_outputs
       con_loss, logits_con, labels_con = obj_lib.add_contrastive_loss(
           outputs,
           hidden_norm=FLAGS.hidden_norm,
           temperature=FLAGS.temperature,
           strategy=strategy)
       if loss is None:
         loss = con_loss
       else:
         loss += con_loss
       metrics.update_pretrain_metrics_train(contrast_loss_metric,
                                             contrast_acc_metric,
                                             contrast_entropy_metric,
                                             con_loss, logits_con,
                                             labels_con)
     if supervised_head_outputs is not None:
       outputs = supervised_head_outputs
       l = labels['labels']
       if FLAGS.train_mode == 'pretrain' and FLAGS.lineareval_while_pretraining:
         # Duplicate labels — presumably to match a doubled batch produced by
         # two augmented views during pretraining; confirm against the input
         # pipeline.
         l = tf.concat([l, l], 0)
       sup_loss = obj_lib.add_supervised_loss(labels=l, logits=outputs)
       if loss is None:
         loss = sup_loss
       else:
         loss += sup_loss
       metrics.update_finetune_metrics_train(supervised_loss_metric,
                                             supervised_acc_metric, sup_loss,
                                             l, outputs)
     # NOTE(review): if neither head produced an output, `loss` is still None
     # here and the += below would raise — presumably the model guarantees at
     # least one active head; confirm.
     weight_decay = model_lib.add_weight_decay(
         model, adjust_per_optimizer=True)
     weight_decay_metric.update_state(weight_decay)
     loss += weight_decay
     total_loss_metric.update_state(loss)
     # The default behavior of `apply_gradients` is to sum gradients from all
     # replicas so we divide the loss by the number of replicas so that the
     # mean gradient is applied.
     loss = loss / strategy.num_replicas_in_sync
     logging.info('Trainable variables:')
     for var in model.trainable_variables:
       logging.info(var.name)
     grads = tape.gradient(loss, model.trainable_variables)
     optimizer.apply_gradients(zip(grads, model.trainable_variables))