def print_model_summary(network):
    """Build *network* with the configured input shape and print its Keras summary.

    Building with a dynamic (None) batch dimension lets summary() report
    per-layer output shapes before any real data has been seen.
    """
    network.build(input_shape=(None, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS))
    network.summary()


if __name__ == '__main__':
    print("Train start\n")
    # GPU settings: enable memory growth so TensorFlow allocates GPU memory
    # on demand instead of grabbing it all at startup.
    gpus = tf.config.list_physical_devices("GPU")
    if gpus:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    print("Generate the Data Set\n")
    dataset = TFDataset()
    train_data, train_count = dataset.generate_datatset()
    ssd = SSD()
    print_model_summary(network=ssd)
    if load_weights_before_training:
        # Resume from the checkpoint saved for the configured epoch.
        ssd.load_weights(filepath=save_model_dir + "epoch-{}".format(load_weights_from_epoch))
        print("Successfully load weights!")
    else:
        # -1 makes the training loop start from epoch 0 (range(-1 + 1, ...)).
        load_weights_from_epoch = -1
    # loss
    loss = SSDLoss()
    # optimizer
    # NOTE(review): this __main__ block ends here with no optimizer or training
    # loop — it appears truncated or superseded by main() defined below.
    # Confirm which entry point is the live one and remove the dead variant.
def main():
    """Train the SSD detector.

    Loads the dataset, optionally restores weights from a checkpoint,
    runs the epoch/step training loop with an exponentially-decayed Adam
    optimizer, logs running loss metrics per step, and checkpoints the
    model every `save_frequency` epochs.

    Relies on module-level configuration (NUM_CLASSES, BATCH_SIZE, EPOCHS,
    save_model_dir, load_weights_from_epoch, save_frequency,
    test_images_during_training, test_images_dir_list) and helpers
    (TFDataset, SSD, MultiBoxLoss, ReadDataset, visualize_training_results,
    print_model_summary) defined elsewhere in the project.
    """
    dataset = TFDataset()
    train_data, train_count = dataset.generate_datatset()
    model = SSD()
    print_model_summary(model)
    # Resume from a checkpoint when a non-negative epoch index is configured.
    if load_weights_from_epoch >= 0:
        model.load_weights(filepath=save_model_dir + "epoch-{}".format(load_weights_from_epoch))
        print("成功从epoch-{}加载模型权重!".format(load_weights_from_epoch))

    loss_fn = MultiBoxLoss(num_classes=NUM_CLASSES, overlap_thresh=0.5, neg_pos=3)

    # optimizer: Adam with an exponentially decayed learning rate.
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=1e-3,
        decay_steps=20000,
        decay_rate=0.96)
    optimizer = tf.optimizers.Adam(learning_rate=lr_schedule)

    # Running means, reset at the end of every epoch.
    loss_metric = tf.metrics.Mean()
    cls_loss_metric = tf.metrics.Mean()
    reg_loss_metric = tf.metrics.Mean()

    # Hoisted loop invariants: the original rebuilt ReadDataset() and
    # recomputed ceil(train_count / BATCH_SIZE) on every training step.
    reader = ReadDataset()
    steps_per_epoch = tf.math.ceil(train_count / BATCH_SIZE)

    for epoch in range(load_weights_from_epoch + 1, EPOCHS):
        start_time = time.time()
        for step, batch_data in enumerate(train_data):
            images, labels = reader.read(batch_data)
            with tf.GradientTape() as tape:
                predictions = model(images, training=True)
                # loss_l = localization (regression) loss, loss_c = confidence
                # (classification) loss.
                loss_l, loss_c = loss_fn(y_true=labels, y_pred=predictions)
                total_loss = loss_l + loss_c
            gradients = tape.gradient(total_loss, model.trainable_variables)
            optimizer.apply_gradients(
                grads_and_vars=zip(gradients, model.trainable_variables))

            loss_metric.update_state(values=total_loss)
            cls_loss_metric.update_state(values=loss_c)
            reg_loss_metric.update_state(values=loss_l)

            # Average wall-clock time per step within this epoch.
            time_per_step = (time.time() - start_time) / (step + 1)
            print(
                "Epoch: {}/{}, step: {}/{}, speed: {:.2f}s/step, loss: {:.10f}, "
                "cls loss: {:.10f}, reg loss: {:.10f}".format(
                    epoch, EPOCHS, step, steps_per_epoch,
                    time_per_step, loss_metric.result(),
                    cls_loss_metric.result(), reg_loss_metric.result()))

        loss_metric.reset_states()
        cls_loss_metric.reset_states()
        reg_loss_metric.reset_states()

        if epoch % save_frequency == 0:
            model.save_weights(filepath=save_model_dir + "epoch-{}".format(epoch),
                               save_format="tf")
        if test_images_during_training:
            visualize_training_results(pictures=test_images_dir_list,
                                       model=model, epoch=epoch)
# Final checkpoint after all epochs complete, saved as "epoch-{EPOCHS}".
# NOTE(review): `model` is local to main() above — executed at module level
# this would raise NameError; presumably this line lost its indentation and
# belongs at the end of main(). Confirm against the original script.
model.save_weights(filepath=save_model_dir + "epoch-{}".format(EPOCHS), save_format="tf")