Example #1
import datetime
import math
import sys

import tensorflow as tf

# folder_preparation, generate_datasets, get_model, process_features,
# print_model_summary and the training constants (EPOCHS, BATCH_SIZE, THRESHOLD,
# GLOBAL_LEARNING_RATE, WEIGHT_DECAY, save_every_n_epoch) are assumed to be
# imported from this project's own modules.


def main(argv):
    # The caller (e.g. a frontend) must pass job_id and product_id on the command line
    if len(argv) != 3:
        print(
            "ERROR: Format error, refer to the usage: python test.py job_id product_id"
        )
    elif not argv[1].isdigit():
        print("ERROR: Format error, job_id must be an integer")
    elif not argv[2].isalnum():
        print(
            "ERROR: Format error, product_id must contain only letters and digits, no special characters"
        )
    else:
        print("INFO: Start training model " +
              datetime.datetime.now().strftime("%Y%m%d%H%M%S"))
        # GPU settings: enable memory growth so TensorFlow allocates GPU memory
        # on demand instead of reserving it all up front
        gpus = tf.config.list_physical_devices("GPU")
        if gpus:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)

        # Folder generate for log file and model saving
        log_dir, save_model_dir = folder_preparation(argv[1], argv[2])

        # get the dataset
        train_dataset, valid_dataset, test_dataset, train_count, valid_count, test_count = generate_datasets(
        )

        # create model
        model = get_model()
        print_model_summary(network=model)

        # Target for validation accuracy: the weights are saved only when
        # valid_accuracy reaches this threshold (and the best result so far)
        threshold = THRESHOLD

        # define loss calculation
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

        # RMSprop was tried first but gave worse results, so the optimizer was
        # switched to Adam (SGD with momentum is another option)
        optimizer = tf.keras.optimizers.Adam(learning_rate=GLOBAL_LEARNING_RATE,
                                             decay=WEIGHT_DECAY)
        # optimizer = tf.keras.optimizers.RMSprop(learning_rate = GLOBAL_LEARNING_RATE,momentum = MOMENTUM)
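        # Note (assumption): in tf.keras the legacy `decay` argument applies
        # per-step learning-rate decay, lr / (1 + decay * iterations), not true
        # weight decay, despite the constant's name. For decoupled weight decay,
        # tensorflow_addons' AdamW could be swapped in, e.g.
        # tfa.optimizers.AdamW(weight_decay=WEIGHT_DECAY,
        #                      learning_rate=GLOBAL_LEARNING_RATE)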

        # Define training KPI
        train_loss = tf.keras.metrics.Mean(name='train_loss')
        train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
            name='train_accuracy')

        # Define valid KPI
        valid_loss = tf.keras.metrics.Mean(name='valid_loss')
        valid_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
            name='valid_accuracy')

        # @tf.function is left disabled here: the .numpy() calls below need
        # eager execution and would fail inside a compiled graph
        def train(image_batch, label_batch):
            with tf.GradientTape() as tape:
                predictions = model(image_batch, training=True)
                loss = loss_object(y_true=label_batch, y_pred=predictions)
            gradients = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(
                grads_and_vars=zip(gradients, model.trainable_variables))

            train_loss.update_state(values=loss)
            train_accuracy.update_state(y_true=label_batch, y_pred=predictions)

            return predictions.numpy(), tf.math.argmax(predictions,
                                                       axis=1).numpy()

        # @tf.function is likewise left disabled for the .numpy() call below
        def valid(image_batch, label_batch):
            predictions = model(image_batch, training=False)
            v_loss = loss_object(label_batch, predictions)

            valid_loss.update_state(values=v_loss)
            valid_accuracy.update_state(y_true=label_batch, y_pred=predictions)

            return tf.math.argmax(predictions, axis=1).numpy()
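
        # Sketch (not in the original): a graph-compiled variant of train() is
        # possible if the .numpy() conversions stay outside the compiled
        # function; the caller would call .numpy() on the returned tensors.
        @tf.function
        def train_graph(image_batch, label_batch):
            with tf.GradientTape() as tape:
                predictions = model(image_batch, training=True)
                loss = loss_object(y_true=label_batch, y_pred=predictions)
            gradients = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients, model.trainable_variables))
            train_loss.update_state(loss)
            train_accuracy.update_state(label_batch, predictions)
            return predictions, tf.math.argmax(predictions, axis=1)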

        # start training
        for epoch in range(EPOCHS):
            train_step = 0
            valid_step = 0
            for features in train_dataset:
                train_step += 1
                images, labels = process_features(features,
                                                  data_augmentation=False)
                predictions, predict_labels = train(images, labels)

                # Print per-step progress so the developer can monitor training
                print(
                    "Epoch: {}/{}, step: {}/{}, loss: {:.5f}, accuracy: {:.5f}, softmax(logits):{}, "
                    "predict_label:{}, target_label:{}".format(
                        epoch, EPOCHS, train_step,
                        math.ceil(train_count / BATCH_SIZE),
                        train_loss.result().numpy(),
                        train_accuracy.result().numpy(), predictions,
                        predict_labels, labels))

                # Record information into the log file
                with open(log_dir + "training_result_step" + ".log", "a") as file:
                    file.write("train\t")
                    file.write(datetime.datetime.now().strftime("%Y%m%d%H%M%S") +
                               "\t")
                    file.write(str(epoch) + "\t")
                    file.write(str(train_step) + "\t")
                    file.write(str(train_accuracy.result().numpy()) + "\t")
                    file.write(str(predict_labels) + "\t")
                    file.write(str(labels) + "\n")

            for features in valid_dataset:
                valid_step += 1
                valid_images, valid_labels = process_features(
                    features, data_augmentation=False)
                predict_labels = valid(valid_images, valid_labels)

                with open(log_dir + "training_result_step" + ".log", "a") as file:
                    file.write("validation\t")
                    file.write(datetime.datetime.now().strftime("%Y%m%d%H%M%S") +
                               "\t")
                    file.write(str(epoch) + "\t")
                    file.write(str(valid_step) + "\t")
                    file.write(str(valid_accuracy.result().numpy()) + "\t")
                    file.write(str(predict_labels) + "\t")
                    file.write(str(valid_labels) + "\n")

            # Print an epoch summary so the developer can monitor validation results
            print("Epoch: {}/{}, train loss: {:.5f}, train accuracy: {:.5f}, "
                  "valid loss: {:.5f}, valid accuracy: {:.5f}".format(
                      epoch, EPOCHS,
                      train_loss.result().numpy(),
                      train_accuracy.result().numpy(),
                      valid_loss.result().numpy(),
                      valid_accuracy.result().numpy()))
            # Append an epoch-level record in tab-separated format, easy to
            # analyse with pandas when selecting the best model
            with open(log_dir + "training_result" + ".log", "a") as file:
                file.write(datetime.datetime.now().strftime("%Y%m%d%H%M%S") + "\t")
                file.write(str(epoch) + "\t")
                file.write(str(valid_accuracy.result().numpy()) + "\n")

            valid_accuracy_result = valid_accuracy.result().numpy()

            train_loss.reset_states()
            train_accuracy.reset_states()
            valid_loss.reset_states()
            valid_accuracy.reset_states()

            # Save the weights for evaluation and prediction only when the
            # validation accuracy beats both the threshold and the best result so far
            if epoch % save_every_n_epoch == 0:
                if valid_accuracy_result >= threshold:
                    model.save_weights(filepath=save_model_dir + str(epoch) +
                                       "/model",
                                       save_format='tf')
                    # model._set_inputs(inputs=tf.random.normal(shape=(1, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS)))
                    # tf.keras.models.save_model(model, save_model_dir + str(epoch), save_format='tf')

                    # Threshold update
                    threshold = valid_accuracy_result
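
A minimal entry point for this script is sketched below; the original snippet does not show how main is launched, so the sys.argv forwarding is an assumption:

if __name__ == '__main__':
    # Hypothetical launcher, e.g. `python train.py 42 productA`
    main(sys.argv)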
Example #2
    return images, labels


import os

import tensorflow as tf

# generate_datasets, get_model and print_model_summary are assumed to come
# from this project's own modules.

if __name__ == '__main__':
    print(tf.__name__, ": ", tf.__version__, sep="")
    # GPU settings
    gpus = tf.config.list_physical_devices("GPU")
    if gpus:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    print("Num GPUs Available: ",
          len(tf.config.experimental.list_physical_devices('GPU')))

    # get the dataset
    train_dataset, valid_dataset, test_dataset, train_count, valid_count, test_count = generate_datasets(
    )

    model = get_model()

    checkpoint_save_path = "./saved_model/resnet_101/epoch-0"
    if os.path.exists(checkpoint_save_path + '.index'):
        print('-------------load the model-----------------')
        model.load_weights(checkpoint_save_path)
    #
    # model_save_path = "./saved_model/epoch-50.index"
    # if os.path.exists(model_save_path):
    #     print('-------------load the model-----------------')
    #     model.load_weights(filepath=model_save_path)

    print_model_summary(network=model)
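
If the checkpoint prefix is not known in advance, tf.train.latest_checkpoint can find the most recent one in a directory. A sketch of an alternative to the hard-coded "epoch-0" path above (not part of the original script):

latest = tf.train.latest_checkpoint("./saved_model/resnet_101")
if latest is not None:
    print('-------------load the model-----------------')
    model.load_weights(latest)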
Example #3
import datetime

import tensorflow as tf

# As in Example #1, folder_preparation, generate_datasets, get_model and
# process_features are assumed to come from this project's own modules.


def main(argv):
    # The caller (e.g. a frontend) must pass job_id and product_id on the command line
    if len(argv) != 3:
        print(
            "ERROR: Format error, refer to the usage: python test.py job_id product_id"
        )
    elif not argv[1].isdigit():
        print("ERROR: Format error, job_id must be an integer")
    elif not argv[2].isalnum():
        print(
            "ERROR: Format error, product_id must contain only letters and digits, no special characters"
        )
    else:
        print("INFO: Start evaluating model " +
              datetime.datetime.now().strftime("%Y%m%d%H%M%S"))

        # GPU settings
        gpus = tf.config.list_physical_devices('GPU')
        if gpus:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)

        # Folder generate for log file and model saving
        log_dir, save_model_dir = folder_preparation(argv[1], argv[2])

        # get the original_dataset
        train_dataset, valid_dataset, test_dataset, train_count, valid_count, test_count = generate_datasets(
        )
        # load the model
        model = get_model()
        model.load_weights(filepath=save_model_dir + "model")
        # model = tf.saved_model.load(save_model_dir)

        # Get the accuracy on the test set
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
        test_loss = tf.keras.metrics.Mean()
        test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()

        # @tf.function is left disabled: the .numpy() call below needs eager execution
        def test_step(images, labels):
            predictions = model(images, training=False)
            t_loss = loss_object(labels, predictions)
            test_loss.update_state(t_loss)
            test_accuracy.update_state(labels, predictions)

            return tf.math.argmax(predictions, axis=1).numpy()

        batch = 0
        for features in test_dataset:
            batch += 1
            test_images, test_labels = process_features(
                features, data_augmentation=False)
            predict_labels = test_step(test_images, test_labels)
            print(
                "loss: {:.5f}, test accuracy: {:.5f}, predict_labels:{}, test_labels:{}"
                .format(test_loss.result().numpy(),
                        test_accuracy.result().numpy(), predict_labels,
                        test_labels))

            with open(log_dir + "test_result_step" + ".log", "a") as file:
                file.write("test\t")
                file.write(datetime.datetime.now().strftime("%Y%m%d%H%M%S") + "\t")
                file.write(str(batch) + "\t")
                file.write(str(test_accuracy.result().numpy()) + "\t")
                file.write(str(predict_labels) + "\t")
                file.write(str(test_labels) + "\n")

        print("The accuracy on test set is: {:.3f}%".format(
            test_accuracy.result() * 100))

        with open(log_dir + "test_result" + ".log", "a") as file:
            file.write(datetime.datetime.now().strftime("%Y%m%d%H%M%S") + "\t")
            file.write(str(test_accuracy.result().numpy()) + "\n")
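
The tab-separated logs written above are meant for pandas analysis and best-model selection (see the comment in Example #1). A minimal sketch of reading the epoch-level training log back to pick the best epoch; the column names and the log path are assumptions, since the file is written without a header row:

import pandas as pd

# Columns follow the writes in Example #1: timestamp, epoch, valid_accuracy
log = pd.read_csv("training_result.log", sep="\t", header=None,
                  names=["timestamp", "epoch", "valid_accuracy"])
best = log.loc[log["valid_accuracy"].idxmax()]
print("Best epoch:", int(best["epoch"]),
      "valid accuracy:", best["valid_accuracy"])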