Example #1
import cv2
import numpy as np

# Note: the VGG_16 model builder is assumed to be defined or imported elsewhere
# in the original script; its definition is not part of this snippet.


def main(args):
    f_name = args[0]
    # read in image
    im = cv2.resize(cv2.imread(f_name), (224, 224)).astype(np.float32)
    # normalize: subtract the per-channel means of the VGG16 training data
    # (from the authors); cv2 loads images as BGR, so these are the B, G, R means
    im[:, :, 0] -= 103.939
    im[:, :, 1] -= 116.779
    im[:, :, 2] -= 123.68
    im = im.transpose((2, 0, 1))  # HWC -> CHW (channels-first, as the model expects)
    im = np.expand_dims(im, axis=0)  # add a batch dimension: (1, 3, 224, 224)

    # Test pretrained model
    model = VGG_16('../vgg16_weights.h5')
    out = model.predict(im)
    print(np.argmax(out))  # index into the VGG16 output categories
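# The snippet above has no entry point; a minimal sketch (assuming the script is
# invoked as `python predict.py path/to/image.jpg`) could be:
import sys

if __name__ == '__main__':
    main(sys.argv[1:])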
Example #2
            file_name = img['file_name']
            ret[file_name] = caption
    return ret

# Map each image file_name to its caption (for both train and val splits)
train_file_name_caption = get_file_name_caption(coco_train_caption)
val_file_name_caption = get_file_name_caption(coco_val_caption)

file_name_caption = {}
for x, y in train_file_name_caption.items():
    file_name_caption[x] = y
for x, y in val_file_name_caption.items():
    file_name_caption[x] = y
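# (Aside, not in the original: the two loops above are equivalent to the one-liner
#  file_name_caption = {**train_file_name_caption, **val_file_name_caption}.)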

# VGG 16 model with pretrained weights
vgg_model = VGG_16()
vgg_model.load_weights(vgg_model_weights)
# Drop the final classification layers so predict() returns the penultimate
# feature vector rather than class probabilities
vgg_model.layers.pop()
vgg_model.layers.pop()
vgg_model.outputs = [vgg_model.layers[-1].output]
vgg_model.layers[-1].outbound_nodes = []
vgg_model.trainable = False

batch_size = 64

file_name_images = {}
X = []

for file_name in tqdm(train_file_name_caption.keys(), file=sys.stdout, total=len(train_file_name_caption)):
    path = os.path.join(coco_train, file_name)
    img = np.array(Image.open(path))
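    # --- The original snippet is truncated here. The lines below are only a
    # sketch (not the author's code), assuming the same channels-first VGG16
    # preprocessing as in Example #1, to show how the feature cache could be
    # filled in: ---
    img = cv2.resize(img.astype(np.float32), (224, 224))
    img = img[:, :, ::-1]  # PIL loads RGB; flip to BGR to match the VGG16 means
    img[:, :, 0] -= 103.939
    img[:, :, 1] -= 116.779
    img[:, :, 2] -= 123.68
    img = np.expand_dims(img.transpose((2, 0, 1)), axis=0)
    file_name_images[file_name] = vgg_model.predict(img)[0]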
Example #3
# -*- coding: utf-8 -*-
import torch
import cv2
import numpy as np
from vgg16 import VGG_16
import torch.nn.functional as F

if __name__ == "__main__":
    model = VGG_16()
    model.load_state_dict(torch.load("./pretrained/vgg_face_dag.pth"))
    model.eval()
    im = cv2.imread("./images/Aamir_Khan1.png")  # assumed to be a 224x224 BGR face crop
    # HWC -> 1x3xHxW; unsqueeze is used because .view() fails on the
    # non-contiguous tensor produced by permute()
    im = torch.Tensor(im).permute(2, 0, 1).unsqueeze(0)
    im -= torch.Tensor(np.array([129.1863, 104.7624,
                                 93.5940])).view(1, 3, 1, 1)
    preds = F.softmax(model(im), -1)
    values, index = preds.max(-1)
    with open("./images/names.txt", 'r') as f:
        names = f.readlines()
    print("Index: %d, Confidence: %f, Name: %s" %
          (index, values, names[index]))
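# Follow-up sketch (an addition, not from the original repo): for repeated
# inference it is cheaper to run the forward pass under torch.no_grad(), which
# skips building the autograd graph. The same model and preprocessing as above
# are assumed.
def predict_face(model, im_batch):
    """im_batch: float tensor of shape (1, 3, 224, 224), BGR, mean-subtracted."""
    with torch.no_grad():
        preds = F.softmax(model(im_batch), dim=-1)
        values, index = preds.max(-1)
    return index.item(), values.item()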
Example #4
def main(_):
    ########################
    ### Load config file ###
    ########################
    if tf.app.flags.FLAGS.config is None:
        logging.error("No config file is provided.")
        logging.info("Usage: python train.py --config config.json")
        exit(1)

    with open(tf.app.flags.FLAGS.config) as config_file:
        cfg = json.load(config_file)

    ###############################
    ### Setup dataset iterators ###
    ###############################
    test_dataset = build_test_dataset(cfg)

    test_dataset = test_dataset.batch(1)  # Process one image at a time

    # Setup test dataset iterators
    test_iterator = test_dataset.make_one_shot_iterator()
    next_element = test_iterator.get_next()

    ############################
    ### Initialize the model ###
    ############################
    if cfg['model'] == 'vgg16_fcn':
        conv7_features, pool4_features, pool3_features = VGG_16(
            image_batch_tensor=next_element[0], is_training=False)

        output_logits_dict = FCN_EXTENSION(conv7_features, pool4_features,
                                           pool3_features, len(cfg['classes']))
    elif cfg['model'] == 'mobilenet_fcn':
        conv13_features, conv11_features, conv5_features = MOBILENET(
            image_batch_tensor=next_element[0], is_training=False)
        output_logits_dict = FCN_EXTENSION(conv13_features, conv11_features,
                                           conv5_features, len(cfg['classes']))
    elif cfg['model'] == 'mobilenet_fcn_sep':
        conv13_features, conv11_features, conv5_features = MOBILENET(
            image_batch_tensor=next_element[0], is_training=False)
        output_logits_dict = FCN_SEP_EXTENSION(conv13_features,
                                               conv11_features, conv5_features,
                                               len(cfg['classes']))

    #######################################
    ### Process annotation/label tensor ###
    #######################################

    annotation_batch_tensor = tf.squeeze(
        next_element[1],
        axis=-1)  # (b x h x w x 1) -> (b x h x w) after squeeze
    # Later, resize the predictions to match the annotation tensor. Unfortunately,
    # the KITTI dataset has slightly different dimensions for some images, so this
    # is necessary:
    annotation_shape = tf.shape(annotation_batch_tensor)

    # Get (b, h, w) indices of pixels that are not masked out
    valid_batch_indices = get_valid_entries_indices_from_annotation_batch(
        annotation_batch_tensor=annotation_batch_tensor)

    # Keep only valid pixels:
    valid_labels_batch_tensor = tf.gather_nd(params=annotation_batch_tensor,
                                             indices=valid_batch_indices)

    ##########################
    ### Add evaluation ops ###
    ##########################
    # Note: We use FCN-8s output for evaluation because all weights were
    # zero-initialized, so the untrained layers will not affect the output. In
    # other words, initially (before training FCN-16s and FCN-8s), the FCN-8s
    # output is the same as the FCN-32s output.

    # Resize logits to original image size
    orig_size_logits = tf.image.resize_images(
        output_logits_dict['fcn8'], [annotation_shape[1], annotation_shape[2]],
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

    orig_size_pred = tf.argmax(
        orig_size_logits,
        axis=3)  # Predictions for all pixels (including masked)

    # Flattened array of logits for non-masked pixels
    valid_logits_batch_tensor = tf.gather_nd(params=orig_size_logits,
                                             indices=valid_batch_indices)

    # Calculate accuracy over non-masked pixels:
    valid_pred_batch_tensor = tf.argmax(valid_logits_batch_tensor, axis=1)
    iou_metric_calculate, iou_metric_update = tf.metrics.mean_iou(
        valid_labels_batch_tensor,
        valid_pred_batch_tensor,
        len(cfg['classes']),
        name='iou_metric')
    iou_metric_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                        scope='iou_metric')
    iou_metric_initializer = tf.variables_initializer(var_list=iou_metric_vars)

    ##################################
    ### Initialize model variables ###
    ##################################
    global_vars_init_op = tf.global_variables_initializer()
    local_vars_init_op = tf.local_variables_initializer()
    combined_init_op = tf.group(local_vars_init_op, global_vars_init_op)

    # Saver is used to load full model checkpoints
    model_variables = slim.get_model_variables()
    saver = tf.train.Saver(model_variables)

    ########################
    ### Test the network ###
    ########################
    with tf.Session() as sess:
        ###### Initialize variables #####
        sess.run(combined_init_op)
        # Always restore the trained checkpoint when testing; the base (ImageNet-only) checkpoint would give poor results.
        saver.restore(sess, cfg['trained_checkpoint_filename'])

        ###### Test ######
        sess.run(iou_metric_initializer)  # Reset the iou metric
        while True:  # Iterate over entire validation set
            try:
                class_predictions, input_file_name, _ = sess.run(
                    [orig_size_pred, next_element[2], iou_metric_update])
                iou = sess.run(iou_metric_calculate)  # IoU to date
                print('Cumulative mean IoU: ' + str(iou))
                save_output_image(class_predictions[0], input_file_name, cfg)
            except tf.errors.OutOfRangeError:  # Thrown when end of dataset is reached
                break
        print('Final Mean IoU: ' + str(iou))
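# Portability note (an addition, not part of the original script): this code
# relies on TF1-only APIs (tf.app.flags, make_one_shot_iterator, tf.Session).
# Under a TensorFlow 2 installation the usual compatibility shim is:
#
#     import tensorflow.compat.v1 as tf
#     tf.disable_v2_behavior()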
Example #5
    print(f"val_steps: {val_steps}")

    # Create the data generators
    trainGen = batchGenerator(train_images, train_labels, batch_size)
    testGen = batchGenerator(test_images, test_labels, batch_size)

    # Build the model
    classes = 1
    classifier_activation = 'sigmoid'
    loss_type = 'binary_crossentropy'
    lst_metrics = ['accuracy']
    lr_rate = 0.01

    with strategy.scope():
        model = VGG_16(input_shape=(512, 512, 1),
                       classes=classes,
                       classifier_activation=classifier_activation)
        opt = tf.keras.optimizers.SGD(learning_rate=lr_rate, momentum=0.9)
        model.compile(loss=loss_type, optimizer=opt, metrics=lst_metrics)

    # Print Model Summary
    print(model.summary())

    # Train the model
    model_checkpoint = tf.keras.callbacks.ModelCheckpoint(output + '.h5',
                                                          monitor='accuracy',
                                                          verbose=1,
                                                          save_best_only=True)
    H = model.fit(x=trainGen,
                  steps_per_epoch=train_steps,
                  validation_data=testGen,
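                  # (The fit() call is truncated in the source. A plausible set
                  # of remaining arguments, which are assumptions rather than
                  # the original code: validation_steps=val_steps, epochs=...,
                  # and callbacks=[model_checkpoint].)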
Example #6
def main(_):
    ########################
    ### Load config file ###
    ########################
    if tf.app.flags.FLAGS.config is None:
        logging.error("No config file is provided.")
        logging.info("Usage: python train.py --config config.json")
        exit(1)

    with open(tf.app.flags.FLAGS.config) as config_file:
        cfg = json.load(config_file)

    ###############################
    ### Setup dataset iterators ###
    ###############################
    train_dataset, val_dataset = build_train_val_datasets(cfg)

    train_dataset = train_dataset.batch(
        1
    )  # Batch size of 1 is really like a large batch size, because we are updating based on the loss at every pixel
    val_dataset = val_dataset.batch(1)
    # Create handle to control which dataset is fed into the model:
    handle = tf.placeholder(tf.string, shape=[])
    iterator = tf.data.Iterator.from_string_handle(handle,
                                                   train_dataset.output_types,
                                                   train_dataset.output_shapes)
    next_element = iterator.get_next()

    # Setup training and validation dataset iterators
    train_iterator = train_dataset.make_initializable_iterator()
    val_iterator = val_dataset.make_initializable_iterator()

    ############################
    ### Initialize the model ###
    ############################
    is_training = tf.placeholder(
        tf.bool
    )  # Controls whether dropout layers are enabled in VGG16 network

    conv7_features, pool4_features, pool3_features = VGG_16(
        image_batch_tensor=next_element[0], is_training=is_training)

    output_logits_dict = FCN_EXTENSION(conv7_features, pool4_features,
                                       pool3_features, len(cfg['classes']))

    #######################################
    ### Process annotation/label tensor ###
    #######################################
    annotation_batch_tensor = tf.squeeze(
        next_element[1],
        axis=-1)  # (b x h x w x 1) -> (b x h x w) after squeeze

    # Convert annotation tensor to one-hot encoded labels for comparison against upsampled logits
    labels_one_hot_batch_tensor = get_one_hot_labels_from_annotation_batch(
        annotation_batch_tensor=annotation_batch_tensor,
        num_classes=len(cfg['classes']))
    # Get (b, h, w) indices of pixels that are not masked out
    valid_batch_indices = get_valid_entries_indices_from_annotation_batch(
        annotation_batch_tensor=annotation_batch_tensor)
    # Keep only valid pixels from the labels, one-hot labels, and logits
    valid_labels_one_hot_batch_tensor = tf.gather_nd(
        params=labels_one_hot_batch_tensor, indices=valid_batch_indices)
    valid_labels_batch_tensor = tf.gather_nd(params=annotation_batch_tensor,
                                             indices=valid_batch_indices)

    training_stages = ['fcn32', 'fcn16', 'fcn8']
    # Per-stage multipliers applied to cfg['learning_rate'] in the optimizers below
    learning_rates = {'fcn32': 1, 'fcn16': 0.1, 'fcn8': 0.01}
    #####################################
    ### FCN-32s, FCN-16s, FCN-8s Loss ###
    #####################################
    train_steps = {}
    cross_entropy_means = {}
    iou_metric_calculators = {}
    iou_metric_updaters = {}
    iou_metric_initializers = {}
    # Each batchnorm layer has an update op that must run during training to
    # update the moving averages tracked by the batchnorm layer:
    batch_norm_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    for stage in training_stages:
        valid_logits_batch_tensor = tf.gather_nd(
            params=output_logits_dict[stage], indices=valid_batch_indices)
        cross_entropies = tf.nn.softmax_cross_entropy_with_logits(
            logits=valid_logits_batch_tensor,
            labels=valid_labels_one_hot_batch_tensor)

        # Normalize the cross entropy -- the number of elements is different during
        # each step due to masked out regions
        cross_entropy_means[stage] = tf.reduce_mean(cross_entropies)

        with tf.variable_scope(stage + '_sgd'):
            with tf.control_dependencies(
                    batch_norm_update_ops
            ):  # Ensure that batchnorm statistics get updated at every train step
                train_steps[stage] = tf.train.MomentumOptimizer(
                    learning_rate=cfg['learning_rate'] * learning_rates[stage],
                    momentum=cfg['momentum']).minimize(
                        cross_entropy_means[stage])

        valid_pred_batch_tensor = tf.argmax(valid_logits_batch_tensor, axis=1)

        iou_metric_calculators[stage], iou_metric_updaters[
            stage] = tf.metrics.mean_iou(valid_labels_batch_tensor,
                                         valid_pred_batch_tensor,
                                         len(cfg['classes']),
                                         name=stage + '_iou_metric')
        iou_metric_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                            scope=stage + '_iou_metric')
        iou_metric_initializers[stage] = tf.variables_initializer(
            var_list=iou_metric_vars)

    ##########################
    ### Add prediction ops ###
    ##########################
    # Note: We use FCN-8s output for evaluation because all weights were
    # zero-initialized, so the untrained layers will not affect the output. In
    # other words, initially (before training FCN-16s and FCN-8s), the FCN-8s
    # output is the same as the FCN-32s output.
    pred = tf.argmax(output_logits_dict['fcn8'],
                     axis=3)  # Predictions for all pixels (including masked)
    probabilities = tf.nn.softmax(
        output_logits_dict['fcn8']
    )  # Probabilities for each class for each pixel (including masked)

    ##################################
    ### Initialize model variables ###
    ##################################
    if not cfg[
            'restore_from_trained_checkpoint']:  # Load pretrained VGG-16 Imagenet weights
        variables_to_restore = slim.get_model_variables('vgg_16')
        init_vgg16_fn = slim.assign_from_checkpoint_fn(
            model_path=cfg['base_checkpoint_filename'],
            var_list=variables_to_restore)

    global_vars_init_op = tf.global_variables_initializer()
    local_vars_init_op = tf.local_variables_initializer()
    print(global_vars_init_op)
    combined_init_op = tf.group(local_vars_init_op, global_vars_init_op)

    # Create initializer that can be run at the start of each stage to ensure
    # that the next stage is initialized as desired
    stage_reinitializers = {}
    global_variable_list = tf.global_variables()

    fcn16_fc_weights = [
        v for v in global_variable_list
        if v.name == "fcn16/downsampled_by_16_fc/weights:0"
    ][0]
    fcn16_upsample_by_2_filter = [
        v for v in global_variable_list if v.name == "fcn16/upsample_by_2:0"
    ][0]
    stage_reinitializers['fcn16'] = tf.variables_initializer(
        [fcn16_fc_weights, fcn16_upsample_by_2_filter])

    fcn8_fc_weights = [
        v for v in global_variable_list
        if v.name == "fcn8/downsampled_by_8_fc/weights:0"
    ][0]
    fcn8_upsample_by_2_filter = [
        v for v in global_variable_list if v.name == "fcn8/upsample_by_2:0"
    ][0]
    stage_reinitializers['fcn8'] = tf.variables_initializer(
        [fcn8_fc_weights, fcn8_upsample_by_2_filter])

    # We need this to save only model variables and omit
    # optimization-related and other variables.
    model_variables = slim.get_model_variables()
    saver = tf.train.Saver(
        model_variables
    )  # saver is used to save and load full model checkpoints

    #######################
    ### Train the model ###
    #######################
    with tf.Session() as sess:
        # Get training and validation iterator handles to feed into the model
        training_handle = sess.run(train_iterator.string_handle())
        validation_handle = sess.run(val_iterator.string_handle())

        ###### Initialize variables #####
        sess.run(combined_init_op)
        if cfg['restore_from_trained_checkpoint']:
            saver.restore(sess, cfg['trained_checkpoint_filename'])
        else:
            init_vgg16_fn(
                sess
            )  # Load base checkpoint (ex: vgg16 weights before training FCN32 weights)

        # Best performance achieved on the validation set so far:
        best_validation_metric = None

        for stage in training_stages:  # Train FCN-32s, then FCN-16s, then FCN-8s
            print('***** Starting training of ' + stage + ' model. *****')

            # If this is the fcn16 or fcn8 training stage
            if stage != 'fcn32':
                print('Reverting to checkpoint of best model so far')
                saver.restore(sess, cfg['trained_checkpoint_filename'])
                # Reinitialize the weights for the next layer (to zeros for the
                # 'fully connected'-like layer and to bilinear interpolation for the upsampling layer)
                sess.run(stage_reinitializers[stage])

            # Number of consecutive times that validation set performance did not
            # improve. If this reaches 4, continue to next training stage:
            validation_did_not_improve_count = 0

            # Set to True once validation performance has not improved for 4 consecutive validation runs
            done_stage = False

            # Training step count. Every cfg['train_steps_between_validation'] steps, run validation.
            train_count = 0

            epoch = 0
            # Train for at most 30 epochs in each stage
            # (we will continue to next stage sooner if performance on the
            # validation set is not improving)
            while epoch < 30 and not done_stage:
                epoch += 1
                sess.run(train_iterator.initializer
                         )  # Re-initialize the training iterator

                while True:  # Iterate over entire training set
                    try:
                        ##### Training Step #####
                        train_count += 1
                        cross_entropy, _ = sess.run(
                            [cross_entropy_means[stage], train_steps[stage]],
                            feed_dict={
                                handle: training_handle,
                                is_training: True
                            })
                        print(
                            str(train_count) + " Current loss: " +
                            str(cross_entropy))
                    except tf.errors.OutOfRangeError:  # Thrown when end of training dataset is reached
                        break

                    # Every cfg['train_steps_between_validation'] training steps:
                    # - Run validation
                    # - Save a checkpoint if the model has improved
                    if train_count % cfg['train_steps_between_validation'] == 0:
                        ##### Validation #####
                        print("Performing validation:")
                        sess.run(val_iterator.initializer
                                 )  # Re-initialize the validation iterator
                        sess.run(iou_metric_initializers[stage]
                                 )  # Reset the iou metric
                        validation_metric_total = 0
                        validation_count = 0
                        while True:  # Iterate over entire validation set
                            try:
                                sess.run(iou_metric_updaters[stage],
                                         feed_dict={
                                             handle: validation_handle,
                                             is_training: False
                                         })
                                iou = sess.run(iou_metric_calculators[stage])
                                print(
                                    'Cumulative mean IoU: ' + str(iou)
                                )  # This is not the iou for a single image, but rather the iou calculated up to this point
                            except tf.errors.OutOfRangeError:  # Thrown when end of validation dataset is reached:
                                break
                        final_iou = iou

                        ##### Save Checkpoint #####
                        # If this is the first checkpoint, or the model performed better than the best so far on the validation set
                        if (best_validation_metric is
                                None) or (final_iou > best_validation_metric):
                            save_path = saver.save(
                                sess, cfg['trained_checkpoint_filename'])
                            print("Model saved in file: %s" % save_path)
                            best_validation_metric = final_iou
                            validation_did_not_improve_count = 0  # reset count
                        else:
                            print(
                                "Not saving checkpoint, because model had worse performance on validation set."
                            )
                            validation_did_not_improve_count += 1

                        if validation_did_not_improve_count >= 4:
                            done_stage = True
                            break
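# For reference, the training script above reads the following config keys; this
# is a sketch of a matching config.json with placeholder values (not taken from
# the original project), and build_train_val_datasets() will need whatever
# dataset-path keys it expects in addition:
#
#     {
#         "classes": ["background", "road"],
#         "learning_rate": 0.0001,
#         "momentum": 0.9,
#         "train_steps_between_validation": 30,
#         "restore_from_trained_checkpoint": false,
#         "base_checkpoint_filename": "./checkpoints/vgg_16.ckpt",
#         "trained_checkpoint_filename": "./checkpoints/fcn8s_best.ckpt"
#     }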