import sys

import cv2
import numpy as np

from vgg16 import VGG_16  # Assumes the model definition lives in a local vgg16 module


def main(args):
    f_name = args[0]

    # Read in the image and resize to the 224x224 input expected by VGG16
    im = cv2.resize(cv2.imread(f_name), (224, 224)).astype(np.float32)

    # Normalize with the per-channel (BGR) means of the VGG16 training data
    # (values from the authors)
    im[:, :, 0] -= 103.939
    im[:, :, 1] -= 116.779
    im[:, :, 2] -= 123.68
    im = im.transpose((2, 0, 1))     # HWC -> CHW
    im = np.expand_dims(im, axis=0)  # Add a batch dimension

    # Test the pretrained model
    model = VGG_16('../vgg16_weights.h5')
    out = model.predict(im)
    print(np.argmax(out))  # Index into the VGG16 dataset categories


if __name__ == "__main__":
    main(sys.argv[1:])  # Assumed entry point; args[0] is the image path
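If the weights file is the classic Caffe-converted Keras VGG16 release, the predicted index refers to one of the 1000 ILSVRC categories. A small sketch for turning that index into a readable label, assuming a hypothetical one-label-per-line file (the synset_words.txt path is an assumption, not part of the original script):

# Hypothetical helper: map the argmax index to a human-readable class label.
# Assumes a text file with one class description per line (path is an assumption).
def index_to_label(index, labels_path='../synset_words.txt'):
    with open(labels_path) as f:
        labels = [line.strip() for line in f]
    return labels[index]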
        file_name = img['file_name']
        ret[file_name] = caption
    return ret

# Read each image's file_name and its caption
train_file_name_caption = get_file_name_caption(coco_train_caption)
val_file_name_caption = get_file_name_caption(coco_val_caption)
file_name_caption = {}
for x, y in train_file_name_caption.items():
    file_name_caption[x] = y
for x, y in val_file_name_caption.items():
    file_name_caption[x] = y

# VGG16 model with pretrained weights; the last two layers are popped so the
# model outputs fc-layer features rather than class predictions
vgg_model = VGG_16()
vgg_model.load_weights(vgg_model_weights)
vgg_model.layers.pop()
vgg_model.layers.pop()
vgg_model.outputs = [vgg_model.layers[-1].output]
vgg_model.layers[-1].outbound_nodes = []
vgg_model.trainable = False

batch_size = 64
file_name_images = {}
X = []
for file_name in tqdm(train_file_name_caption.keys(),
                      file=sys.stdout,
                      total=len(train_file_name_caption)):
    path = os.path.join(coco_train, file_name)
    img = np.array(Image.open(path))
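The example above is truncated just before the features are extracted. Assuming the popped model now outputs fc-layer features, the loop would plausibly continue along these lines (a sketch only; preprocess and file_batch are hypothetical names, not from the original code):

# Sketch (hypothetical continuation): run a batch of preprocessed images
# through the truncated VGG16 to get one feature vector per image.
batch = np.stack([preprocess(f) for f in file_batch])  # preprocess: hypothetical 224x224 resize + mean subtraction
features = vgg_model.predict(batch)  # one fc-feature vector per input image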
# -*- coding: utf-8 -*-
import torch
import torch.nn.functional as F
import cv2
import numpy as np

from vgg16 import VGG_16

if __name__ == "__main__":
    model = VGG_16()
    model.load_state_dict(torch.load("./pretrained/vgg_face_dag.pth"))
    model.eval()

    # Load the 224x224 face image and convert HWC -> NCHW
    im = cv2.imread("./images/Aamir_Khan1.png")
    im = torch.Tensor(im).permute(2, 0, 1).view(1, 3, 224, 224)
    # Subtract the per-channel means of the VGG-Face training data
    im -= torch.Tensor(np.array([129.1863, 104.7624, 93.5940])).view(1, 3, 1, 1)

    preds = F.softmax(model(im), -1)
    values, index = preds.max(-1)

    with open("./images/names.txt", 'r') as f:
        names = f.readlines()
    print("Index: %d, Confidence: %f, Name: %s" % (index, values, names[index]))
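One detail worth double-checking here: cv2.imread returns channels in BGR order, while the three means above are typically quoted for RGB channels in the VGG-Face release. If the vgg_face_dag weights were trained on RGB inputs (an assumption to verify against the weight release), the image should be converted before normalization:

# Sketch, assuming the pretrained weights expect RGB input:
im = cv2.imread("./images/Aamir_Khan1.png")
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; convert to RGB
im = torch.Tensor(im).permute(2, 0, 1).view(1, 3, 224, 224)
im -= torch.Tensor([129.1863, 104.7624, 93.5940]).view(1, 3, 1, 1)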
def main(_):
    ########################
    ### Load config file ###
    ########################
    if tf.app.flags.FLAGS.config is None:
        logging.error("No config file is provided.")
        logging.info("Usage: python train.py --config config.json")
        exit(1)

    with open(tf.app.flags.FLAGS.config) as config_file:
        cfg = json.load(config_file)

    ###############################
    ### Setup dataset iterators ###
    ###############################
    test_dataset = build_test_dataset(cfg)
    test_dataset = test_dataset.batch(1)  # Process one image at a time

    # Setup the test dataset iterator
    test_iterator = test_dataset.make_one_shot_iterator()
    next_element = test_iterator.get_next()

    ############################
    ### Initialize the model ###
    ############################
    if cfg['model'] == 'vgg16_fcn':
        conv7_features, pool4_features, pool3_features = VGG_16(
            image_batch_tensor=next_element[0], is_training=False)
        output_logits_dict = FCN_EXTENSION(conv7_features, pool4_features,
                                           pool3_features, len(cfg['classes']))
    elif cfg['model'] == 'mobilenet_fcn':
        conv13_features, conv11_features, conv5_features = MOBILENET(
            image_batch_tensor=next_element[0], is_training=False)
        output_logits_dict = FCN_EXTENSION(conv13_features, conv11_features,
                                           conv5_features, len(cfg['classes']))
    elif cfg['model'] == 'mobilenet_fcn_sep':
        conv13_features, conv11_features, conv5_features = MOBILENET(
            image_batch_tensor=next_element[0], is_training=False)
        output_logits_dict = FCN_SEP_EXTENSION(conv13_features, conv11_features,
                                               conv5_features,
                                               len(cfg['classes']))

    #######################################
    ### Process annotation/label tensor ###
    #######################################
    annotation_batch_tensor = tf.squeeze(
        next_element[1], axis=-1)  # (b x h x w x 1) -> (b x h x w) after squeeze

    # Later, the predictions are resized to match the annotation tensor.
    # Unfortunately, the KITTI dataset has slightly different dimensions for
    # some images, so this is necessary:
    annotation_shape = tf.shape(annotation_batch_tensor)

    # Get (b, w, h) indices of pixels that are not masked out
    valid_batch_indices = get_valid_entries_indices_from_annotation_batch(
        annotation_batch_tensor=annotation_batch_tensor)

    # Keep only valid pixels:
    valid_labels_batch_tensor = tf.gather_nd(params=annotation_batch_tensor,
                                             indices=valid_batch_indices)

    ##########################
    ### Add evaluation ops ###
    ##########################
    # Note: We use the FCN-8s output for evaluation because the weights of the
    # added layers were zero-initialized, so the untrained layers will not
    # affect the output. In other words, initially (before training FCN-16s
    # and FCN-8s), the FCN-8s output is the same as the FCN-32s output.
    # Resize logits to the original image size
    orig_size_logits = tf.image.resize_images(
        output_logits_dict['fcn8'],
        [annotation_shape[1], annotation_shape[2]],
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    orig_size_pred = tf.argmax(
        orig_size_logits, axis=3)  # Predictions for all pixels (including masked)

    # Flattened array of logits for non-masked pixels
    valid_logits_batch_tensor = tf.gather_nd(params=orig_size_logits,
                                             indices=valid_batch_indices)

    # Calculate mean IoU over non-masked pixels:
    valid_pred_batch_tensor = tf.argmax(valid_logits_batch_tensor, axis=1)
    iou_metric_calculate, iou_metric_update = tf.metrics.mean_iou(
        valid_labels_batch_tensor, valid_pred_batch_tensor,
        len(cfg['classes']), name='iou_metric')
    iou_metric_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                        scope='iou_metric')
    iou_metric_initializer = tf.variables_initializer(var_list=iou_metric_vars)

    ##################################
    ### Initialize model variables ###
    ##################################
    global_vars_init_op = tf.global_variables_initializer()
    local_vars_init_op = tf.local_variables_initializer()
    combined_init_op = tf.group(local_vars_init_op, global_vars_init_op)

    # Saver is used to load full model checkpoints
    model_variables = slim.get_model_variables()
    saver = tf.train.Saver(model_variables)

    ########################
    ### Test the network ###
    ########################
    with tf.Session() as sess:
        ##### Initialize variables #####
        sess.run(combined_init_op)

        # Always restore from the trained checkpoint when testing; the base
        # checkpoint would produce awful performance.
        saver.restore(sess, cfg['trained_checkpoint_filename'])

        ##### Test #####
        sess.run(iou_metric_initializer)  # Reset the IoU metric
        while True:  # Iterate over the entire validation set
            try:
                class_predictions, input_file_name, _ = sess.run(
                    [orig_size_pred, next_element[2], iou_metric_update])
                iou = sess.run(iou_metric_calculate)  # IoU to date
                print('Cumulative mean IoU: ' + str(iou))
                save_output_image(class_predictions[0], input_file_name, cfg)
            except tf.errors.OutOfRangeError:
                # Thrown when the end of the dataset is reached
                break

        print('Final mean IoU: ' + str(iou))
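tf.metrics.mean_iou is a streaming metric: its update op accumulates a confusion matrix in local variables, which is why the script resets them via iou_metric_initializer before evaluating. A small numpy sketch of the same computation (illustrative only, not the TensorFlow implementation):

import numpy as np

def mean_iou(labels, preds, num_classes):
    # Accumulate a confusion matrix over all (label, prediction) pairs,
    # then average the per-class IoU -- the same bookkeeping that
    # tf.metrics.mean_iou performs internally.
    cm = np.zeros((num_classes, num_classes), dtype=np.int64)
    for t, p in zip(labels, preds):
        cm[t, p] += 1
    intersection = np.diag(cm)
    union = cm.sum(axis=0) + cm.sum(axis=1) - intersection
    valid = union > 0  # Skip classes that never appear
    return (intersection[valid] / union[valid]).mean()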
print(f"val_steps: {val_steps}") # # Create the data generators trainGen = batchGenerator(train_images, train_labels, batch_size) testGen = batchGenerator(test_images, test_labels, batch_size) # # Build the model classes = 1 classifier_activation = 'sigmoid' loss_type = 'binary_crossentropy' lst_metrics = ['accuracy'] lr_rate = 0.01 with strategy.scope(): model = VGG_16(input_shape=(512, 512, 1), classes=classes, classifier_activation=classifier_activation) opt = tf.keras.optimizers.SGD(learning_rate=lr_rate, momentum=0.9) model.compile(loss=loss_type, optimizer=opt, metrics=lst_metrics) # Print Model Summary print(model.summary()) # Train the model model_checkpoint = tf.keras.callbacks.ModelCheckpoint(output + '.h5', monitor='accuracy', verbose=1, save_best_only=True) H = model.fit(x=trainGen, steps_per_epoch=train_steps, validation_data=testGen,
def main(_):
    ########################
    ### Load config file ###
    ########################
    if tf.app.flags.FLAGS.config is None:
        logging.error("No config file is provided.")
        logging.info("Usage: python train.py --config config.json")
        exit(1)

    with open(tf.app.flags.FLAGS.config) as config_file:
        cfg = json.load(config_file)

    ###############################
    ### Setup dataset iterators ###
    ###############################
    train_dataset, val_dataset = build_train_val_datasets(cfg)
    # A batch size of 1 still behaves like a large batch, because we update
    # based on the loss at every pixel
    train_dataset = train_dataset.batch(1)
    val_dataset = val_dataset.batch(1)

    # Create a handle to control which dataset is fed into the model:
    handle = tf.placeholder(tf.string, shape=[])
    iterator = tf.data.Iterator.from_string_handle(handle,
                                                   train_dataset.output_types,
                                                   train_dataset.output_shapes)
    next_element = iterator.get_next()

    # Setup training and validation dataset iterators
    train_iterator = train_dataset.make_initializable_iterator()
    val_iterator = val_dataset.make_initializable_iterator()

    ############################
    ### Initialize the model ###
    ############################
    # Controls whether dropout layers are enabled in the VGG16 network
    is_training = tf.placeholder(tf.bool)

    conv7_features, pool4_features, pool3_features = VGG_16(
        image_batch_tensor=next_element[0], is_training=is_training)
    output_logits_dict = FCN_EXTENSION(conv7_features, pool4_features,
                                       pool3_features, len(cfg['classes']))

    #######################################
    ### Process annotation/label tensor ###
    #######################################
    annotation_batch_tensor = tf.squeeze(
        next_element[1], axis=-1)  # (b x h x w x 1) -> (b x h x w) after squeeze

    # Convert the annotation tensor to one-hot encoded labels for comparison
    # against the upsampled logits
    labels_one_hot_batch_tensor = get_one_hot_labels_from_annotation_batch(
        annotation_batch_tensor=annotation_batch_tensor,
        num_classes=len(cfg['classes']))

    # Get (b, w, h) indices of pixels that are not masked out
    valid_batch_indices = get_valid_entries_indices_from_annotation_batch(
        annotation_batch_tensor=annotation_batch_tensor)

    # Keep only valid pixels from the labels, one-hot labels, and logits
    valid_labels_one_hot_batch_tensor = tf.gather_nd(
        params=labels_one_hot_batch_tensor, indices=valid_batch_indices)
    valid_labels_batch_tensor = tf.gather_nd(params=annotation_batch_tensor,
                                             indices=valid_batch_indices)

    training_stages = ['fcn32', 'fcn16', 'fcn8']
    learning_rates = {'fcn32': 1, 'fcn16': 0.1, 'fcn8': 0.01}

    #####################################
    ### FCN-32s, FCN-16s, FCN-8s Loss ###
    #####################################
    train_steps = {}
    cross_entropy_means = {}
    iou_metric_calculators = {}
    iou_metric_updaters = {}
    iou_metric_initializers = {}

    # Each batchnorm layer has an update op that must run during training to
    # update the moving averages tracked by the batchnorm layer:
    batch_norm_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    for stage in training_stages:
        valid_logits_batch_tensor = tf.gather_nd(
            params=output_logits_dict[stage], indices=valid_batch_indices)
        cross_entropies = tf.nn.softmax_cross_entropy_with_logits(
            logits=valid_logits_batch_tensor,
            labels=valid_labels_one_hot_batch_tensor)

        # Normalize the cross entropy -- the number of elements differs at
        # each step due to masked-out regions
        cross_entropy_means[stage] = tf.reduce_mean(cross_entropies)

        with tf.variable_scope(stage + '_sgd'):
            # Ensure that batchnorm statistics get updated at every train step
            with tf.control_dependencies(batch_norm_update_ops):
                train_steps[stage] = tf.train.MomentumOptimizer(
                    learning_rate=cfg['learning_rate'] * learning_rates[stage],
                    momentum=cfg['momentum']).minimize(
                        cross_entropy_means[stage])

        valid_pred_batch_tensor = tf.argmax(valid_logits_batch_tensor, axis=1)
        iou_metric_calculators[stage], iou_metric_updaters[stage] = \
            tf.metrics.mean_iou(valid_labels_batch_tensor,
                                valid_pred_batch_tensor,
                                len(cfg['classes']),
                                name=stage + '_iou_metric')
        iou_metric_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                            scope=stage + '_iou_metric')
        iou_metric_initializers[stage] = tf.variables_initializer(
            var_list=iou_metric_vars)

    ##########################
    ### Add prediction ops ###
    ##########################
    # Note: We use the FCN-8s output for evaluation because the weights of the
    # added layers were zero-initialized, so the untrained layers will not
    # affect the output. In other words, initially (before training FCN-16s
    # and FCN-8s), the FCN-8s output is the same as the FCN-32s output.
    pred = tf.argmax(output_logits_dict['fcn8'],
                     axis=3)  # Predictions for all pixels (including masked)
    probabilities = tf.nn.softmax(
        output_logits_dict['fcn8']
    )  # Per-class probabilities for each pixel (including masked)

    ##################################
    ### Initialize model variables ###
    ##################################
    if not cfg['restore_from_trained_checkpoint']:
        # Load pretrained VGG-16 ImageNet weights
        variables_to_restore = slim.get_model_variables('vgg_16')
        init_vgg16_fn = slim.assign_from_checkpoint_fn(
            model_path=cfg['base_checkpoint_filename'],
            var_list=variables_to_restore)

    global_vars_init_op = tf.global_variables_initializer()
    local_vars_init_op = tf.local_variables_initializer()
    combined_init_op = tf.group(local_vars_init_op, global_vars_init_op)

    # Create initializers that can be run at the start of each stage to ensure
    # that the next stage is initialized as desired
    stage_reinitializers = {}
    global_variable_list = tf.global_variables()

    fcn16_fc_weights = [
        v for v in global_variable_list
        if v.name == "fcn16/downsampled_by_16_fc/weights:0"
    ][0]
    fcn16_upsample_by_2_filter = [
        v for v in global_variable_list if v.name == "fcn16/upsample_by_2:0"
    ][0]
    stage_reinitializers['fcn16'] = tf.variables_initializer(
        [fcn16_fc_weights, fcn16_upsample_by_2_filter])

    fcn8_fc_weights = [
        v for v in global_variable_list
        if v.name == "fcn8/downsampled_by_8_fc/weights:0"
    ][0]
    fcn8_upsample_by_2_filter = [
        v for v in global_variable_list if v.name == "fcn8/upsample_by_2:0"
    ][0]
    stage_reinitializers['fcn8'] = tf.variables_initializer(
        [fcn8_fc_weights, fcn8_upsample_by_2_filter])

    # We need this to save only model variables and omit
    # optimization-related and other variables.
    model_variables = slim.get_model_variables()
    # saver is used to save and load full model checkpoints
    saver = tf.train.Saver(model_variables)

    #######################
    ### Train the model ###
    #######################
    with tf.Session() as sess:
        # Get training and validation iterator handles to feed into the model
        training_handle = sess.run(train_iterator.string_handle())
        validation_handle = sess.run(val_iterator.string_handle())

        ##### Initialize variables #####
        sess.run(combined_init_op)
        if cfg['restore_from_trained_checkpoint']:
            saver.restore(sess, cfg['trained_checkpoint_filename'])
        else:
            # Load the base checkpoint (e.g. VGG16 weights, before the FCN-32s
            # weights have been trained)
            init_vgg16_fn(sess)

        # Best performance achieved on the validation set so far:
        best_validation_metric = None

        # Train FCN-32s, then FCN-16s, then FCN-8s
        for stage in training_stages:
            print('***** Starting training of ' + stage + ' model. *****')

            # If this is the fcn16 or fcn8 training stage
            if stage != 'fcn32':
                print('Reverting to checkpoint of best model so far')
                saver.restore(sess, cfg['trained_checkpoint_filename'])
                # Reinitialize the weights for the next layer (to zeros for the
                # 'fully connected'-like layer and to bilinear interpolation
                # for the upsampling layer)
                sess.run(stage_reinitializers[stage])

            # Number of consecutive times that validation set performance did
            # not improve. If this reaches 4, continue to the next training stage:
            validation_did_not_improve_count = 0
            # Set to True once validation performance has not improved for 4
            # consecutive tests
            done_stage = False
            # Training step count; validation runs every
            # cfg['train_steps_between_validation'] steps
            train_count = 0

            # Train for at most 30 epochs in each stage (we continue to the
            # next stage sooner if performance on the validation set stalls)
            epoch = 0
            while epoch < 30 and not done_stage:
                epoch += 1
                # Re-initialize the training iterator
                sess.run(train_iterator.initializer)

                while True:  # Iterate over the entire training set
                    try:
                        ##### Training Step #####
                        train_count += 1
                        cross_entropy, _ = sess.run(
                            [cross_entropy_means[stage], train_steps[stage]],
                            feed_dict={
                                handle: training_handle,
                                is_training: True
                            })
                        print(str(train_count) + " Current loss: " +
                              str(cross_entropy))
                    except tf.errors.OutOfRangeError:
                        # Thrown when the end of the training dataset is reached
                        break

                    # Every cfg['train_steps_between_validation'] training steps:
                    # - Run validation
                    # - Save a checkpoint if the model has improved
                    if train_count % cfg['train_steps_between_validation'] == 0:
                        ##### Validation #####
                        print("Performing validation:")
                        # Re-initialize the validation iterator
                        sess.run(val_iterator.initializer)
                        # Reset the IoU metric
                        sess.run(iou_metric_initializers[stage])
                        validation_metric_total = 0
                        validation_count = 0
                        while True:  # Iterate over the entire validation set
                            try:
                                sess.run(iou_metric_updaters[stage],
                                         feed_dict={
                                             handle: validation_handle,
                                             is_training: False
                                         })
                                # Not the IoU of a single image, but the IoU
                                # accumulated over the validation run so far
                                iou = sess.run(iou_metric_calculators[stage])
                                print('Cumulative mean IoU: ' + str(iou))
                            except tf.errors.OutOfRangeError:
                                # Thrown when the end of the validation dataset
                                # is reached
                                break
                        final_iou = iou

                        ##### Save Checkpoint #####
                        # If this is the first checkpoint, or the model beat its
                        # best performance so far on the validation set
                        if (best_validation_metric is None) or (
                                final_iou > best_validation_metric):
                            save_path = saver.save(
                                sess, cfg['trained_checkpoint_filename'])
                            print("Model saved in file: %s" % save_path)
                            best_validation_metric = final_iou
                            validation_did_not_improve_count = 0  # Reset count
                        else:
                            print("Not saving checkpoint, because model had "
                                  "worse performance on validation set.")
                            validation_did_not_improve_count += 1
                            if validation_did_not_improve_count >= 4:
                                done_stage = True
                                break
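The stage reinitializers in this script set each new upsampling layer to bilinear interpolation, as the inline comment notes. A numpy sketch of the standard way such a bilinear transposed-convolution filter is constructed (an illustration of the idea, not necessarily this repository's code):

import numpy as np

def bilinear_upsample_filter(factor, channels):
    # Build a transposed-convolution filter that performs bilinear
    # upsampling by `factor`, with no mixing across channels.
    size = 2 * factor - factor % 2
    center = factor - 1 if size % 2 == 1 else factor - 0.5
    og = np.ogrid[:size, :size]
    kernel = (1 - abs(og[0] - center) / factor) * \
             (1 - abs(og[1] - center) / factor)
    weights = np.zeros((size, size, channels, channels), dtype=np.float32)
    for c in range(channels):
        weights[:, :, c, c] = kernel
    return weights

For factor=2 this produces the familiar [0.25, 0.75, 0.75, 0.25] ramp in each dimension, so an untrained upsampling layer starts out as plain bilinear interpolation.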