def get_input_encoder(self): config = self.config # SSD 300 layers predictor_sizes = [ self.model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3], self.model.get_layer('fc7_mbox_conf').output_shape[1:3], self.model.get_layer('conv6_2_mbox_conf').output_shape[1:3], self.model.get_layer('conv7_2_mbox_conf').output_shape[1:3], self.model.get_layer('conv8_2_mbox_conf').output_shape[1:3], self.model.get_layer('conv9_2_mbox_conf').output_shape[1:3] ] self.ssd_input_encoder = SSDInputEncoder( img_height=config.img_height, img_width=config.img_width, n_classes=self.n_classes, predictor_sizes=predictor_sizes, scales=config.scales, aspect_ratios_per_layer=config.aspect_ratios, two_boxes_for_ar1=config.two_boxes_for_ar1, steps=config.steps, offsets=config.offsets, clip_boxes=config.clip_boxes, variances=config.variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.5, normalize_coords=config.normalize_coords) print(f'created encoder with {self.n_classes} classes')
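# Hedged usage sketch for get_input_encoder: `trainer` is assumed to be an
# instance of the class that owns the method above (with `self.model` built and
# `self.n_classes` set), and `dataset` a ready DataGenerator; both names are
# illustrative, not part of the original code.
def encoded_batch_demo(trainer, dataset, batch_size=8):
    trainer.get_input_encoder()
    gen = dataset.generate(batch_size=batch_size, shuffle=True,
                           transformations=[],
                           label_encoder=trainer.ssd_input_encoder,
                           returns={'processed_images', 'encoded_labels'},
                           keep_images_without_gt=False)
    images, encoded = next(gen)
    # In ssd_keras the encoded labels have shape
    # (batch, n_anchor_boxes_total, n_classes + 1 + 12): one-hot classes
    # (incl. background), 4 box offsets, 4 anchor coords, 4 variances.
    print('images:', images.shape, 'encoded labels:', encoded.shape)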
def main(): create_new_model = True if args.model_name == 'default' else False if create_new_model: K.clear_session() # Clear previous models from memory. model = ssd_512(image_size=(Config.img_height, Config.img_width, Config.img_channels), n_classes=Config.n_classes, mode='training', l2_regularization=Config.l2_regularization, scales=Config.scales, aspect_ratios_per_layer=Config.aspect_ratios, two_boxes_for_ar1=Config.two_boxes_for_ar1, steps=Config.steps, offsets=Config.offsets, clip_boxes=Config.clip_boxes, variances=Config.variances, normalize_coords=Config.normalize_coords, subtract_mean=Config.mean_color, swap_channels=Config.swap_channels) adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) model.compile(optimizer=adam, loss=ssd_loss.compute_loss) else: model_path = "weights/" + args.model_name + ".h5" # We need to create an SSDLoss object in order to pass that to the model loader. ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) K.clear_session() # Clear previous models from memory. model = load_model(model_path, custom_objects={ 'AnchorBoxes': AnchorBoxes, 'L2Normalization': L2Normalization, 'compute_loss': ssd_loss.compute_loss }) # Load the data train_dataset = DataGenerator(load_images_into_memory=True, hdf5_dataset_path=os.getcwd() + "/data/" + args.dataset + '/polyp_train.h5') val_dataset = DataGenerator(load_images_into_memory=True, hdf5_dataset_path=os.getcwd() + "/data/" + args.dataset + '/polyp_val.h5') train_dataset_size = train_dataset.get_dataset_size() val_dataset_size = val_dataset.get_dataset_size() print("Number of images in the training dataset:\t{:>6}".format( train_dataset_size)) print("Number of images in the validation dataset:\t{:>6}".format( val_dataset_size)) batch_size = args.batch_size # For the training generator: ssd_data_augmentation = SSDDataAugmentation(img_height=Config.img_height, img_width=Config.img_width, background=Config.mean_color) # For the validation generator: convert_to_3_channels = ConvertTo3Channels() resize = Resize(height=Config.img_height, width=Config.img_width) # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function. # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes. predictor_sizes = [ model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3], model.get_layer('fc7_mbox_conf').output_shape[1:3], model.get_layer('conv6_2_mbox_conf').output_shape[1:3], model.get_layer('conv7_2_mbox_conf').output_shape[1:3], model.get_layer('conv8_2_mbox_conf').output_shape[1:3], model.get_layer('conv9_2_mbox_conf').output_shape[1:3], model.get_layer('conv10_2_mbox_conf').output_shape[1:3] ] ssd_input_encoder = SSDInputEncoder( img_height=Config.img_height, img_width=Config.img_width, n_classes=Config.n_classes, predictor_sizes=predictor_sizes, scales=Config.scales, aspect_ratios_per_layer=Config.aspect_ratios, two_boxes_for_ar1=Config.two_boxes_for_ar1, steps=Config.steps, offsets=Config.offsets, clip_boxes=Config.clip_boxes, variances=Config.variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.5, normalize_coords=Config.normalize_coords) # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function. 
train_generator = train_dataset.generate( batch_size=batch_size, shuffle=True, transformations=[ssd_data_augmentation], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) val_generator = val_dataset.generate( batch_size=batch_size, shuffle=False, transformations=[convert_to_3_channels, resize], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) model_checkpoint = ModelCheckpoint( filepath=os.getcwd() + '/weights/ssd512_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5', monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=30) csv_logger = CSVLogger(filename='ssd512_training_log.csv', separator=',', append=True) learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule) terminate_on_nan = TerminateOnNaN() tf_log = keras.callbacks.TensorBoard(log_dir=TF_LOG_PATH + args.tf_logs, histogram_freq=0, batch_size=batch_size, write_graph=True, write_grads=False, write_images=False) callbacks = [ model_checkpoint, csv_logger, learning_rate_scheduler, terminate_on_nan, tf_log ] # If you're resuming a previous training, set `initial_epoch` and `final_epoch` accordingly. initial_epoch = 0 final_epoch = args.final_epoch steps_per_epoch = 500 # Train/Fit the model if args.predict_mode == 'train': history = model.fit_generator(generator=train_generator, steps_per_epoch=steps_per_epoch, epochs=final_epoch, callbacks=callbacks, validation_data=val_generator, validation_steps=ceil(val_dataset_size / batch_size), initial_epoch=initial_epoch) # Prediction Output predict_generator = val_dataset.generate( batch_size=1, shuffle=True, transformations=[convert_to_3_channels, resize], label_encoder=None, returns={ 'processed_images', 'filenames', 'inverse_transform', 'original_images', 'original_labels' }, keep_images_without_gt=False) i = 0 for val in range(val_dataset_size): batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next( predict_generator) y_pred = model.predict(batch_images) y_pred_decoded = decode_detections( y_pred, confidence_thresh=0.5, iou_threshold=0.4, top_k=200, normalize_coords=Config.normalize_coords, img_height=Config.img_height, img_width=Config.img_width) # 5: Convert the predictions for the original image. 
y_pred_decoded_inv = apply_inverse_transforms( y_pred_decoded, batch_inverse_transforms) np.set_printoptions(precision=2, suppress=True, linewidth=90) print("Predicted boxes:\n") print(' class conf xmin ymin xmax ymax') print(y_pred_decoded_inv[i]) plt.figure(figsize=(20, 12)) plt.imshow(batch_images[i]) current_axis = plt.gca() colors = plt.cm.hsv( np.linspace(0, 1, Config.n_classes + 1)).tolist() # Set the colors for the bounding boxes classes = [ 'background', 'polyps' ] # Just so we can print class names onto the image instead of IDs for box in batch_original_labels[i]: xmin = box[1] ymin = box[2] xmax = box[3] ymax = box[4] label = '{}'.format(classes[int(box[0])]) current_axis.add_patch( plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color='green', fill=False, linewidth=2)) current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={ 'facecolor': 'green', 'alpha': 1.0 }) for box in y_pred_decoded_inv[i]: xmin = box[2] ymin = box[3] xmax = box[4] ymax = box[5] color = colors[int(box[0])] label = '{}: {:.2f}'.format(classes[int(box[0])], box[1]) current_axis.add_patch( plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color=color, fill=False, linewidth=2)) current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={ 'facecolor': color, 'alpha': 1.0 }) image = plt.gcf() plt.draw() image.savefig(os.getcwd() + "/val_ssd512val_" + str(val) + ".png", dpi=100)
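# The LearningRateScheduler above references an `lr_schedule` that is not defined
# in this snippet. A plausible module-level definition, using the step values
# that appear (commented out) later in this file -- an assumption, not the
# authors' exact schedule:
def lr_schedule(epoch):
    if epoch < 80:
        return 0.001
    elif epoch < 100:
        return 0.0001
    else:
        return 0.00001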
def get_dataset(
        args: argparse.Namespace, model: Model
) -> Tuple[Iterable[List[np.ndarray]], Iterable[List[np.ndarray]], int]:
    train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)
    val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)

    # Note: the path fragments must not start with '/': os.path.join discards
    # everything before an absolute component, which would silently drop args.data_dir.
    VOC_2007_images_dir = os.path.join(args.data_dir, 'VOC2007/JPEGImages/')
    VOC_2012_images_dir = os.path.join(args.data_dir, 'VOC2012/JPEGImages/')
    VOC_2007_annotations_dir = os.path.join(args.data_dir, 'VOC2007/Annotations/')
    VOC_2012_annotations_dir = os.path.join(args.data_dir, 'VOC2012/Annotations/')
    VOC_2007_trainval_image_set_filename = os.path.join(
        args.data_dir, 'VOC2007/ImageSets/Main/trainval.txt')
    VOC_2012_trainval_image_set_filename = os.path.join(
        args.data_dir, 'VOC2012/ImageSets/Main/trainval.txt')
    VOC_2007_test_image_set_filename = os.path.join(
        args.data_dir, 'VOC2007/ImageSets/Main/test.txt')

    classes = [
        'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
        'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
        'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
        'tvmonitor'
    ]

    train_dataset.parse_xml(
        images_dirs=[VOC_2007_images_dir, VOC_2012_images_dir],
        image_set_filenames=[VOC_2007_trainval_image_set_filename,
                             VOC_2012_trainval_image_set_filename],
        annotations_dirs=[VOC_2007_annotations_dir, VOC_2012_annotations_dir],
        classes=classes, include_classes='all',
        exclude_truncated=False, exclude_difficult=False, ret=False)
    val_dataset.parse_xml(
        images_dirs=[VOC_2007_images_dir],
        image_set_filenames=[VOC_2007_test_image_set_filename],
        annotations_dirs=[VOC_2007_annotations_dir],
        classes=classes, include_classes='all',
        exclude_truncated=False, exclude_difficult=True, ret=False)

    train_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07+12_trainval.h5',
                                      resize=False, variable_image_size=True, verbose=True)
    val_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07_test.h5',
                                    resize=False, variable_image_size=True, verbose=True)

    # For the training generator:
    ssd_data_augmentation = SSDDataAugmentation(img_height=args.img_height,
                                                img_width=args.img_width,
                                                background=args.mean_color)
    # For the validation generator:
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=args.img_height, width=args.img_width)

    # 5: Instantiate an encoder that can encode ground truth labels into the format
    #    needed by the SSD loss function. The encoder constructor needs the spatial
    #    dimensions of the model's predictor layers to create the anchor boxes.
    predictor_sizes = [
        model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
        model.get_layer('fc7_mbox_conf').output_shape[1:3],
        model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv9_2_mbox_conf').output_shape[1:3]
    ]
    ssd_input_encoder = SSDInputEncoder(
        img_height=args.img_height, img_width=args.img_width,
        n_classes=args.n_classes, predictor_sizes=predictor_sizes,
        scales=args.scales, aspect_ratios_per_layer=args.aspect_ratios,
        two_boxes_for_ar1=args.two_boxes_for_ar1, steps=args.steps,
        offsets=args.offsets, clip_boxes=args.clip_boxes,
        variances=args.variances, matching_type='multi',
        pos_iou_threshold=0.5, neg_iou_limit=0.5,
        normalize_coords=args.normalize_coords)

    train_generator = train_dataset.generate(
        batch_size=args.batch_size, shuffle=True,
        transformations=[ssd_data_augmentation],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)
    val_generator = val_dataset.generate(
        batch_size=args.batch_size, shuffle=False,
        transformations=[convert_to_3_channels, resize],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    return train_generator, val_generator, val_dataset.get_dataset_size()
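# A usage sketch for get_dataset. The Namespace fields mirror the attributes the
# function reads; the values below are the SSD300 Pascal VOC settings used
# elsewhere in this file, and `model` must already be built so the predictor
# layer shapes can be read.
def make_voc_generators(model, data_dir, batch_size=8):
    args = argparse.Namespace(
        data_dir=data_dir, img_height=300, img_width=300, n_classes=20,
        mean_color=[123, 117, 104],
        scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05],
        aspect_ratios=[[1.0, 2.0, 0.5],
                       [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                       [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                       [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                       [1.0, 2.0, 0.5],
                       [1.0, 2.0, 0.5]],
        two_boxes_for_ar1=True, steps=[8, 16, 32, 64, 100, 300],
        offsets=[0.5] * 6, clip_boxes=False, variances=[0.1, 0.1, 0.2, 0.2],
        normalize_coords=True, batch_size=batch_size)
    return get_dataset(args, model)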
def main(): create_new_model = True if args.model_name == 'default' else False if create_new_model: K.clear_session() # Clear previous models from memory. model = build_model(image_size=(Config.img_height, Config.img_width, Config.img_channels), n_classes=Config.n_classes, mode='training', l2_regularization=Config.l2_regularization, scales=Config.scales, aspect_ratios_global=Config.aspect_ratios, aspect_ratios_per_layer=None, two_boxes_for_ar1=Config.two_boxes_for_ar1, steps=Config.steps, offsets=Config.offsets, clip_boxes=Config.clip_boxes, variances=Config.variances, normalize_coords=Config.normalize_coords, subtract_mean=Config.intensity_mean, divide_by_stddev=Config.intensity_range) # model.load_weights("./weights/"+ args.model_name + ".h5", by_name=True) adam = Adam(lr=args.learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) model.compile(optimizer=adam, loss=ssd_loss.compute_loss) else: model_path = "weights/" + args.model_name + ".h5" # We need to create an SSDLoss object in order to pass that to the model loader. ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) K.clear_session() # Clear previous models from memory. model = load_model(model_path, custom_objects={ 'AnchorBoxes': AnchorBoxes, 'compute_loss': ssd_loss.compute_loss }) # Load the data train_dataset = DataGenerator(load_images_into_memory=True, hdf5_dataset_path=os.getcwd() + "/data/" + args.dataset + '/polyp_train.h5') val_dataset = DataGenerator(load_images_into_memory=True, hdf5_dataset_path=os.getcwd() + "/data/" + args.dataset + '/polyp_val.h5') train_dataset_size = train_dataset.get_dataset_size() val_dataset_size = val_dataset.get_dataset_size() print("Number of images in the training dataset:\t{:>6}".format( train_dataset_size)) print("Number of images in the validation dataset:\t{:>6}".format( val_dataset_size)) batch_size = args.batch_size # 4: Define the image processing chain. data_augmentation_chain = DataAugmentationConstantInputSize( random_brightness=(-48, 48, 0.5), random_contrast=(0.5, 1.8, 0.5), random_saturation=(0.5, 1.8, 0.5), random_hue=(18, 0.5), random_flip=0.5, random_translate=((0.03, 0.5), (0.03, 0.5), 0.5), random_scale=(0.5, 2.0, 0.5), n_trials_max=3, clip_boxes=True, overlap_criterion='area', bounds_box_filter=(0.3, 1.0), bounds_validator=(0.5, 1.0), n_boxes_min=1, background=(0, 0, 0)) # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function. # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes. predictor_sizes = [ model.get_layer('classes4').output_shape[1:3], model.get_layer('classes5').output_shape[1:3], model.get_layer('classes6').output_shape[1:3], model.get_layer('classes7').output_shape[1:3] ] ssd_input_encoder = SSDInputEncoder( img_height=Config.img_height, img_width=Config.img_width, n_classes=Config.n_classes, predictor_sizes=predictor_sizes, scales=Config.scales, aspect_ratios_global=Config.aspect_ratios, two_boxes_for_ar1=Config.two_boxes_for_ar1, steps=Config.steps, offsets=Config.offsets, clip_boxes=Config.clip_boxes, variances=Config.variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.3, normalize_coords=Config.normalize_coords) # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function. 
train_generator = train_dataset.generate( batch_size=batch_size, shuffle=True, transformations=[data_augmentation_chain], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) val_generator = val_dataset.generate( batch_size=batch_size, shuffle=False, transformations=[], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) model_checkpoint = ModelCheckpoint( filepath=os.getcwd() + '/weights/ssd7_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5', monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=1) csv_logger = CSVLogger(filename='ssd7_training_log.csv', separator=',', append=True) early_stopping = EarlyStopping(monitor='val_loss', min_delta=0.0, patience=10, verbose=1) reduce_learning_rate = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=8, verbose=1, epsilon=0.001, cooldown=0, min_lr=0.00001) tf_log = keras.callbacks.TensorBoard(log_dir=TF_LOG_PATH + args.tf_logs, histogram_freq=0, batch_size=batch_size, write_graph=True, write_grads=False, write_images=False) callbacks = [model_checkpoint, csv_logger, reduce_learning_rate, tf_log] # If you're resuming a previous training, set `initial_epoch` and `final_epoch` accordingly. initial_epoch = 0 final_epoch = args.final_epoch steps_per_epoch = 1000 # Train/Fit the model if args.predict_mode == 'train': history = model.fit_generator(generator=train_generator, steps_per_epoch=steps_per_epoch, epochs=final_epoch, callbacks=callbacks, validation_data=val_generator, validation_steps=ceil(val_dataset_size / batch_size), initial_epoch=initial_epoch) # Prediction Output predict_generator = val_dataset.generate( batch_size=1, shuffle=False, transformations=[], label_encoder=ssd_input_encoder, returns={'processed_images', 'processed_labels', 'filenames'}, keep_images_without_gt=False) i = 0 for val in range(val_dataset_size): batch_images, batch_labels, batch_filenames = next(predict_generator) y_pred = model.predict(batch_images) y_pred_decoded = decode_detections( y_pred, confidence_thresh=0.5, iou_threshold=0.5, top_k=200, normalize_coords=Config.normalize_coords, img_height=Config.img_height, img_width=Config.img_width) np.set_printoptions(precision=2, suppress=True, linewidth=90) print("Predicted boxes:\n") print(' class conf xmin ymin xmax ymax') print(y_pred_decoded[i]) plt.figure(figsize=(20, 12)) plt.imshow(batch_images[i]) current_axis = plt.gca() colors = plt.cm.hsv( np.linspace(0, 1, Config.n_classes + 1)).tolist() # Set the colors for the bounding boxes classes = [ 'background', 'polyps' ] # Just so we can print class names onto the image instead of IDs # Draw the ground truth boxes in green (omit the label for more clarity) for box in batch_labels[i]: xmin = box[1] ymin = box[2] xmax = box[3] ymax = box[4] label = '{}'.format(classes[int(box[0])]) current_axis.add_patch( plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color='green', fill=False, linewidth=2)) current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={ 'facecolor': 'green', 'alpha': 1.0 }) # Draw the predicted boxes in blue for box in y_pred_decoded[i]: xmin = box[-4] ymin = box[-3] xmax = box[-2] ymax = box[-1] color = colors[int(box[0])] label = '{}: {:.2f}'.format(classes[int(box[0])], box[1]) current_axis.add_patch( plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color=color, fill=False, linewidth=2)) current_axis.text(xmin, ymin, label, size='x-large', 
color='white', bbox={ 'facecolor': color, 'alpha': 1.0 }) image = plt.gcf() # plt.show() plt.draw() image.savefig(os.getcwd() + "/val_predictions/val_" + str(val) + ".png", dpi=100)
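# These entry points read a module-level `args`; a sketch of an argparse parser
# covering the flags referenced above (all defaults are assumptions):
def build_arg_parser():
    parser = argparse.ArgumentParser(description='SSD polyp detection training')
    parser.add_argument('--model_name', default='default',
                        help="'default' builds a fresh model; any other value loads weights/<name>.h5")
    parser.add_argument('--dataset', default='polyps',
                        help='subfolder of ./data containing polyp_train.h5 / polyp_val.h5')
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--final_epoch', type=int, default=120)
    parser.add_argument('--learning_rate', type=float, default=0.001)
    parser.add_argument('--predict_mode', default='train', choices=['train', 'predict'])
    parser.add_argument('--tf_logs', default='run1',
                        help='suffix appended to TF_LOG_PATH for TensorBoard logs')
    return parser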
def _main_(args): print('Hello World! This is {:s}'.format(args.desc)) # config_path = args.conf # with open(config_path) as config_buffer: # config = json.loads(config_buffer.read()) ############################################################# # Set model parameters ############################################################# img_height = 300 # Height of the model input images img_width = 300 # Width of the model input images img_channels = 3 # Number of color channels of the model input images mean_color = [123, 117, 104] # The per-channel mean of the images in the dataset. Do not change this value if you're using any of the pre-trained weights. swap_channels = [2, 1, 0] # The color channel order in the original SSD is BGR, so we'll have the model reverse the color channel order of the input images. n_classes = 20 # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO scales_pascal = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets scales_coco = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05] # The anchor box scaling factors used in the original SSD300 for the MS COCO datasets scales = scales_pascal aspect_ratios = [[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]] # The anchor box aspect ratios used in the original SSD300; the order matters two_boxes_for_ar1 = True steps = [8, 16, 32, 64, 100, 300] # The space between two adjacent anchor box center points for each predictor layer. offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5] # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer. clip_boxes = False # Whether or not to clip the anchor boxes to lie entirely within the image boundaries variances = [0.1, 0.1, 0.2, 0.2] # The variances by which the encoded target coordinates are divided as in the original implementation normalize_coords = True ############################################################# # Create the model ############################################################# # 1: Build the Keras model. model = ssd_300(image_size=(img_height, img_width, img_channels), n_classes=n_classes, mode='training', l2_regularization=0.0005, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, normalize_coords=normalize_coords, subtract_mean=mean_color, swap_channels=swap_channels) # 2: Load some weights into the model. # 3: Instantiate an optimizer and the SSD loss function and compile the model. adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) model.compile(optimizer=adam, loss=ssd_loss.compute_loss) ############################################################# # Prepare the data ############################################################# # 1: Instantiate two `DataGenerator` objects: One for training, one for validation. train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None) val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None) # 2: Parse the image and label lists for the training and validation datasets. This can take a while. 
VOC_2007_images_dir = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages' VOC_2007_annotations_dir = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/Annotations' VOC_2007_train_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/train.txt' VOC_2007_val_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/val.txt' # VOC_2007_trainval_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt' # VOC_2007_test_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtest_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/test.txt' classes = ['background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] train_dataset.parse_xml(images_dirs=[VOC_2007_images_dir], image_set_filenames=[VOC_2007_train_image_set_filename], annotations_dirs=[VOC_2007_annotations_dir], classes=classes, include_classes='all', exclude_truncated=False, exclude_difficult=False, ret=False) val_dataset.parse_xml(images_dirs=[VOC_2007_images_dir], image_set_filenames=[VOC_2007_val_image_set_filename], annotations_dirs=[VOC_2007_annotations_dir], classes=classes, include_classes='all', exclude_truncated=False, exclude_difficult=True, ret=False) train_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07+12_trainval.h5', resize=False, variable_image_size=True, verbose=True) val_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07_test.h5', resize=False, variable_image_size=True, verbose=True) # 3: Set the batch size. batch_size = 8 # Change the batch size if you like, or if you run into GPU memory issues. # 4: Set the image transformations for pre-processing and data augmentation options. ssd_data_augmentation = SSDDataAugmentation(img_height=img_height, img_width=img_width, background=mean_color) convert_to_3_channels = ConvertTo3Channels() resize = Resize(height=img_height, width=img_width) # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function. predictor_sizes = [model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3], model.get_layer('fc7_mbox_conf').output_shape[1:3], model.get_layer('conv6_2_mbox_conf').output_shape[1:3], model.get_layer('conv7_2_mbox_conf').output_shape[1:3], model.get_layer('conv8_2_mbox_conf').output_shape[1:3], model.get_layer('conv9_2_mbox_conf').output_shape[1:3]] ssd_input_encoder = SSDInputEncoder(img_height=img_height, img_width=img_width, n_classes=n_classes, predictor_sizes=predictor_sizes, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.5, normalize_coords=normalize_coords) # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function. 
train_generator = train_dataset.generate(batch_size=batch_size, shuffle=True, transformations=[ssd_data_augmentation], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) val_generator = val_dataset.generate(batch_size=batch_size, shuffle=False, transformations=[convert_to_3_channels, resize], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) # Get the number of samples in the training and validations datasets. train_dataset_size = train_dataset.get_dataset_size() val_dataset_size = val_dataset.get_dataset_size() print("Number of images in the training dataset:\t{:>6}".format(train_dataset_size)) print("Number of images in the validation dataset:\t{:>6}".format(val_dataset_size)) ############################################################# # Kick off the training ############################################################# # Define model callbacks. model_checkpoint = ModelCheckpoint( filepath='ssd300_pascal_07+12_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5', monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=1) csv_logger = CSVLogger(filename='ssd300_pascal_07+12_training_log.csv', separator=',', append=True) learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule, verbose=1) terminate_on_nan = TerminateOnNaN() callbacks = [model_checkpoint, csv_logger, learning_rate_scheduler, terminate_on_nan] # Train initial_epoch = 0 final_epoch = 120 steps_per_epoch = 1000 history = model.fit_generator(generator=train_generator, steps_per_epoch=steps_per_epoch, epochs=final_epoch, callbacks=callbacks, validation_data=val_generator, validation_steps=ceil(val_dataset_size / batch_size), initial_epoch=initial_epoch) ############################################################# # Run the evaluation ############################################################# # 1: Set the generator for the predictions. predict_generator = val_dataset.generate(batch_size=1, shuffle=True, transformations=[convert_to_3_channels, resize], label_encoder=None, returns={'processed_images', 'filenames', 'inverse_transform', 'original_images', 'original_labels'}, keep_images_without_gt=False) # 2: Generate samples. batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next( predict_generator) i = 0 # Which batch item to look at print("Image:", batch_filenames[i]) print() print("Ground truth boxes:\n") print(np.array(batch_original_labels[i])) # 3: Make predictions. y_pred = model.predict(batch_images) # 4: Decode the raw predictions in `y_pred`. y_pred_decoded = decode_detections(y_pred, confidence_thresh=0.5, iou_threshold=0.4, top_k=200, normalize_coords=normalize_coords, img_height=img_height, img_width=img_width) # 5: Convert the predictions for the original image. 
y_pred_decoded_inv = apply_inverse_transforms(y_pred_decoded, batch_inverse_transforms) np.set_printoptions(precision=2, suppress=True, linewidth=90) print("Predicted boxes:\n") print(' class conf xmin ymin xmax ymax') print(y_pred_decoded_inv[i]) # 6: Draw the predicted boxes onto the image # Set the colors for the bounding boxes colors = plt.cm.hsv(np.linspace(0, 1, n_classes + 1)).tolist() classes = ['background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] plt.figure(figsize=(20, 12)) plt.imshow(batch_original_images[i]) current_axis = plt.gca() for box in batch_original_labels[i]: xmin = box[1] ymin = box[2] xmax = box[3] ymax = box[4] label = '{}'.format(classes[int(box[0])]) current_axis.add_patch( plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color='green', fill=False, linewidth=2)) current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor': 'green', 'alpha': 1.0}) for box in y_pred_decoded_inv[i]: xmin = box[2] ymin = box[3] xmax = box[4] ymax = box[5] color = colors[int(box[0])] label = '{}: {:.2f}'.format(classes[int(box[0])], box[1]) current_axis.add_patch( plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color=color, fill=False, linewidth=2)) current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor': color, 'alpha': 1.0})
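# Two loose ends in the script above: `lr_schedule` is assumed to be defined at
# module level (the step-schedule sketch given earlier in this file fits this
# 120-epoch run), and the prediction figure is drawn but never displayed or
# saved. Ending _main_ with something like the following would fix the latter
# (the file name is illustrative):
#     plt.show()  # or: plt.gcf().savefig('ssd300_pascal_prediction.png', dpi=100)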
def set_generator(self, train_images_dir, train_annotation_path, batch_size, val_images_dir=None, val_annotation_path=None): train_dataset = DataGenerator(load_images_into_memory=True, hdf5_dataset_path=None) train_dataset.parse_json(images_dirs=[train_images_dir], annotations_filenames=[train_annotation_path], ground_truth_available=True, include_classes='all', ret=False, verbose=True) train_dataset_size = train_dataset.get_dataset_size() if self.model_name == 'ssd_7': # Define the image processing chain. ssd_data_augmentation = DataAugmentationConstantInputSize( random_brightness=(-48, 48, 0.5), random_contrast=(0.5, 1.8, 0.5), random_saturation=(0.5, 1.8, 0.5), random_hue=(18, 0.5), random_flip=0.5, random_translate=((0.03, 0.5), (0.03, 0.5), 0.5), random_scale=(0.5, 2.0, 0.5), n_trials_max=3, clip_boxes=True, overlap_criterion='area', bounds_box_filter=(0.3, 1.0), bounds_validator=(0.5, 1.0), n_boxes_min=1, background=(0, 0, 0)) # Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function. # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes. predictor_sizes = [ self.model.get_layer('classes4').output_shape[1:3], self.model.get_layer('classes5').output_shape[1:3], self.model.get_layer('classes6').output_shape[1:3], self.model.get_layer('classes7').output_shape[1:3] ] elif self.model_name == 'ssd_300': # For the training generator: ssd_data_augmentation = SSDDataAugmentation( img_height=self.image_size[0], img_width=self.image_size[1], background=self.mean_color) # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function. # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes. predictor_sizes = [ self.model.get_layer( 'conv4_3_norm_mbox_conf').output_shape[1:3], self.model.get_layer('fc7_mbox_conf').output_shape[1:3], self.model.get_layer('conv6_2_mbox_conf').output_shape[1:3], self.model.get_layer('conv7_2_mbox_conf').output_shape[1:3], self.model.get_layer('conv8_2_mbox_conf').output_shape[1:3], self.model.get_layer('conv9_2_mbox_conf').output_shape[1:3] ] elif self.model_name == 'ssd_512': # For the training generator: ssd_data_augmentation = SSDDataAugmentation( img_height=self.image_size[0], img_width=self.image_size[1], background=self.mean_color) # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function. # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes. 
predictor_sizes = [ self.model.get_layer( 'conv4_3_norm_mbox_conf').output_shape[1:3], self.model.get_layer('fc7_mbox_conf').output_shape[1:3], self.model.get_layer('conv6_2_mbox_conf').output_shape[1:3], self.model.get_layer('conv7_2_mbox_conf').output_shape[1:3], self.model.get_layer('conv8_2_mbox_conf').output_shape[1:3], self.model.get_layer('conv9_2_mbox_conf').output_shape[1:3], self.model.get_layer('conv10_2_mbox_conf').output_shape[1:3] ] ssd_input_encoder = SSDInputEncoder( img_height=self.image_size[0], img_width=self.image_size[1], n_classes=self.n_classes, predictor_sizes=predictor_sizes, scales=self.scales, aspect_ratios_per_layer=self.aspect_ratios_per_layer, two_boxes_for_ar1=self.two_boxes_for_ar1, steps=self.steps, offsets=self.offsets, clip_boxes=self.clip_boxes, variances=self.variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.5, normalize_coords=self.normalize_coords) self.generator = train_dataset.generate( batch_size=batch_size, shuffle=True, transformations=[ssd_data_augmentation], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) self.steps_per_epoch = ceil(train_dataset_size / batch_size) if val_images_dir is not None and val_annotation_path is not None: val_dataset = DataGenerator(load_images_into_memory=True, hdf5_dataset_path=None) val_dataset.parse_json(images_dirs=[val_images_dir], annotations_filenames=[val_annotation_path], ground_truth_available=True, include_classes='all', ret=False, verbose=True) val_dataset_size = val_dataset.get_dataset_size() if self.model_name == 'ssd_300' or self.model_name == 'ssd_512': # For the validation generator: convert_to_3_channels = ConvertTo3Channels() resize = Resize(height=self.image_size[0], width=self.image_size[1]) transformations = [convert_to_3_channels, resize] else: transformations = [] self.validation_data = val_dataset.generate( batch_size=batch_size, shuffle=False, transformations=transformations, label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) self.validation_steps = ceil(val_dataset_size / batch_size) else: self.validation_data = None self.validation_steps = None
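# Usage sketch for set_generator (attribute names other than those assigned
# above are illustrative): after calling it, the handles it stores plug straight
# into fit_generator.
#     self.set_generator(train_images_dir='data/train', train_annotation_path='data/train.json',
#                        batch_size=16, val_images_dir='data/val', val_annotation_path='data/val.json')
#     self.model.fit_generator(self.generator, steps_per_epoch=self.steps_per_epoch,
#                              epochs=100, validation_data=self.validation_data,
#                              validation_steps=self.validation_steps)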
def main(job_dir, **args):
    ## Setting up the path for saving logs
    logs_dir = job_dir + 'logs/'
    data_dir = "gs://deeplearningteam11/data"

    print("Current Directory: " + os.path.dirname(__file__))
    print("Let's copy the data to: " + os.path.dirname(__file__))
    os.system("gsutil -m cp -r " + data_dir + " " + os.path.dirname(__file__) + " > /dev/null 2>&1 ")
    #exit(0)

    with tf.device('/device:GPU:0'):
        # 1: Build the Keras model.
        K.clear_session()  # Clear previous models from memory.
        model = ssd_300(image_size=(img_height, img_width, img_channels),
                        n_classes=n_classes, mode='training',
                        l2_regularization=0.0005, scales=scales,
                        aspect_ratios_per_layer=aspect_ratios,
                        two_boxes_for_ar1=two_boxes_for_ar1,
                        steps=steps, offsets=offsets, clip_boxes=clip_boxes,
                        variances=variances, normalize_coords=normalize_coords,
                        subtract_mean=mean_color, swap_channels=swap_channels)

        # Set the path to the `.h5` file of the model to be loaded.
        model_file = file_io.FileIO('gs://deeplearningteam11/vgg19BNmodel.h5', mode='rb')

        # Store model locally on instance
        model_path = 'model.h5'
        with open(model_path, 'wb') as f:
            f.write(model_file.read())
        model_file.close()

        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
        model = load_model(model_path,
                           custom_objects={
                               'AnchorBoxes': AnchorBoxes,
                               'L2Normalization': L2Normalization,
                               'DecodeDetections': DecodeDetections,
                               'compute_loss': ssd_loss.compute_loss
                           })

        for layer in model.layers:
            layer.trainable = True

        model.summary()

        # 1: Instantiate two `DataGenerator` objects: One for training, one for validation.
        train_dataset = DataGenerator(load_images_into_memory=True, hdf5_dataset_path=None)
        val_dataset = DataGenerator(load_images_into_memory=True, hdf5_dataset_path=None)

        # 2: Parse the image and label lists for the training and validation datasets.
        #    This can take a while.

        # VOC 2007
        # The directories that contain the images.
        VOC_2007_train_images_dir = 'data/data/VOC2007/train/JPEGImages/'
        VOC_2007_test_images_dir = 'data/data/VOC2007/test/JPEGImages/'
        VOC_2007_train_anns_dir = 'data/data/VOC2007/train/Annotations/'
        VOC_2007_test_anns_dir = 'data/data/VOC2007/test/Annotations/'

        # The paths to the image sets.
        VOC_2007_trainval_image_set_dir = 'data/data/VOC2007/train/ImageSets/Main/'
        VOC_2007_test_image_set_dir = 'data/data/VOC2007/test/ImageSets/Main/'

        VOC_2007_train_images_dir = os.path.dirname(__file__) + "/" + VOC_2007_train_images_dir
        VOC_2007_test_images_dir = os.path.dirname(__file__) + "/" + VOC_2007_test_images_dir
        VOC_2007_train_anns_dir = os.path.dirname(__file__) + "/" + VOC_2007_train_anns_dir
        VOC_2007_test_anns_dir = os.path.dirname(__file__) + "/" + VOC_2007_test_anns_dir
        VOC_2007_trainval_image_set_dir = os.path.dirname(__file__) + "/" + VOC_2007_trainval_image_set_dir
        VOC_2007_test_image_set_dir = os.path.dirname(__file__) + "/" + VOC_2007_test_image_set_dir

        VOC_2007_trainval_image_set_filename = VOC_2007_trainval_image_set_dir + '/trainval.txt'
        VOC_2007_test_image_set_filename = VOC_2007_test_image_set_dir + '/test.txt'

        # The XML parser needs to know what object class names to look for and
        # in which order to map them to integers.
classes = [ 'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ] print("Parsing Training Data ...") train_dataset.parse_xml( images_dirs=[VOC_2007_train_images_dir], image_set_filenames=[VOC_2007_trainval_image_set_filename], annotations_dirs=[VOC_2007_train_anns_dir], classes=classes, include_classes='all', exclude_truncated=False, exclude_difficult=False, ret=False, verbose=False) print("Done") print( "================================================================") print("Parsing Test Data ...") val_dataset.parse_xml( images_dirs=[VOC_2007_test_images_dir], image_set_filenames=[VOC_2007_test_image_set_filename], annotations_dirs=[VOC_2007_test_anns_dir], classes=classes, include_classes='all', exclude_truncated=False, exclude_difficult=True, ret=False, verbose=False) print("Done") print( "================================================================") # 3: Set the batch size. batch_size = 32 # Change the batch size if you like, or if you run into GPU memory issues. # 4: Set the image transformations for pre-processing and data augmentation options. # For the training generator: ssd_data_augmentation = SSDDataAugmentation(img_height=img_height, img_width=img_width, background=mean_color) # For the validation generator: convert_to_3_channels = ConvertTo3Channels() resize = Resize(height=img_height, width=img_width) # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function. # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes. predictor_sizes = [ model.get_layer('conv4_4_norm_mbox_conf').output_shape[1:3], model.get_layer('fc7_mbox_conf').output_shape[1:3], model.get_layer('conv8_2_mbox_conf').output_shape[1:3], model.get_layer('conv9_2_mbox_conf').output_shape[1:3], model.get_layer('conv10_2_mbox_conf').output_shape[1:3], model.get_layer('conv11_2_mbox_conf').output_shape[1:3] ] ssd_input_encoder = SSDInputEncoder( img_height=img_height, img_width=img_width, n_classes=n_classes, predictor_sizes=predictor_sizes, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.5, normalize_coords=normalize_coords) # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function. train_generator = train_dataset.generate( batch_size=batch_size, shuffle=True, transformations=[ssd_data_augmentation], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) val_generator = val_dataset.generate( batch_size=batch_size, shuffle=False, transformations=[convert_to_3_channels, resize], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) # Get the number of samples in the training and validations datasets. train_dataset_size = train_dataset.get_dataset_size() val_dataset_size = val_dataset.get_dataset_size() print("Number of images in the training dataset:\t{:>6}".format( train_dataset_size)) print("Number of images in the validation dataset:\t{:>6}".format( val_dataset_size)) # Define a learning rate schedule. 
def lr_schedule(epoch): return 1e-6 # if epoch < 80: # return 0.001 # elif epoch < 100: # return 0.0001 # else: # return 0.00001 learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule, verbose=1) terminate_on_nan = TerminateOnNaN() callbacks = [learning_rate_scheduler, terminate_on_nan] # If you're resuming a previous training, set `initial_epoch` and `final_epoch` accordingly. initial_epoch = 120 final_epoch = 200 steps_per_epoch = 500 history = model.fit_generator(generator=train_generator, steps_per_epoch=steps_per_epoch, epochs=final_epoch, callbacks=callbacks, validation_data=val_generator, validation_steps=ceil(val_dataset_size / batch_size), initial_epoch=initial_epoch) model_name = "vgg19BNmodel_cont.h5" model.save(model_name) with file_io.FileIO(model_name, mode='rb') as input_f: with file_io.FileIO("gs://deeplearningteam11/" + model_name, mode='w+') as output_f: output_f.write(input_f.read())
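# Note on the copy above: the source is opened in binary mode ('rb') but the GCS
# target in text mode ('w+'), which can break on Python 3; 'wb+' is the safer
# choice. Assuming file_io.copy is available (it is part of the same
# tensorflow.python.lib.io.file_io module), the round-trip can also be avoided:
#     file_io.copy(model_name, "gs://deeplearningteam11/" + model_name, overwrite=True)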
def train(optimizer, learning_rate, trial): model = create_network() # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function. # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes. predictor_sizes = [ model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3], model.get_layer('fc7_mbox_conf').output_shape[1:3], model.get_layer('conv6_2_mbox_conf').output_shape[1:3], model.get_layer('conv7_2_mbox_conf').output_shape[1:3], model.get_layer('conv8_2_mbox_conf').output_shape[1:3], model.get_layer('conv9_2_mbox_conf').output_shape[1:3] ] ssd_input_encoder = SSDInputEncoder(img_height=img_height, img_width=img_width, n_classes=n_classes, predictor_sizes=predictor_sizes, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.5, normalize_coords=normalize_coords) # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function. train_generator = train_dataset.generate( batch_size=batch_size, shuffle=True, transformations=[ssd_data_augmentation], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) val_generator = val_dataset.generate( batch_size=batch_size, shuffle=False, transformations=[convert_to_3_channels, resize], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) # Get the number of samples in the training and validations datasets. train_dataset_size = train_dataset.get_dataset_size() val_dataset_size = val_dataset.get_dataset_size() print("Number of images in the training dataset:\t{:>6}".format( train_dataset_size)) print("Number of images in the validation dataset:\t{:>6}".format( val_dataset_size)) # 3: Instantiate an optimizer and the SSD loss function and compile the model. # If you want to follow the original Caffe implementation, use the preset SGD # optimizer, otherwise I'd recommend the commented-out Adam optimizer. if optimizer == "sgd": opt = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False) # elif optimizer == "momentum": # model.compile(tf.train.MomentumOptimizer(learning_rate, 0.9), "categorical_crossentropy", ["acc"]) # elif optimizer == "rmsprop": # model.compile(tf.train.RMSPropOptimizer(learning_rate), "categorical_crossentropy", ["acc"]) elif optimizer == "adam": opt = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) model.compile(optimizer=opt, loss=ssd_loss.compute_loss) # tpu_grpc_url = "grpc://"+os.environ["COLAB_TPU_ADDR"] # tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(tpu_grpc_url) # strategy = keras_support.TPUDistributionStrategy(tpu_cluster_resolver) # model = tf.contrib.tpu.keras_to_tpu_model(model, strategy=strategy) hist = History() truncate = OptunaCallback(trial) # model.fit_generator(train_gen, X_train.shape[0]//batch_size, callbacks=[hist, truncate], # validation_data=test_gen, validation_steps=X_test.shape[0]//batch_size, # epochs=50) callbacks = [ model_checkpoint, csv_logger, learning_rate_scheduler, hist, truncate, # time_callback, # early_stopping, terminate_on_nan ] # If you're resuming a previous training, set `initial_epoch` and `final_epoch` accordingly. 
initial_epoch = 0 final_epoch = 50 steps_per_epoch = train_dataset_size // batch_size history = model.fit_generator(generator=train_generator, steps_per_epoch=steps_per_epoch, epochs=final_epoch, callbacks=callbacks, validation_data=val_generator, validation_steps=ceil(val_dataset_size / batch_size), initial_epoch=initial_epoch) history = hist.history return history
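# A sketch of the Optuna side driving train() above, assuming the standard study
# API; the search space (optimizer choice, log-uniform learning rate) is an
# assumption:
import optuna

def objective(trial):
    optimizer = trial.suggest_categorical('optimizer', ['sgd', 'adam'])
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
    history = train(optimizer, learning_rate, trial)  # returns hist.history
    return min(history['val_loss'])

# study = optuna.create_study(direction='minimize')
# study.optimize(objective, n_trials=20)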
def create_generator(self, model, train_dataset, val_dataset):
    # Data augmentation chain
    data_augmentation_chain = DataAugmentationConstantInputSize(
        random_brightness=(-48, 48, 0.5),
        random_contrast=(0.5, 1.8, 0.5),
        random_saturation=(0.5, 1.8, 0.5),
        random_hue=(18, 0.5),
        random_flip=0.5,
        random_translate=((0.03, 0.5), (0.03, 0.5), 0.5),
        random_scale=(0.5, 2.0, 0.5),
        n_trials_max=3,
        clip_boxes=True,
        overlap_criterion='area',
        bounds_box_filter=(0.3, 1.0),
        bounds_validator=(0.5, 1.0),
        n_boxes_min=1,
        background=(0, 0, 0))

    # 5: Instantiate an encoder that can encode ground truth labels into the format
    #    needed by the SSD loss function. The encoder constructor needs the spatial
    #    dimensions of the model's predictor layers to create the anchor boxes.
    predictor_sizes = [
        model.get_layer('classes4').output_shape[1:3],
        model.get_layer('classes5').output_shape[1:3],
        model.get_layer('classes6').output_shape[1:3],
        model.get_layer('classes7').output_shape[1:3]
    ]
    ssd_input_encoder = SSDInputEncoder(
        img_height=self.img_height, img_width=self.img_width,
        n_classes=self.n_classes, predictor_sizes=predictor_sizes,
        scales=self.scales, aspect_ratios_global=self.aspect_ratios,
        variances=self.variances, matching_type='multi',
        pos_iou_threshold=0.5, neg_iou_limit=0.3,
        normalize_coords=self.normalize_coords)

    # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.
    train_generator = train_dataset.generate(
        batch_size=self.batch_size, shuffle=True,
        transformations=[data_augmentation_chain],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)
    val_generator = val_dataset.generate(
        batch_size=self.batch_size, shuffle=False,
        transformations=[],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    return train_generator, val_generator
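# Usage sketch (assumes self.model and the two DataGenerator objects exist):
#     train_gen, val_gen = self.create_generator(self.model, train_dataset, val_dataset)
#     batch_images, batch_labels = next(train_gen)  # sanity-check one encoded batch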
def _main_(args):
    config_path = args.conf

    with open(config_path) as config_buffer:
        config = json.loads(config_buffer.read())

    ###############################
    #   Parse the annotations
    ###############################
    path_imgs_training = config['train']['train_image_folder']
    path_anns_training = config['train']['train_annot_folder']
    path_imgs_val = config['valid']['valid_image_folder']
    path_anns_val = config['valid']['valid_annot_folder']
    labels = config['model']['labels']

    categories = {}
    # categories = {"Razor": 1, "Gun": 2, "Knife": 3, "Shuriken": 4}
    # Category 0 is the background.
    for i in range(len(labels)):
        categories[labels[i]] = i + 1
    print('\nTraining on: \t' + str(categories) + '\n')

    ####################################
    #   Parameters
    ###################################
    img_height = config['model']['input']  # Height of the model input images
    img_width = config['model']['input']  # Width of the model input images
    img_channels = 3  # Number of color channels of the model input images
    mean_color = [123, 117, 104]  # The per-channel mean of the images in the dataset. Do not change this value if you're using any of the pre-trained weights.
    swap_channels = [2, 1, 0]  # The color channel order in the original SSD is BGR, so we'll have the model reverse the color channel order of the input images.
    n_classes = len(labels)  # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO
    scales_pascal = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]  # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets
    # scales_coco = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]  # The anchor box scaling factors used in the original SSD300 for the MS COCO datasets
    scales = scales_pascal
    aspect_ratios = [[1.0, 2.0, 0.5],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5],
                     [1.0, 2.0, 0.5]]  # The anchor box aspect ratios used in the original SSD300; the order matters
    two_boxes_for_ar1 = True
    steps = [8, 16, 32, 64, 100, 300]  # The space between two adjacent anchor box center points for each predictor layer.
    offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]  # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer.
    clip_boxes = False  # Whether or not to clip the anchor boxes to lie entirely within the image boundaries
    variances = [0.1, 0.1, 0.2, 0.2]  # The variances by which the encoded target coordinates are divided as in the original implementation
    normalize_coords = True

    K.clear_session()  # Clear previous models from memory.

    model_path = config['train']['saved_weights_name']

    # 3: Instantiate an optimizer and the SSD loss function and compile the model.
    #    If you want to follow the original Caffe implementation, use the preset SGD
    #    optimizer, otherwise I'd recommend the commented-out Adam optimizer.
    if config['model']['backend'] == 'ssd512':
        aspect_ratios = [[1.0, 2.0, 0.5],
                         [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                         [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                         [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                         [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                         [1.0, 2.0, 0.5],
                         [1.0, 2.0, 0.5]]
        steps = [8, 16, 32, 64, 100, 200, 300]  # The space between two adjacent anchor box center points for each predictor layer.
offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] scales = [0.07, 0.15, 0.3, 0.45, 0.6, 0.75, 0.9, 1.05] elif config['model']['backend'] == 'ssd7': #weights_path = 'VGG_ILSVRC_16_layers_fc_reduced.h5' scales = [ 0.08, 0.16, 0.32, 0.64, 0.96 ] # An explicit list of anchor box scaling factors. If this is passed, it will override `min_scale` and `max_scale`. aspect_ratios = [0.5, 1.0, 2.0] # The list of aspect ratios for the anchor boxes two_boxes_for_ar1 = True # Whether or not you want to generate two anchor boxes for aspect ratio 1 steps = None # In case you'd like to set the step sizes for the anchor box grids manually; not recommended offsets = None if os.path.exists(model_path): print("\nLoading pretrained weights.\n") # We need to create an SSDLoss object in order to pass that to the model loader. ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) K.clear_session() # Clear previous models from memory. model = load_model(model_path, custom_objects={ 'AnchorBoxes': AnchorBoxes, 'L2Normalization': L2Normalization, 'compute_loss': ssd_loss.compute_loss }) else: #################################### # Build the Keras model. ################################### if config['model']['backend'] == 'ssd300': #weights_path = 'VGG_VOC0712Plus_SSD_300x300_ft_iter_160000.h5' from models.keras_ssd300 import ssd_300 as ssd model = ssd_300(image_size=(img_height, img_width, img_channels), n_classes=n_classes, mode='training', l2_regularization=0.0005, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, normalize_coords=normalize_coords, subtract_mean=mean_color, swap_channels=swap_channels) elif config['model']['backend'] == 'ssd512': #weights_path = 'VGG_VOC0712Plus_SSD_512x512_ft_iter_160000.h5' from models.keras_ssd512 import ssd_512 as ssd # 2: Load some weights into the model. model = ssd(image_size=(img_height, img_width, img_channels), n_classes=n_classes, mode='training', l2_regularization=0.0005, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, normalize_coords=normalize_coords, swap_channels=swap_channels) elif config['model']['backend'] == 'ssd7': #weights_path = 'VGG_ILSVRC_16_layers_fc_reduced.h5' from models.keras_ssd7 import build_model as ssd scales = [ 0.08, 0.16, 0.32, 0.64, 0.96 ] # An explicit list of anchor box scaling factors. If this is passed, it will override `min_scale` and `max_scale`. aspect_ratios = [ 0.5, 1.0, 2.0 ] # The list of aspect ratios for the anchor boxes two_boxes_for_ar1 = True # Whether or not you want to generate two anchor boxes for aspect ratio 1 steps = None # In case you'd like to set the step sizes for the anchor box grids manually; not recommended offsets = None model = ssd(image_size=(img_height, img_width, img_channels), n_classes=n_classes, mode='training', l2_regularization=0.0005, scales=scales, aspect_ratios_global=aspect_ratios, aspect_ratios_per_layer=None, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, normalize_coords=normalize_coords, subtract_mean=None, divide_by_stddev=None) else: print('Wrong Backend') print('OK create model') #sgd = SGD(lr=config['train']['learning_rate'], momentum=0.9, decay=0.0, nesterov=False) # TODO: Set the path to the weights you want to load. 
    # only for ssd300 or ssd512
    weights_path = 'VGG_ILSVRC_16_layers_fc_reduced.h5'
    print("\nLoading pretrained weights VGG.\n")
    model.load_weights(weights_path, by_name=True)

    # 3: Instantiate an optimizer and the SSD loss function and compile the model.
    #    If you want to follow the original Caffe implementation, use the preset SGD
    #    optimizer, otherwise I'd recommend the commented-out Adam optimizer.
    # adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    # sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False)
    optimizer = Adam(lr=config['train']['learning_rate'],
                     beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    model.compile(optimizer=optimizer, loss=ssd_loss.compute_loss)
    model.summary()

    #####################################################################
    #  Instantiate two `DataGenerator` objects: One for training, one for validation.
    ######################################################################
    # Optional: If you have enough memory, consider loading the images into memory
    # for the reasons explained above.
    train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)
    val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)

    # 2: Parse the image and label lists for the training and validation datasets.
    #    This can take a while.
    # The XML parser needs to know what object class names to look for and
    # in which order to map them to integers.
    classes = ['background'] + labels

    train_dataset.parse_xml(
        images_dirs=[config['train']['train_image_folder']],
        image_set_filenames=[config['train']['train_image_set_filename']],
        annotations_dirs=[config['train']['train_annot_folder']],
        classes=classes, include_classes='all',
        exclude_truncated=False, exclude_difficult=False, ret=False)
    val_dataset.parse_xml(
        images_dirs=[config['valid']['valid_image_folder']],
        image_set_filenames=[config['valid']['valid_image_set_filename']],
        annotations_dirs=[config['valid']['valid_annot_folder']],
        classes=classes, include_classes='all',
        exclude_truncated=False, exclude_difficult=False, ret=False)

    #########################
    # 3: Set the batch size.
    #########################
    batch_size = config['train']['batch_size']  # Change the batch size if you like, or if you run into GPU memory issues.

    ##########################
    # 4: Set the image transformations for pre-processing and data augmentation options.
    ##########################
    # For the training generator:
    # For the validation generator:
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)

    ######################################
    # 5: Instantiate an encoder that can encode ground truth labels into the format
    #    needed by the SSD loss function.
    #########################################
    # The encoder constructor needs the spatial dimensions of the model's predictor
    # layers to create the anchor boxes.
if config['model']['backend'] == 'ssd512': predictor_sizes = [ model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3], model.get_layer('fc7_mbox_conf').output_shape[1:3], model.get_layer('conv6_2_mbox_conf').output_shape[1:3], model.get_layer('conv7_2_mbox_conf').output_shape[1:3], model.get_layer('conv8_2_mbox_conf').output_shape[1:3], model.get_layer('conv9_2_mbox_conf').output_shape[1:3], model.get_layer('conv10_2_mbox_conf').output_shape[1:3] ] ssd_input_encoder = SSDInputEncoder( img_height=img_height, img_width=img_width, n_classes=n_classes, predictor_sizes=predictor_sizes, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.5, normalize_coords=normalize_coords) elif config['model']['backend'] == 'ssd300': predictor_sizes = [ model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3], model.get_layer('fc7_mbox_conf').output_shape[1:3], model.get_layer('conv6_2_mbox_conf').output_shape[1:3], model.get_layer('conv7_2_mbox_conf').output_shape[1:3], model.get_layer('conv8_2_mbox_conf').output_shape[1:3], model.get_layer('conv9_2_mbox_conf').output_shape[1:3] ] ssd_input_encoder = SSDInputEncoder( img_height=img_height, img_width=img_width, n_classes=n_classes, predictor_sizes=predictor_sizes, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.5, normalize_coords=normalize_coords) elif config['model']['backend'] == 'ssd7': predictor_sizes = [ model.get_layer('classes4').output_shape[1:3], model.get_layer('classes5').output_shape[1:3], model.get_layer('classes6').output_shape[1:3], model.get_layer('classes7').output_shape[1:3] ] ssd_input_encoder = SSDInputEncoder( img_height=img_height, img_width=img_width, n_classes=n_classes, predictor_sizes=predictor_sizes, scales=scales, aspect_ratios_global=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.3, normalize_coords=normalize_coords) ####################### # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function. ####################### train_generator = train_dataset.generate( batch_size=batch_size, shuffle=True, transformations=[ SSDDataAugmentation(img_height=img_height, img_width=img_width) ], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) val_generator = val_dataset.generate( batch_size=batch_size, shuffle=False, transformations=[convert_to_3_channels, resize], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) # Get the number of samples in the training and validation datasets. train_dataset_size = train_dataset.get_dataset_size() val_dataset_size = val_dataset.get_dataset_size() print("Number of images in the training dataset:\t{:>6}".format(train_dataset_size)) print("Number of images in the validation dataset:\t{:>6}".format(val_dataset_size)) ######################### # Define model callbacks. ######################### # TODO: Set the filepath under which you want to save the model.
model_checkpoint = ModelCheckpoint( filepath=config['train']['saved_weights_name'], monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=1) #model_checkpoint.best = csv_logger = CSVLogger(filename='log.csv', separator=',', append=True) learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule, verbose=1) terminate_on_nan = TerminateOnNaN() callbacks = [ model_checkpoint, csv_logger, learning_rate_scheduler, terminate_on_nan ] #print(model.summary()) batch_images, batch_labels = next(train_generator) # i = 0 # Which batch item to look at # # print("Image:", batch_filenames[i]) # print() # print("Ground truth boxes:\n") # print(batch_labels[i]) initial_epoch = 0 final_epoch = config['train']['nb_epochs'] #final_epoch = 20 steps_per_epoch = 500 history = model.fit_generator(generator=train_generator, steps_per_epoch=steps_per_epoch, epochs=final_epoch, callbacks=callbacks, validation_data=val_generator, validation_steps=ceil(val_dataset_size / batch_size), initial_epoch=initial_epoch, verbose=1 if config['train']['debug'] else 2)
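Note: the `LearningRateScheduler` callbacks in this script (here in `main()` and again in `run()` below) reference an `lr_schedule` function that is never defined in their scope. A minimal sketch of a plausible module-level definition, assuming the same step decay that `train_VOC()` further below uses; the epoch boundaries are assumptions, not values taken from these two functions:

def lr_schedule(epoch):
    # Step decay commonly used for SSD training (assumed boundaries).
    if epoch < 80:
        return 0.001
    elif epoch < 100:
        return 0.0001
    else:
        return 0.00001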
def run(train_dir, valid_dir, set_dir, model_dir): # train_dir = arguments.train_dir # valid_dir = arguments.valid_dir train_dataset_dir = train_dir train_annot_dir = train_dir + '/annot/' train_set = train_dir + '/img_set.txt' valid_dataset_dir = valid_dir valid_annot_dir = valid_dir + '/annot/' valid_set = valid_dir + '/valid_set.txt' # Set Training and Validation dataset paths batch_size = 16 print('Using batch size of: {}'.format(batch_size)) #model_path = 'COCO_512.h5' model_path = model_dir # model_path = 'saved_model.h5' # Needs to know classes and order to map to integers classes = ['background', 'car', 'bus', 'truck'] # Set required parameters for training of SSD img_height = 512 img_width = 512 img_channels = 3 # Colour image mean_color = [123, 117, 104] # DO NOT CHANGE swap_channels = [2, 1, 0] # Original SSD used BGR n_classes = 3 # Number of positive classes (80 for COCO) scales_coco = [0.04, 0.1, 0.26, 0.42, 0.58, 0.74, 0.9, 1.06] scales = scales_coco aspect_ratios = [[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]] two_boxes_for_ar1 = True steps = [8, 16, 32, 64, 128, 256, 512] offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] clip_boxes = False variances = [0.1, 0.1, 0.2, 0.2] normalize_coords = True K.clear_session() model = ssd_512(image_size=(img_height, img_width, img_channels), n_classes=n_classes, mode='training', l2_regularization=0.0005, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, normalize_coords=normalize_coords, subtract_mean=mean_color, swap_channels=swap_channels) model.load_weights(model_path, by_name=True) sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False) ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) model.compile(optimizer=sgd, loss=ssd_loss.compute_loss) # model = load_model(model_path, custom_objects={'AnchorBoxes': AnchorBoxes, # 'L2Normalization': L2Normalization, # 'compute_loss': ssd_loss.compute_loss}) # Create Data Generators for train and valid sets train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None) valid_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None) train_dataset.parse_xml(images_dirs=[train_dataset_dir], image_set_filenames=[train_set], annotations_dirs=[train_annot_dir], classes=classes, include_classes='all', exclude_truncated=False, exclude_difficult=False, ret=False) valid_dataset.parse_xml(images_dirs=[valid_dataset_dir], image_set_filenames=[valid_set], annotations_dirs=[valid_annot_dir], classes=classes, include_classes='all', exclude_truncated=False, exclude_difficult=False, ret=False) # Will speed up training but requires more memory. # Can comment out to avoid the extra memory requirements. ''' train_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07+12_trainval.h5', resize=False, variable_image_size=True, verbose=True) valid_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07_test.h5', resize=False, variable_image_size=True, verbose=True) ''' ssd_data_augmentation = SSDDataAugmentation(img_height=img_height, img_width=img_width, background=mean_color) convert_to_3_channels = ConvertTo3Channels() resize = Resize(height=img_height, width=img_width) predictor_sizes = [ model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3], model.get_layer('fc7_mbox_conf').output_shape[1:3], model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
model.get_layer('conv7_2_mbox_conf').output_shape[1:3], model.get_layer('conv8_2_mbox_conf').output_shape[1:3], model.get_layer('conv9_2_mbox_conf').output_shape[1:3], model.get_layer('conv10_2_mbox_conf').output_shape[1:3] ] ssd_input_encoder = SSDInputEncoder(img_height=img_height, img_width=img_width, n_classes=n_classes, predictor_sizes=predictor_sizes, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.5, normalize_coords=normalize_coords) train_generator = train_dataset.generate( batch_size=batch_size, shuffle=True, transformations=[ssd_data_augmentation], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) val_generator = valid_dataset.generate( batch_size=batch_size, shuffle=False, transformations=[convert_to_3_channels, resize], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) # Get the number of samples in the training and validation datasets. train_dataset_size = train_dataset.get_dataset_size() valid_dataset_size = valid_dataset.get_dataset_size() print("Number of images in the training dataset:\t{:>6}".format(train_dataset_size)) print("Number of images in the validation dataset:\t{:>6}".format(valid_dataset_size)) model_checkpoint = ModelCheckpoint( filepath='ssd_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5', monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=1) csv_logger = CSVLogger(filename='ssd512_training_log.csv', separator=',', append=True) # The callbacks list below references csv_logger, so it must stay uncommented. learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule, verbose=1) terminate_on_nan = TerminateOnNaN() callbacks = [ model_checkpoint, csv_logger, learning_rate_scheduler, terminate_on_nan ] #callbacks = [learning_rate_scheduler, # terminate_on_nan] initial_epoch = 0 final_epoch = 150 steps_per_epoch = math.ceil(119 / batch_size) # ceil(num_samples/batch_size) # Training history = model.fit_generator(generator=train_generator, steps_per_epoch=steps_per_epoch, epochs=final_epoch, callbacks=callbacks, validation_data=val_generator, validation_steps=math.ceil(valid_dataset_size / batch_size), initial_epoch=initial_epoch) # Save final trained model model.save('trained.h5') # Make predictions predict_generator = valid_dataset.generate( batch_size=1, shuffle=True, transformations=[convert_to_3_channels, resize], label_encoder=None, returns={ 'processed_images', 'filenames', 'inverse_transform', 'original_images', 'original_labels' }, keep_images_without_gt=False) batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next(predict_generator) i = 0 # Which batch item to look at print("Image:", batch_filenames[i]) print() print("Ground truth boxes:\n") print(np.array(batch_original_labels[i])) y_pred = model.predict(batch_images) y_pred_decoded = decode_detections(y_pred, confidence_thresh=0.2, iou_threshold=0.4, top_k=200, normalize_coords=normalize_coords, img_height=img_height, img_width=img_width) y_pred_decoded_inv = apply_inverse_transforms(y_pred_decoded, batch_inverse_transforms) np.set_printoptions(precision=2, suppress=True, linewidth=90) print("Predicted boxes:\n") print(' class conf xmin ymin xmax ymax') print(y_pred_decoded_inv[i]) # Set the colors for the bounding boxes colors =
plt.cm.hsv(np.linspace(0, 1, n_classes + 1)).tolist() # classes = ['background', 'car', 'bus', 'truck', 'motorbike'] # Already set at start plt.figure(figsize=(20, 12)) plt.imshow(batch_original_images[i]) current_axis = plt.gca() for box in batch_original_labels[i]: xmin = box[1] ymin = box[2] xmax = box[3] ymax = box[4] label = '{}'.format(classes[int(box[0])]) current_axis.add_patch( plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color='green', fill=False, linewidth=2)) current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={ 'facecolor': 'green', 'alpha': 1.0 }) for box in y_pred_decoded_inv[i]: xmin = box[2] ymin = box[3] xmax = box[4] ymax = box[5] color = colors[int(box[0])] label = '{}: {:.2f}'.format(classes[int(box[0])], box[1]) current_axis.add_patch( plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color=color, fill=False, linewidth=2)) current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={ 'facecolor': color, 'alpha': 1.0 }) plt.show() return
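For reference, the two drawing loops above index the boxes differently because `decode_detections` returns rows with a confidence column while the ground-truth rows have none; the printed header above confirms the decoded layout. A small illustrative unpacking of the same data:

# Decoded prediction row: [class_id, confidence, xmin, ymin, xmax, ymax]
# Ground-truth label row: [class_id, xmin, ymin, xmax, ymax]
for class_id, confidence, xmin, ymin, xmax, ymax in y_pred_decoded_inv[i]:
    print('{}: {:.2f} at ({:.0f}, {:.0f}, {:.0f}, {:.0f})'.format(
        classes[int(class_id)], confidence, xmin, ymin, xmax, ymax))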
def ssd_model(config: Dict, train_dataset, val_dataset, callbacks_list): """Train the SSD model. Parameters ---------- config : Dict Config (parsed YAML/JSON) containing all parameters """ start_train = timer() img_height = config['training']['img_height'] # Height of the input images img_width = config['training']['img_width'] # Width of the input images img_channels = config['training']['img_channels'] # Number of color channels n_classes = config['training']['n_classes'] # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO model = ssd_300(image_size=(img_height, img_width, img_channels), n_classes=n_classes, mode='training', l2_regularization=config['training']['l2_regularization'], scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, normalize_coords=normalize_coords, subtract_mean=mean_color, swap_channels=swap_channels) weights_path = './weights/VGG_ILSVRC_16_layers_fc_reduced.h5' model.load_weights(weights_path, by_name=True) adam = Adam(lr=config['training']['learning_rate'], beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) model.compile(optimizer=adam, loss=ssd_loss.compute_loss) batch_size = config['training']['batch_size'] ssd_data_augmentation = SSDDataAugmentation(img_height=img_height, img_width=img_width, background=mean_color) convert_to_3_channels = ConvertTo3Channels() resize = Resize(height=img_height, width=img_width) predictor_sizes = [ model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3], model.get_layer('fc7_mbox_conf').output_shape[1:3], model.get_layer('conv6_2_mbox_conf').output_shape[1:3], model.get_layer('conv7_2_mbox_conf').output_shape[1:3], model.get_layer('conv8_2_mbox_conf').output_shape[1:3], model.get_layer('conv9_2_mbox_conf').output_shape[1:3] ] ssd_input_encoder = SSDInputEncoder(img_height=img_height, img_width=img_width, n_classes=n_classes, predictor_sizes=predictor_sizes, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.5, normalize_coords=normalize_coords) train_generator = train_dataset.generate( batch_size=batch_size, shuffle=True, transformations=[ssd_data_augmentation], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) val_generator = val_dataset.generate( batch_size=batch_size, shuffle=False, transformations=[convert_to_3_channels, resize], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) # Get the number of samples in the training and validation datasets. train_dataset_size = train_dataset.get_dataset_size() val_dataset_size = val_dataset.get_dataset_size() print(f"[INFO]...Number of images in the training dataset: {train_dataset_size}") print(f"[INFO]...Number of images in the validation dataset: {val_dataset_size}") print(f"[INFO]...Weights will be saved at {config['training']['weight_save_path']}") history = model.fit_generator( generator=train_generator, steps_per_epoch=config['training']['steps_per_epoch'], epochs=config['training']['epochs'], callbacks=callbacks_list, validation_data=val_generator, validation_steps=ceil(val_dataset_size / batch_size)) end_train = timer() print(f"[INFO]...Total time taken by Training Job is {(end_train - start_train)/60:.2f} min(s)")
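`ssd_model()` reads `scales`, `aspect_ratios`, `two_boxes_for_ar1`, `steps`, `offsets`, `clip_boxes`, `variances`, `normalize_coords`, `mean_color` and `swap_channels` from the enclosing module; they are never defined inside the function. A minimal sketch of the module-level block it assumes, using the original SSD300 Pascal VOC values that appear elsewhere in this document:

# Anchor-box configuration assumed by ssd_model(); these are the original
# SSD300 Pascal VOC settings used in train_VOC() and load_VOC_IMG_generators().
mean_color = [123, 117, 104]
swap_channels = [2, 1, 0]
scales = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]
aspect_ratios = [[1.0, 2.0, 0.5],
                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                 [1.0, 2.0, 0.5],
                 [1.0, 2.0, 0.5]]
two_boxes_for_ar1 = True
steps = [8, 16, 32, 64, 100, 300]
offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
clip_boxes = False
variances = [0.1, 0.1, 0.2, 0.2]
normalize_coords = True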
def train_VOC(config): ''' Train the given configuration; the configuration must be constructed with the utility script found in utils/generateconfig.py. Arguments: config: the configuration of the model to use; should already be loaded. ''' ################################### ### PATHS AND PARAMETERS ################################### datadir = config.DATA_DIR local_dir = config.ROOT_FOLDER img_shape = config.IMG_SHAPE classes = config.CLASSES checkpoint_output = os.path.join(local_dir, 'models', config.CHECKPOINT_NAME) model_output = os.path.join(local_dir, 'models', config.MODEL_NAME) img_height = img_shape[0] # Height of the model input images img_width = img_shape[1] # Width of the model input images img_channels = img_shape[2] # Number of color channels of the model input images mean_color = [123, 117, 104] # The per-channel mean of the images in the dataset. Do not change this value if you're using any of the pre-trained weights. swap_channels = [2, 1, 0] # The color channel order in the original SSD is BGR, so we'll have the model reverse the color channel order of the input images. n_classes = 20 # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO scales_pascal = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets scales = scales_pascal aspect_ratios = [[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]] # The anchor box aspect ratios used in the original SSD300; the order matters two_boxes_for_ar1 = True steps = [8, 16, 32, 64, 100, 300] # The space between two adjacent anchor box center points for each predictor layer. offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5] # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer. clip_boxes = False # Whether or not to clip the anchor boxes to lie entirely within the image boundaries variances = [0.1, 0.1, 0.2, 0.2] # The variances by which the encoded target coordinates are divided as in the original implementation normalize_coords = True batch_size = config.BATCH_SIZE # Change the batch size if you like, or if you run into GPU memory issues. ################################### ### BUILDING MODEL ################################### K.clear_session() # Clear previous models from memory.
model = ssd_300(image_size=(img_height, img_width, img_channels), n_classes=n_classes, mode='training', l2_regularization=0.0005, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, normalize_coords=normalize_coords, subtract_mean=mean_color, swap_channels=swap_channels) weights_path = os.path.join(local_dir, 'weights', 'VGG_VOC0712_SSD_300x300_iter_120000.h5') model.load_weights(weights_path, by_name=True) #adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False) ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) model.compile(optimizer=sgd, loss=ssd_loss.compute_loss) ################################### ### LOADING DATA ################################### train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None) val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None) images_dir = os.path.join(datadir, 'Images') annotations_dir = os.path.join(datadir, 'Annotations') trainval_image_set_filename = os.path.join(datadir, 'ImageSets', 'train.txt') test_image_set_filename = os.path.join(datadir, 'ImageSets', 'val.txt') # The XML parser needs to know what object class names to look for and in which order to map them to integers. train_dataset.parse_xml(images_dirs=[images_dir], image_set_filenames=[trainval_image_set_filename], annotations_dirs=[annotations_dir], classes=classes, include_classes='all', exclude_truncated=False, exclude_difficult=False, ret=False) val_dataset.parse_xml(images_dirs=[images_dir], image_set_filenames=[test_image_set_filename], annotations_dirs=[annotations_dir], classes=classes, include_classes='all', exclude_truncated=False, exclude_difficult=True, ret=False) train_dataset.create_hdf5_dataset(file_path='flowers_train.h5', resize=False, variable_image_size=True, verbose=True) val_dataset.create_hdf5_dataset(file_path='flowers_val.h5', resize=False, variable_image_size=True, verbose=True) ssd_data_augmentation = SSDDataAugmentation(img_height=img_height, img_width=img_width, background=mean_color) convert_to_3_channels = ConvertTo3Channels() resize = Resize(height=img_height, width=img_width) # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes.
predictor_sizes = [ model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3], model.get_layer('fc7_mbox_conf').output_shape[1:3], model.get_layer('conv6_2_mbox_conf').output_shape[1:3], model.get_layer('conv7_2_mbox_conf').output_shape[1:3], model.get_layer('conv8_2_mbox_conf').output_shape[1:3], model.get_layer('conv9_2_mbox_conf').output_shape[1:3] ] ssd_input_encoder = SSDInputEncoder(img_height=img_height, img_width=img_width, n_classes=n_classes, predictor_sizes=predictor_sizes, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.5, normalize_coords=normalize_coords) train_generator = train_dataset.generate( batch_size=batch_size, shuffle=True, transformations=[ssd_data_augmentation], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) val_generator = val_dataset.generate( batch_size=batch_size, shuffle=False, transformations=[convert_to_3_channels, resize], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) # Get the number of samples in the training and validation datasets. train_dataset_size = train_dataset.get_dataset_size() val_dataset_size = val_dataset.get_dataset_size() print("Number of images in the training dataset:\t{:>6}".format(train_dataset_size)) print("Number of images in the validation dataset:\t{:>6}".format(val_dataset_size)) ################################### ### PREPARE TRAINING ################################### def lr_schedule(epoch): if epoch < 80: return 0.001 elif epoch < 100: return 0.0001 else: return 0.00001 model_checkpoint = ModelCheckpoint(filepath=checkpoint_output, monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=1) early_stopping = EarlyStopping(monitor='val_loss', min_delta=0.0, patience=10, verbose=1) learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule, verbose=1) terminate_on_nan = TerminateOnNaN() callbacks = [ model_checkpoint, learning_rate_scheduler, terminate_on_nan, early_stopping ] ################################### ### TRAINING ################################### epochs = config.EPOCHS steps_per_epoch = ceil(train_dataset_size / batch_size) model.summary() history = model.fit_generator(generator=train_generator, steps_per_epoch=steps_per_epoch, epochs=epochs, callbacks=callbacks, validation_data=val_generator, validation_steps=ceil(val_dataset_size / batch_size)) model.save(model_output)
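A hedged usage sketch for `train_VOC()`: the real config object comes from utils/generateconfig.py, so the attribute values below are placeholders; only the attribute names are taken from what the function actually reads:

from types import SimpleNamespace

config = SimpleNamespace(
    DATA_DIR='data/VOC',                      # placeholder path
    ROOT_FOLDER='.',                          # placeholder path
    IMG_SHAPE=(300, 300, 3),
    CLASSES=['background', 'person', 'car'],  # placeholder; use 'background' plus the full 20 VOC class names
    CHECKPOINT_NAME='ssd300_checkpoint.h5',
    MODEL_NAME='ssd300_final.h5',
    BATCH_SIZE=32,
    EPOCHS=120,
)
train_VOC(config)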
predictor_sizes = [ model.get_layer('classes4').output_shape[1:3], model.get_layer('classes5').output_shape[1:3], model.get_layer('classes6').output_shape[1:3], model.get_layer('classes7').output_shape[1:3] ] ssd_input_encoder = SSDInputEncoder( img_height=img_height, img_width=img_width, n_classes=n_classes, predictor_sizes=predictor_sizes, scales=scales, aspect_ratios_global=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, # it is the anchor boxes that get clipped here clip_boxes=clip_boxes, variances=variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.3, border_pixels='half', coords='centroids', normalize_coords=normalize_coords, ) # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function. train_generator = train_dataset.generate( batch_size=batch_size, shuffle=True, transformations=(data_augmentation_chain, ),
def train(data_path): K.clear_session() # Clear previous models from memory. model = init_Model(trained_weight_path) img_dir = os.path.join(path2, 'dataset', data_path, 'original') labels_filename = os.path.join(path2, 'dataset', data_path, 'csv', 'labels_train.csv') mask_path = os.path.join(path2, 'dataset', data_path, 'mask_richtig_4') data_input_class = Data_Input_Class(labels_filename=labels_filename, images_dir=img_dir, mask_groundTruth_Path=mask_path) images, filenames, labels, image_ids = data_input_class.parse_csv(ret=True) images = np.asarray(images) filenames = np.asarray(filenames) labels = np.asarray(labels) image_ids = np.asarray(image_ids) ''' print('shape of images:', images.shape) print('shape of filenames:', filenames.shape) print('shape of labels:', labels.shape) print('shape of image_ids:', image_ids.shape) print(labels[1]) print(image_ids) print(filenames[0:5]) ''' predictor_sizes = [ model.get_layer('classes3_3_add').output_shape[1:3], model.get_layer('classes4').output_shape[1:3], model.get_layer('classes5').output_shape[1:3], model.get_layer('classes6').output_shape[1:3], model.get_layer('classes7').output_shape[1:3]] ssd_input_encoder = SSDInputEncoder(img_height=img_height, img_width=img_width, n_classes=n_classes, predictor_sizes=predictor_sizes, scales=scales, aspect_ratios_global=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.3, normalize_coords=normalize_coords) batch_X, batch_y_encoded = data_input_class.get_encoded_boxlabel(batch_size=200, label_encoder=ssd_input_encoder) mask, class_weight = data_input_class.get_mask_label() ''' print('type of batch_x:', type(batch_X)) print('shape of batch_x:', batch_X.shape) print('type of batch_y:', type(batch_y_encoded)) print('shape of batch_y_encoded:', batch_y_encoded.shape) ''' print('shape of mask:', mask.shape) print('shape of class_weight:', class_weight.shape) #tb = keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=0, batch_size=batch_size, write_graph=False, write_grads=True, write_images=True, embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None) #chk = keras.callbacks.ModelCheckpoint(mdl_path, monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=1) #redu = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=reduce_factor, patience=num_patience, verbose=1, mode='auto', epsilon=0.0001, cooldown=0, min_lr=0) epoch = 50 batch_size = 1 #weight_saved_path = 'E:/0525/new_Version/ssd_keras-master-copy/weight_change/detection_segmentation/' weight_saved_path = os.path.join(path2, 'dataset', data_path, 'weight', 'ssd_segmentation_detection.h5') chk = keras.callbacks.ModelCheckpoint(weight_saved_path, monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=1) redu = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, mode='auto', epsilon=0.0001, cooldown=0, min_lr=0) model.fit(images, [batch_y_encoded, mask], validation_split=0.2, epochs=epoch, batch_size=batch_size, callbacks=[chk], verbose=1, class_weight=[None, class_weight], shuffle=True)
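Note that `redu` (the ReduceLROnPlateau callback) is created above but never passed to `model.fit`, so it has no effect. If the learning-rate reduction is actually wanted, the fit call would need to include it, e.g.:

# Same call as above, but with the ReduceLROnPlateau callback included.
model.fit(images, [batch_y_encoded, mask],
          validation_split=0.2,
          epochs=epoch,
          batch_size=batch_size,
          callbacks=[chk, redu],
          verbose=1,
          class_weight=[None, class_weight],
          shuffle=True)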
model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3], model.get_layer('fc7_mbox_conf').output_shape[1:3], model.get_layer('conv6_2_mbox_conf').output_shape[1:3], model.get_layer('conv7_2_mbox_conf').output_shape[1:3], model.get_layer('conv8_2_mbox_conf').output_shape[1:3], model.get_layer('conv9_2_mbox_conf').output_shape[1:3] ] ssd_input_encoder = SSDInputEncoder(img_height=img_height, img_width=img_width, n_classes=n_classes, predictor_sizes=predictor_sizes, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.5, normalize_coords=normalize_coords) # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function. train_generator = train_dataset.generate( batch_size=batch_size, shuffle=True, transformations=[ssd_data_augmentation], label_encoder=ssd_input_encoder,
def load_VOC_IMG_generators(self,model): print('Making VOC image generators') datadir = self.datas['DATA_PATH'] train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None) val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None) test_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None) images_dir = os.path.join(datadir,'Images') annotations_dir = os.path.join(datadir,'Annotations') train_image_set_filename = os.path.join(datadir,'ImageSets','train.txt') val_image_set_filename = os.path.join(datadir,'ImageSets','val.txt') test_image_set_filename = os.path.join(datadir,'ImageSets','test.txt') generator_options = self.datas['GENERATOR'] train_dataset.parse_xml(images_dirs=[images_dir], image_set_filenames=[train_image_set_filename], annotations_dirs=[annotations_dir], classes=self.datas['CLASSES'], include_classes='all', exclude_truncated=False, exclude_difficult=False, ret=False) val_dataset.parse_xml(images_dirs=[images_dir], image_set_filenames=[val_image_set_filename], annotations_dirs=[annotations_dir], classes=self.datas['CLASSES'], include_classes='all', exclude_truncated=False, exclude_difficult=False, ret=False) test_dataset.parse_xml(images_dirs=[images_dir], image_set_filenames=[test_image_set_filename], annotations_dirs=[annotations_dir], classes=self.datas['CLASSES'], include_classes='all', exclude_truncated=False, exclude_difficult=False, ret=False) convert_to_3_channels = ConvertTo3Channels() target_size = generator_options['TARGET_SIZE'] resize = Resize(height=target_size[0], width=target_size[1]) predictor_sizes = [model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3], model.get_layer('fc7_mbox_conf').output_shape[1:3], model.get_layer('conv6_2_mbox_conf').output_shape[1:3], model.get_layer('conv7_2_mbox_conf').output_shape[1:3], model.get_layer('conv8_2_mbox_conf').output_shape[1:3], model.get_layer('conv9_2_mbox_conf').output_shape[1:3]] scales_pascal = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets scales = scales_pascal aspect_ratios = [[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0/3.0], [1.0, 2.0, 0.5, 3.0, 1.0/3.0], [1.0, 2.0, 0.5, 3.0, 1.0/3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]] # The anchor box aspect ratios used in the original SSD300; the order matters steps = [8, 16, 32, 64, 100, 300] # The space between two adjacent anchor box center points for each predictor layer. 
two_boxes_for_ar1 = True mean_color = [123, 117, 104] # TODO: add this as a parameter offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5] clip_boxes = False variances = [0.1, 0.1, 0.2, 0.2] normalize_coords = True ssd_input_encoder = SSDInputEncoder(img_height=target_size[0], img_width=target_size[1], n_classes=20, # TODO: handle subsampling predictor_sizes=predictor_sizes, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.5, normalize_coords=normalize_coords ) train_generator = train_dataset.generate(batch_size=generator_options['BATCH_SIZE'], shuffle=True, transformations=[convert_to_3_channels, resize], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) val_generator = val_dataset.generate(batch_size=generator_options['BATCH_SIZE'], shuffle=True, transformations=[convert_to_3_channels, resize], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) test_generator = test_dataset.generate(batch_size=generator_options['BATCH_SIZE'], shuffle=True, transformations=[convert_to_3_channels, resize], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) return [train_generator, train_dataset.get_dataset_size()], [val_generator, val_dataset.get_dataset_size()], [test_generator, test_dataset.get_dataset_size()]
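A hedged sketch of consuming the `[generator, dataset_size]` pairs returned above, following the `fit_generator` pattern used throughout this document; `trainer` stands in for an instance of the enclosing class, and the batch size and epoch count are placeholders:

from math import ceil

train_pair, val_pair, test_pair = trainer.load_VOC_IMG_generators(model)
train_generator, train_size = train_pair
val_generator, val_size = val_pair

batch_size = 32  # placeholder; must match GENERATOR['BATCH_SIZE'] used above
model.fit_generator(generator=train_generator,
                    steps_per_epoch=ceil(train_size / batch_size),
                    epochs=120,  # placeholder
                    validation_data=val_generator,
                    validation_steps=ceil(val_size / batch_size))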