def data_augment(x_train, y_train, batch_size, val_split=0.25, task='classification', num_classes=None):
    """Shuffle the data, split off a validation set and build augmenting generators.

    Arguments:
        x_train: image array of shape (m, h, w, c) — TODO confirm channel count with callers.
        y_train: labels; for 'detection_single' the last 4 columns are
            normalized box coordinates and the leading columns a one-hot
            class vector (inferred from the slicing below — verify).
        batch_size: batch size of the returned generators.
        val_split: fraction of the (shuffled) data reserved for validation.
        task: 'classification' or 'detection_single'.
        num_classes: number of classes, used by the detection label encoder.

    Returns:
        (train_flow, val_flow): a training generator with augmentation and a
        plain validation generator.
    """
    h, w = x_train.shape[1:3]
    # Shuffle training data and split some off for validation.
    # BUGFIX: the permutation was previously shuffled with random.shuffle(),
    # which is not driven by np.random.seed(), so the split was not
    # reproducible. Use NumPy's shuffle so the seed takes effect.
    seed = 1
    m = len(x_train)
    np.random.seed(seed)
    index = np.arange(0, m)
    np.random.shuffle(index)
    x_train = x_train[index]
    y_train = y_train[index]
    split = int(m * (1 - val_split))
    x_train, x_val = x_train[:split], x_train[split:]
    y_train, y_val = y_train[:split], y_train[split:]
    # Use shift, flip and rotation to augment the data.
    if task == 'classification':
        datagen = ImageDataGenerator(rotation_range=10,
                                     width_shift_range=0.1,
                                     height_shift_range=0.1,
                                     validation_split=val_split,
                                     horizontal_flip=True)
        train_flow = datagen.flow(x_train, y_train, batch_size=batch_size)
    elif task == 'detection_single':
        ssd_data_augmentation = SSDDataAugmentation(img_height=h, img_width=w)
        normalize = Normalize(h, w)
        gen = DataGenerator()
        # Recover absolute pixel box coordinates from the normalized last
        # four label columns, and the class id from the one-hot prefix.
        box = y_train[:, -4:] * [w, h, w, h]
        class_id = np.argmax(y_train[:, :-4, np.newaxis], axis=1)
        labels = np.hstack([class_id, box])
        labels = labels[:, np.newaxis, :].tolist()
        # The SSD DataGenerator expects pixel-valued images and per-image
        # label lists; fill its fields in manually.
        gen.images = x_train * 255
        gen.labels = labels
        gen.dataset_size = len(labels)
        gen.dataset_indices = np.arange(gen.dataset_size)
        gen.filenames = ['x' for i in range(gen.dataset_size)]
        train_flow = gen.generate(
            batch_size=batch_size,
            shuffle=True,
            transformations=[ssd_data_augmentation, normalize],
            label_encoder=PetLabelEncoder(num_classes),
            returns={'processed_images', 'encoded_labels'},
            keep_images_without_gt=False)
    # The validation data is passed through without any augmentation.
    datagen = ImageDataGenerator()
    val_flow = datagen.flow(x_val, y_val, batch_size=batch_size)
    return (train_flow, val_flow)
def train_VOC(config):
    '''
    Train the given configuration; the configuration must be constructed
    according to the utility script found in utils/generateconfig.py.

    Arguments:
        config : the configuration of the model to use; should already be loaded.
    '''
    ###################################
    ### PATHS AND PARAMETERS
    ##################################
    datadir = config.DATA_DIR
    local_dir = config.ROOT_FOLDER
    img_shape = config.IMG_SHAPE
    classes = config.CLASSES
    checkpoint_output = os.path.join(local_dir, 'models', config.CHECKPOINT_NAME)
    model_output = os.path.join(local_dir, 'models', config.MODEL_NAME)
    img_height = img_shape[0]  # Height of the model input images
    img_width = img_shape[1]  # Width of the model input images
    img_channels = img_shape[2]  # Number of color channels of the model input images
    # Per-channel mean of the dataset images. Do not change this value if
    # you're using any of the pre-trained weights.
    mean_color = [123, 117, 104]
    # The color channel order in the original SSD is BGR, so we'll have the
    # model reverse the color channel order of the input images.
    swap_channels = [2, 1, 0]
    n_classes = 20  # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO
    # The anchor box scaling factors used in the original SSD300 for Pascal VOC.
    scales_pascal = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]
    scales = scales_pascal
    # The anchor box aspect ratios used in the original SSD300; the order matters.
    aspect_ratios = [[1.0, 2.0, 0.5],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5],
                     [1.0, 2.0, 0.5]]
    two_boxes_for_ar1 = True
    # The space between two adjacent anchor box center points for each predictor layer.
    steps = [8, 16, 32, 64, 100, 300]
    # The offsets of the first anchor box center points from the top and left
    # borders of the image as a fraction of the step size for each predictor layer.
    offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
    clip_boxes = False  # Whether or not to clip the anchor boxes to lie entirely within the image boundaries
    # The variances by which the encoded target coordinates are divided as in
    # the original implementation.
    variances = [0.1, 0.1, 0.2, 0.2]
    normalize_coords = True
    batch_size = config.BATCH_SIZE  # Change the batch size if you run into GPU memory issues.

    ###################################
    ### BUILDING MODEL
    ##################################
    K.clear_session()  # Clear previous models from memory.
    model = ssd_300(image_size=(img_height, img_width, img_channels),
                    n_classes=n_classes,
                    mode='training',
                    l2_regularization=0.0005,
                    scales=scales,
                    aspect_ratios_per_layer=aspect_ratios,
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    steps=steps,
                    offsets=offsets,
                    clip_boxes=clip_boxes,
                    variances=variances,
                    normalize_coords=normalize_coords,
                    subtract_mean=mean_color,
                    swap_channels=swap_channels)
    weights_path = os.path.join(local_dir, 'weights',
                                'VGG_VOC0712_SSD_300x300_iter_120000.h5')
    model.load_weights(weights_path, by_name=True)
    # adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False)
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    model.compile(optimizer=sgd, loss=ssd_loss.compute_loss)

    ###################################
    ### LOADING DATAS
    ##################################
    train_dataset = DataGenerator(load_images_into_memory=False,
                                  hdf5_dataset_path=None)
    val_dataset = DataGenerator(load_images_into_memory=False,
                                hdf5_dataset_path=None)
    images_dir = os.path.join(datadir, 'Images')
    annotations_dir = os.path.join(datadir, 'Annotations')
    trainval_image_set_filename = os.path.join(datadir, 'ImageSets', 'train.txt')
    test_image_set_filename = os.path.join(datadir, 'ImageSets', 'val.txt')
    # The XML parser needs to know what object class names to look for and in
    # which order to map them to integers.
    # BUGFIX: this call was commented out, which left the training dataset
    # empty — the training HDF5 file below would contain no samples and
    # steps_per_epoch would evaluate to 0. The training split must be parsed
    # before it is serialized.
    train_dataset.parse_xml(images_dirs=[images_dir],
                            image_set_filenames=[trainval_image_set_filename],
                            annotations_dirs=[annotations_dir],
                            classes=classes,
                            include_classes='all',
                            exclude_truncated=False,
                            exclude_difficult=False,
                            ret=False)
    val_dataset.parse_xml(images_dirs=[images_dir],
                          image_set_filenames=[test_image_set_filename],
                          annotations_dirs=[annotations_dir],
                          classes=classes,
                          include_classes='all',
                          exclude_truncated=False,
                          exclude_difficult=True,
                          ret=False)
    train_dataset.create_hdf5_dataset(file_path='flowers_train.h5',
                                      resize=False,
                                      variable_image_size=True,
                                      verbose=True)
    val_dataset.create_hdf5_dataset(file_path='flowers_val.h5',
                                    resize=False,
                                    variable_image_size=True,
                                    verbose=True)
    ssd_data_augmentation = SSDDataAugmentation(img_height=img_height,
                                                img_width=img_width,
                                                background=mean_color)
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)
    # The encoder constructor needs the spatial dimensions of the model's
    # predictor layers to create the anchor boxes.
    predictor_sizes = [
        model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
        model.get_layer('fc7_mbox_conf').output_shape[1:3],
        model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv9_2_mbox_conf').output_shape[1:3]
    ]
    ssd_input_encoder = SSDInputEncoder(img_height=img_height,
                                        img_width=img_width,
                                        n_classes=n_classes,
                                        predictor_sizes=predictor_sizes,
                                        scales=scales,
                                        aspect_ratios_per_layer=aspect_ratios,
                                        two_boxes_for_ar1=two_boxes_for_ar1,
                                        steps=steps,
                                        offsets=offsets,
                                        clip_boxes=clip_boxes,
                                        variances=variances,
                                        matching_type='multi',
                                        pos_iou_threshold=0.5,
                                        neg_iou_limit=0.5,
                                        normalize_coords=normalize_coords)
    train_generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        transformations=[ssd_data_augmentation],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)
    val_generator = val_dataset.generate(
        batch_size=batch_size,
        shuffle=False,
        transformations=[convert_to_3_channels, resize],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)
    # Get the number of samples in the training and validation datasets.
    train_dataset_size = train_dataset.get_dataset_size()
    val_dataset_size = val_dataset.get_dataset_size()
    print("Number of images in the training dataset:\t{:>6}".format(
        train_dataset_size))
    print("Number of images in the validation dataset:\t{:>6}".format(
        val_dataset_size))

    ###################################
    ### PREPARE TRAINING
    ##################################
    def lr_schedule(epoch):
        # Piecewise-constant learning-rate decay at epochs 80 and 100.
        if epoch < 80:
            return 0.001
        elif epoch < 100:
            return 0.0001
        else:
            return 0.00001

    model_checkpoint = ModelCheckpoint(filepath=checkpoint_output,
                                       monitor='val_loss',
                                       verbose=1,
                                       save_best_only=True,
                                       save_weights_only=False,
                                       mode='auto',
                                       period=1)
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0.0,
                                   patience=10,
                                   verbose=1)
    learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule,
                                                    verbose=1)
    terminate_on_nan = TerminateOnNaN()
    callbacks = [
        model_checkpoint, learning_rate_scheduler, terminate_on_nan,
        early_stopping
    ]

    ###################################
    ### TRAINING
    ##################################
    epochs = config.EPOCHS
    steps_per_epoch = ceil(train_dataset_size / batch_size)
    model.summary()
    history = model.fit_generator(generator=train_generator,
                                  steps_per_epoch=steps_per_epoch,
                                  epochs=epochs,
                                  callbacks=callbacks,
                                  validation_data=val_generator,
                                  validation_steps=ceil(val_dataset_size / batch_size))
    model.save(model_output)
two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.5, normalize_coords=normalize_coords) # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function. train_generator = train_dataset.generate( batch_size=batch_size, shuffle=True, transformations=[ssd_data_augmentation], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) val_generator = val_dataset.generate( batch_size=batch_size, shuffle=False, transformations=[convert_to_3_channels, resize], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) # Get the number of samples in the training and validations datasets. train_dataset_size = train_dataset.get_dataset_size() val_dataset_size = val_dataset.get_dataset_size()
def test_config(config):
    '''
    Test the given configuration; the configuration should already have been
    used for training purposes, or this will return an error (see ssd_train.py).

    Arguments:
        config : the configuration of the model to use; should already be loaded.
    '''
    local_dir = config.ROOT_FOLDER
    data_dir = config.DATA_DIR
    img_shape = config.IMG_SHAPE
    img_height = img_shape[0]  # Height of the model input images
    img_width = img_shape[1]  # Width of the model input images
    n_classes = 20  # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO
    normalize_coords = True
    K.clear_session()  # Clear previous models from memory.
    print("[INFO] loading model...")
    model_path = os.path.join(local_dir, 'models', config.MODEL_NAME)
    # We need to create an SSDLoss object in order to pass that to the model loader.
    ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)
    model = load_model(model_path,
                       custom_objects={
                           'AnchorBoxes': AnchorBoxes,
                           'L2Normalization': L2Normalization,
                           'DecodeDetections': DecodeDetections,
                           'compute_loss': ssd_loss.compute_loss
                       })
    classes = config.CLASSES
    dataset = DataGenerator(load_images_into_memory=False,
                            hdf5_dataset_path=None)
    dataset_images_dir = os.path.join(data_dir, 'Images')
    dataset_annotations_dir = os.path.join(data_dir, 'Annotations/')
    # BUGFIX: was 'ImageSets\\test.txt' — a hard-coded Windows separator that
    # breaks on POSIX and is inconsistent with every other path in this file.
    dataset_test_image_set_filename = os.path.join(data_dir, 'ImageSets', 'test.txt')
    dataset.parse_xml(images_dirs=[dataset_images_dir],
                      image_set_filenames=[dataset_test_image_set_filename],
                      annotations_dirs=[dataset_annotations_dir],
                      classes=classes,
                      include_classes='all',
                      exclude_truncated=False,
                      exclude_difficult=True,
                      ret=False)
    # NOTE(review): the HDF5 dataset is written to config.MODEL_NAME, i.e. the
    # model's file name in the current working directory — this looks like a
    # copy-paste slip (a dedicated '*.h5' dataset name was probably intended);
    # confirm before changing, since downstream code may rely on this path.
    dataset.create_hdf5_dataset(file_path=config.MODEL_NAME,
                                resize=False,
                                variable_image_size=True,
                                verbose=True)
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)
    dataset_size = dataset.get_dataset_size()
    print("Number of images in the dataset:\t{:>6}".format(dataset_size))
    predict_generator = dataset.generate(
        batch_size=config.PREDICT_BATCH_SIZE,
        shuffle=True,
        transformations=[convert_to_3_channels, resize],
        label_encoder=None,
        returns={
            'processed_images', 'filenames', 'inverse_transform',
            'original_images', 'original_labels'
        },
        keep_images_without_gt=False)
    count = 0
    # One iteration per batch; only the first image of each batch is shown.
    while count < dataset_size:
        batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next(
            predict_generator)
        i = 0
        print("Image:", batch_filenames[i])
        print()
        print("Ground truth boxes:\n")
        print(np.array(batch_original_labels[i]))
        y_pred = model.predict(batch_images)
        y_pred_decoded = decode_detections(y_pred,
                                           confidence_thresh=0.5,
                                           iou_threshold=0.4,
                                           top_k=200,
                                           normalize_coords=normalize_coords,
                                           img_height=img_height,
                                           img_width=img_width)
        # Map the decoded boxes back to the original image coordinates.
        y_pred_decoded_inv = apply_inverse_transforms(
            y_pred_decoded, batch_inverse_transforms)
        np.set_printoptions(precision=2, suppress=True, linewidth=90)
        print("Predicted boxes:\n")
        print(' class conf xmin ymin xmax ymax')
        print(y_pred_decoded_inv[i])
        # cv2.imshow('original image',batch_original_images[i])
        # cv2.waitKey(800)
        # cv2.destroyAllWindows()
        colors = plt.cm.hsv(np.linspace(0, 1, n_classes + 1)).tolist()
        plt.figure(figsize=(15, 8))
        plt.imshow(batch_original_images[i])
        current_axis = plt.gca()
        # Draw the ground-truth boxes in green.
        len_orig = 0
        for box in batch_original_labels[i]:
            len_orig += 1
            xmin = box[1]
            ymin = box[2]
            xmax = box[3]
            ymax = box[4]
            label = '{}'.format(classes[int(box[0])])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin),
                              xmax - xmin,
                              ymax - ymin,
                              color='green',
                              fill=False,
                              linewidth=2))
            current_axis.text(xmin,
                              ymin,
                              label,
                              size='x-large',
                              color='white',
                              bbox={
                                  'facecolor': 'green',
                                  'alpha': 1.0
                              })
        # Draw the predicted boxes, one color per class.
        len_found = 0
        for box in y_pred_decoded_inv[i]:
            len_found += 1
            xmin = box[2]
            ymin = box[3]
            xmax = box[4]
            ymax = box[5]
            color = colors[int(box[0])]
            label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin),
                              xmax - xmin,
                              ymax - ymin,
                              color=color,
                              fill=False,
                              linewidth=2))
            current_axis.text(xmin,
                              ymin,
                              label,
                              size='x-large',
                              color='white',
                              bbox={
                                  'facecolor': color,
                                  'alpha': 1.0
                              })
        print('Number of original boxes : {}'.format(len_orig))
        print('Number of found boxes : {}'.format(len_found))
        plt.show()
        count += 1
def main():
    """Build (or load) an SSD512 model, optionally train it on the polyp
    dataset and save a prediction visualization for every validation image."""
    create_new_model = True if args.model_name == 'default' else False
    if create_new_model:
        K.clear_session()  # Clear previous models from memory.
        model = ssd_512(image_size=(Config.img_height, Config.img_width,
                                    Config.img_channels),
                        n_classes=Config.n_classes,
                        mode='training',
                        l2_regularization=Config.l2_regularization,
                        scales=Config.scales,
                        aspect_ratios_per_layer=Config.aspect_ratios,
                        two_boxes_for_ar1=Config.two_boxes_for_ar1,
                        steps=Config.steps,
                        offsets=Config.offsets,
                        clip_boxes=Config.clip_boxes,
                        variances=Config.variances,
                        normalize_coords=Config.normalize_coords,
                        subtract_mean=Config.mean_color,
                        swap_channels=Config.swap_channels)
        adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
        model.compile(optimizer=adam, loss=ssd_loss.compute_loss)
    else:
        model_path = "weights/" + args.model_name + ".h5"
        # We need to create an SSDLoss object in order to pass that to the model loader.
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
        K.clear_session()  # Clear previous models from memory.
        model = load_model(model_path,
                           custom_objects={
                               'AnchorBoxes': AnchorBoxes,
                               'L2Normalization': L2Normalization,
                               'compute_loss': ssd_loss.compute_loss
                           })
    # Load the data from the pre-built HDF5 datasets.
    train_dataset = DataGenerator(load_images_into_memory=True,
                                  hdf5_dataset_path=os.getcwd() + "/data/" +
                                  args.dataset + '/polyp_train.h5')
    val_dataset = DataGenerator(load_images_into_memory=True,
                                hdf5_dataset_path=os.getcwd() + "/data/" +
                                args.dataset + '/polyp_val.h5')
    train_dataset_size = train_dataset.get_dataset_size()
    val_dataset_size = val_dataset.get_dataset_size()
    print("Number of images in the training dataset:\t{:>6}".format(
        train_dataset_size))
    print("Number of images in the validation dataset:\t{:>6}".format(
        val_dataset_size))
    batch_size = args.batch_size
    # For the training generator:
    ssd_data_augmentation = SSDDataAugmentation(img_height=Config.img_height,
                                                img_width=Config.img_width,
                                                background=Config.mean_color)
    # For the validation generator:
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=Config.img_height, width=Config.img_width)
    # 5: Instantiate an encoder that can encode ground truth labels into the
    # format needed by the SSD loss function. The encoder constructor needs
    # the spatial dimensions of the model's predictor layers to create the
    # anchor boxes.
    predictor_sizes = [
        model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
        model.get_layer('fc7_mbox_conf').output_shape[1:3],
        model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv9_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv10_2_mbox_conf').output_shape[1:3]
    ]
    ssd_input_encoder = SSDInputEncoder(
        img_height=Config.img_height,
        img_width=Config.img_width,
        n_classes=Config.n_classes,
        predictor_sizes=predictor_sizes,
        scales=Config.scales,
        aspect_ratios_per_layer=Config.aspect_ratios,
        two_boxes_for_ar1=Config.two_boxes_for_ar1,
        steps=Config.steps,
        offsets=Config.offsets,
        clip_boxes=Config.clip_boxes,
        variances=Config.variances,
        matching_type='multi',
        pos_iou_threshold=0.5,
        neg_iou_limit=0.5,
        normalize_coords=Config.normalize_coords)
    # 6: Create the generator handles that will be passed to Keras'
    # `fit_generator()` function.
    train_generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        transformations=[ssd_data_augmentation],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)
    val_generator = val_dataset.generate(
        batch_size=batch_size,
        shuffle=False,
        transformations=[convert_to_3_channels, resize],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)
    model_checkpoint = ModelCheckpoint(
        filepath=os.getcwd() +
        '/weights/ssd512_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        period=30)
    csv_logger = CSVLogger(filename='ssd512_training_log.csv',
                           separator=',',
                           append=True)
    learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule)
    terminate_on_nan = TerminateOnNaN()
    tf_log = keras.callbacks.TensorBoard(log_dir=TF_LOG_PATH + args.tf_logs,
                                         histogram_freq=0,
                                         batch_size=batch_size,
                                         write_graph=True,
                                         write_grads=False,
                                         write_images=False)
    callbacks = [
        model_checkpoint, csv_logger, learning_rate_scheduler,
        terminate_on_nan, tf_log
    ]
    # If you're resuming a previous training, set `initial_epoch` and
    # `final_epoch` accordingly.
    initial_epoch = 0
    final_epoch = args.final_epoch
    steps_per_epoch = 500
    # Train/Fit the model
    if args.predict_mode == 'train':
        history = model.fit_generator(generator=train_generator,
                                      steps_per_epoch=steps_per_epoch,
                                      epochs=final_epoch,
                                      callbacks=callbacks,
                                      validation_data=val_generator,
                                      validation_steps=ceil(val_dataset_size /
                                                            batch_size),
                                      initial_epoch=initial_epoch)
    # Prediction Output
    predict_generator = val_dataset.generate(
        batch_size=1,
        shuffle=True,
        transformations=[convert_to_3_channels, resize],
        label_encoder=None,
        returns={
            'processed_images', 'filenames', 'inverse_transform',
            'original_images', 'original_labels'
        },
        keep_images_without_gt=False)
    i = 0  # Batch size is 1, so index 0 is the only image per batch.
    for val in range(val_dataset_size):
        batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next(
            predict_generator)
        y_pred = model.predict(batch_images)
        y_pred_decoded = decode_detections(
            y_pred,
            confidence_thresh=0.5,
            iou_threshold=0.4,
            top_k=200,
            normalize_coords=Config.normalize_coords,
            img_height=Config.img_height,
            img_width=Config.img_width)
        # 5: Convert the predictions for the original image.
        y_pred_decoded_inv = apply_inverse_transforms(
            y_pred_decoded, batch_inverse_transforms)
        np.set_printoptions(precision=2, suppress=True, linewidth=90)
        print("Predicted boxes:\n")
        print(' class conf xmin ymin xmax ymax')
        print(y_pred_decoded_inv[i])
        plt.figure(figsize=(20, 12))
        # NOTE(review): the resized/processed image is shown but the boxes
        # below are in original-image coordinates (ground truth and
        # inverse-transformed predictions) — verify the overlay lines up, or
        # switch to batch_original_images[i].
        plt.imshow(batch_images[i])
        current_axis = plt.gca()
        colors = plt.cm.hsv(
            np.linspace(0, 1, Config.n_classes + 1)).tolist()  # Bounding box colors
        classes = [
            'background', 'polyps'
        ]  # Just so we can print class names onto the image instead of IDs
        for box in batch_original_labels[i]:
            xmin = box[1]
            ymin = box[2]
            xmax = box[3]
            ymax = box[4]
            label = '{}'.format(classes[int(box[0])])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin),
                              xmax - xmin,
                              ymax - ymin,
                              color='green',
                              fill=False,
                              linewidth=2))
            current_axis.text(xmin,
                              ymin,
                              label,
                              size='x-large',
                              color='white',
                              bbox={
                                  'facecolor': 'green',
                                  'alpha': 1.0
                              })
        for box in y_pred_decoded_inv[i]:
            xmin = box[2]
            ymin = box[3]
            xmax = box[4]
            ymax = box[5]
            color = colors[int(box[0])]
            label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin),
                              xmax - xmin,
                              ymax - ymin,
                              color=color,
                              fill=False,
                              linewidth=2))
            current_axis.text(xmin,
                              ymin,
                              label,
                              size='x-large',
                              color='white',
                              bbox={
                                  'facecolor': color,
                                  'alpha': 1.0
                              })
        image = plt.gcf()
        plt.draw()
        image.savefig(os.getcwd() + "/val_ssd512val_" + str(val) + ".png",
                      dpi=100)
        # BUGFIX: close the figure — one was created per validation image and
        # never released, so memory grew with every iteration.
        plt.close()
def get_dataset(
    args: argparse.Namespace, model: Model
) -> Tuple[Iterable[List[np.array]], Iterable[List[np.array]], int]:
    """Parse the Pascal VOC 07+12 annotations under args.data_dir and return
    (train_generator, val_generator, val_dataset_size).

    The generators yield (processed_images, encoded_labels) batches ready for
    `fit_generator`; the label encoder's anchor boxes are derived from the
    given model's predictor layer shapes.
    """
    train_dataset = DataGenerator(load_images_into_memory=False,
                                  hdf5_dataset_path=None)
    val_dataset = DataGenerator(load_images_into_memory=False,
                                hdf5_dataset_path=None)
    # BUGFIX: these paths were built as os.path.join(args.data_dir,
    # '/VOC2007/...'). os.path.join discards every preceding component when a
    # later one is absolute, so args.data_dir was silently ignored. Use
    # relative path components instead.
    VOC_2007_images_dir = os.path.join(args.data_dir, 'VOC2007', 'JPEGImages')
    VOC_2012_images_dir = os.path.join(args.data_dir, 'VOC2012', 'JPEGImages')
    VOC_2007_annotations_dir = os.path.join(args.data_dir, 'VOC2007',
                                            'Annotations')
    VOC_2012_annotations_dir = os.path.join(args.data_dir, 'VOC2012',
                                            'Annotations')
    VOC_2007_trainval_image_set_filename = os.path.join(
        args.data_dir, 'VOC2007', 'ImageSets', 'Main', 'trainval.txt')
    VOC_2012_trainval_image_set_filename = os.path.join(
        args.data_dir, 'VOC2012', 'ImageSets', 'Main', 'trainval.txt')
    VOC_2007_test_image_set_filename = os.path.join(args.data_dir, 'VOC2007',
                                                    'ImageSets', 'Main',
                                                    'test.txt')
    classes = [
        'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
        'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
        'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
        'tvmonitor'
    ]
    train_dataset.parse_xml(
        images_dirs=[VOC_2007_images_dir, VOC_2012_images_dir],
        image_set_filenames=[
            VOC_2007_trainval_image_set_filename,
            VOC_2012_trainval_image_set_filename
        ],
        annotations_dirs=[VOC_2007_annotations_dir, VOC_2012_annotations_dir],
        classes=classes,
        include_classes='all',
        exclude_truncated=False,
        exclude_difficult=False,
        ret=False)
    val_dataset.parse_xml(
        images_dirs=[VOC_2007_images_dir],
        image_set_filenames=[VOC_2007_test_image_set_filename],
        annotations_dirs=[VOC_2007_annotations_dir],
        classes=classes,
        include_classes='all',
        exclude_truncated=False,
        exclude_difficult=True,
        ret=False)
    train_dataset.create_hdf5_dataset(
        file_path='dataset_pascal_voc_07+12_trainval.h5',
        resize=False,
        variable_image_size=True,
        verbose=True)
    val_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07_test.h5',
                                    resize=False,
                                    variable_image_size=True,
                                    verbose=True)
    ssd_data_augmentation = SSDDataAugmentation(img_height=args.img_height,
                                                img_width=args.img_width,
                                                background=args.mean_color)
    # For the validation generator:
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=args.img_height, width=args.img_width)
    # 5: Instantiate an encoder that can encode ground truth labels into the
    # format needed by the SSD loss function. The encoder constructor needs
    # the spatial dimensions of the model's predictor layers to create the
    # anchor boxes.
    predictor_sizes = [
        model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
        model.get_layer('fc7_mbox_conf').output_shape[1:3],
        model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv9_2_mbox_conf').output_shape[1:3]
    ]
    ssd_input_encoder = SSDInputEncoder(
        img_height=args.img_height,
        img_width=args.img_width,
        n_classes=args.n_classes,
        predictor_sizes=predictor_sizes,
        scales=args.scales,
        aspect_ratios_per_layer=args.aspect_ratios,
        two_boxes_for_ar1=args.two_boxes_for_ar1,
        steps=args.steps,
        offsets=args.offsets,
        clip_boxes=args.clip_boxes,
        variances=args.variances,
        matching_type='multi',
        pos_iou_threshold=0.5,
        neg_iou_limit=0.5,
        normalize_coords=args.normalize_coords)
    train_generator = train_dataset.generate(
        batch_size=args.batch_size,
        shuffle=True,
        transformations=[ssd_data_augmentation],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)
    val_generator = val_dataset.generate(
        batch_size=args.batch_size,
        shuffle=False,
        transformations=[convert_to_3_channels, resize],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)
    return train_generator, val_generator, val_dataset.get_dataset_size()
def main():
    """Build (or load) an SSD7 model, optionally train it on the polyp
    dataset and save a prediction visualization for every validation image."""
    create_new_model = True if args.model_name == 'default' else False
    if create_new_model:
        K.clear_session()  # Clear previous models from memory.
        model = build_model(image_size=(Config.img_height, Config.img_width,
                                        Config.img_channels),
                            n_classes=Config.n_classes,
                            mode='training',
                            l2_regularization=Config.l2_regularization,
                            scales=Config.scales,
                            aspect_ratios_global=Config.aspect_ratios,
                            aspect_ratios_per_layer=None,
                            two_boxes_for_ar1=Config.two_boxes_for_ar1,
                            steps=Config.steps,
                            offsets=Config.offsets,
                            clip_boxes=Config.clip_boxes,
                            variances=Config.variances,
                            normalize_coords=Config.normalize_coords,
                            subtract_mean=Config.intensity_mean,
                            divide_by_stddev=Config.intensity_range)
        # model.load_weights("./weights/"+ args.model_name + ".h5", by_name=True)
        adam = Adam(lr=args.learning_rate,
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-08,
                    decay=0.0)
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
        model.compile(optimizer=adam, loss=ssd_loss.compute_loss)
    else:
        model_path = "weights/" + args.model_name + ".h5"
        # We need to create an SSDLoss object in order to pass that to the model loader.
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
        K.clear_session()  # Clear previous models from memory.
        model = load_model(model_path,
                           custom_objects={
                               'AnchorBoxes': AnchorBoxes,
                               'compute_loss': ssd_loss.compute_loss
                           })
    # Load the data from the pre-built HDF5 datasets.
    train_dataset = DataGenerator(load_images_into_memory=True,
                                  hdf5_dataset_path=os.getcwd() + "/data/" +
                                  args.dataset + '/polyp_train.h5')
    val_dataset = DataGenerator(load_images_into_memory=True,
                                hdf5_dataset_path=os.getcwd() + "/data/" +
                                args.dataset + '/polyp_val.h5')
    train_dataset_size = train_dataset.get_dataset_size()
    val_dataset_size = val_dataset.get_dataset_size()
    print("Number of images in the training dataset:\t{:>6}".format(
        train_dataset_size))
    print("Number of images in the validation dataset:\t{:>6}".format(
        val_dataset_size))
    batch_size = args.batch_size
    # 4: Define the image processing chain.
    data_augmentation_chain = DataAugmentationConstantInputSize(
        random_brightness=(-48, 48, 0.5),
        random_contrast=(0.5, 1.8, 0.5),
        random_saturation=(0.5, 1.8, 0.5),
        random_hue=(18, 0.5),
        random_flip=0.5,
        random_translate=((0.03, 0.5), (0.03, 0.5), 0.5),
        random_scale=(0.5, 2.0, 0.5),
        n_trials_max=3,
        clip_boxes=True,
        overlap_criterion='area',
        bounds_box_filter=(0.3, 1.0),
        bounds_validator=(0.5, 1.0),
        n_boxes_min=1,
        background=(0, 0, 0))
    # 5: Instantiate an encoder that can encode ground truth labels into the
    # format needed by the SSD loss function. The encoder constructor needs
    # the spatial dimensions of the model's predictor layers to create the
    # anchor boxes.
    predictor_sizes = [
        model.get_layer('classes4').output_shape[1:3],
        model.get_layer('classes5').output_shape[1:3],
        model.get_layer('classes6').output_shape[1:3],
        model.get_layer('classes7').output_shape[1:3]
    ]
    ssd_input_encoder = SSDInputEncoder(
        img_height=Config.img_height,
        img_width=Config.img_width,
        n_classes=Config.n_classes,
        predictor_sizes=predictor_sizes,
        scales=Config.scales,
        aspect_ratios_global=Config.aspect_ratios,
        two_boxes_for_ar1=Config.two_boxes_for_ar1,
        steps=Config.steps,
        offsets=Config.offsets,
        clip_boxes=Config.clip_boxes,
        variances=Config.variances,
        matching_type='multi',
        pos_iou_threshold=0.5,
        neg_iou_limit=0.3,
        normalize_coords=Config.normalize_coords)
    # 6: Create the generator handles that will be passed to Keras'
    # `fit_generator()` function.
    train_generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        transformations=[data_augmentation_chain],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)
    val_generator = val_dataset.generate(
        batch_size=batch_size,
        shuffle=False,
        transformations=[],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)
    model_checkpoint = ModelCheckpoint(
        filepath=os.getcwd() +
        '/weights/ssd7_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        period=1)
    csv_logger = CSVLogger(filename='ssd7_training_log.csv',
                           separator=',',
                           append=True)
    # NOTE(review): early_stopping is constructed but not included in the
    # `callbacks` list below, so it has no effect — confirm whether that is
    # intentional before removing or wiring it in.
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0.0,
                                   patience=10,
                                   verbose=1)
    reduce_learning_rate = ReduceLROnPlateau(monitor='val_loss',
                                             factor=0.2,
                                             patience=8,
                                             verbose=1,
                                             epsilon=0.001,
                                             cooldown=0,
                                             min_lr=0.00001)
    tf_log = keras.callbacks.TensorBoard(log_dir=TF_LOG_PATH + args.tf_logs,
                                         histogram_freq=0,
                                         batch_size=batch_size,
                                         write_graph=True,
                                         write_grads=False,
                                         write_images=False)
    callbacks = [model_checkpoint, csv_logger, reduce_learning_rate, tf_log]
    # If you're resuming a previous training, set `initial_epoch` and
    # `final_epoch` accordingly.
    initial_epoch = 0
    final_epoch = args.final_epoch
    steps_per_epoch = 1000
    # Train/Fit the model
    if args.predict_mode == 'train':
        history = model.fit_generator(generator=train_generator,
                                      steps_per_epoch=steps_per_epoch,
                                      epochs=final_epoch,
                                      callbacks=callbacks,
                                      validation_data=val_generator,
                                      validation_steps=ceil(val_dataset_size /
                                                            batch_size),
                                      initial_epoch=initial_epoch)
    # Prediction Output
    predict_generator = val_dataset.generate(
        batch_size=1,
        shuffle=False,
        transformations=[],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'processed_labels', 'filenames'},
        keep_images_without_gt=False)
    # BUGFIX: savefig below writes into val_predictions/ and fails if the
    # directory does not exist; create it up front.
    os.makedirs(os.getcwd() + "/val_predictions", exist_ok=True)
    i = 0  # Batch size is 1, so index 0 is the only image per batch.
    for val in range(val_dataset_size):
        batch_images, batch_labels, batch_filenames = next(predict_generator)
        y_pred = model.predict(batch_images)
        y_pred_decoded = decode_detections(
            y_pred,
            confidence_thresh=0.5,
            iou_threshold=0.5,
            top_k=200,
            normalize_coords=Config.normalize_coords,
            img_height=Config.img_height,
            img_width=Config.img_width)
        np.set_printoptions(precision=2, suppress=True, linewidth=90)
        print("Predicted boxes:\n")
        print(' class conf xmin ymin xmax ymax')
        print(y_pred_decoded[i])
        plt.figure(figsize=(20, 12))
        plt.imshow(batch_images[i])
        current_axis = plt.gca()
        colors = plt.cm.hsv(
            np.linspace(0, 1, Config.n_classes + 1)).tolist()  # Bounding box colors
        classes = [
            'background', 'polyps'
        ]  # Just so we can print class names onto the image instead of IDs
        # Draw the ground truth boxes in green (omit the label for more clarity)
        for box in batch_labels[i]:
            xmin = box[1]
            ymin = box[2]
            xmax = box[3]
            ymax = box[4]
            label = '{}'.format(classes[int(box[0])])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin),
                              xmax - xmin,
                              ymax - ymin,
                              color='green',
                              fill=False,
                              linewidth=2))
            current_axis.text(xmin,
                              ymin,
                              label,
                              size='x-large',
                              color='white',
                              bbox={
                                  'facecolor': 'green',
                                  'alpha': 1.0
                              })
        # Draw the predicted boxes in blue
        for box in y_pred_decoded[i]:
            xmin = box[-4]
            ymin = box[-3]
            xmax = box[-2]
            ymax = box[-1]
            color = colors[int(box[0])]
            label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin),
                              xmax - xmin,
                              ymax - ymin,
                              color=color,
                              fill=False,
                              linewidth=2))
            current_axis.text(xmin,
                              ymin,
                              label,
                              size='x-large',
                              color='white',
                              bbox={
                                  'facecolor': color,
                                  'alpha': 1.0
                              })
        image = plt.gcf()
        # plt.show()
        plt.draw()
        image.savefig(os.getcwd() + "/val_predictions/val_" + str(val) +
                      ".png",
                      dpi=100)
        # BUGFIX: close the figure — one was created per validation image and
        # never released, so memory grew with every iteration.
        plt.close()
normalize_coords = True
img_height = 300  # Height of the model input images
img_width = 300  # Width of the model input images
n_classes = 1
# Set the generator for the predictions.
# NOTE(review): `images_folder`, `labels` and `nb_test_images` are defined
# elsewhere in this script — not visible in this chunk; verify against the
# surrounding file.
val_dataset = DataGenerator()
val_dataset.parse_csv(images_dir=images_folder,
                      labels_filename=labels,
                      include_classes='all')
predict_generator = val_dataset.generate(
    batch_size=1,
    shuffle=False,
    transformations=[],
    label_encoder=None,
    returns={'processed_images', 'original_labels'},
    keep_images_without_gt=False)
val_dataset_size = val_dataset.get_dataset_size()
# number of test set images to use during evaluation
# number of bound not cut
num_objects = 0
# Count ground-truth objects whose class id (column 0) equals 1 across the
# first `nb_test_images` batches; batch size is 1, so index 0 is the only
# image in each batch.
for i in range(nb_test_images):
    batch_images, batch_original_labels = next(predict_generator)
    num_objects += len(
        batch_original_labels[0][batch_original_labels[0][:, 0] == 1])
steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.5, normalize_coords=normalize_coords) # 1: Set the generator for the predictions. predict_generator = val_dataset.generate( batch_size=val_dataset.get_dataset_size(), shuffle=True, transformations=[convert_to_3_channels, resize], label_encoder=None, returns={ 'processed_images', 'filenames', 'inverse_transform', 'original_images', 'original_labels' }, keep_images_without_gt=False) train_batch_size = 10 data_generator = train_dataset.generate( batch_size=train_batch_size, shuffle=False, transformations=[ssd_data_augmentation], label_encoder=None, returns={ 'processed_images', 'processed_labels', 'filenames', 'original_images', 'original_labels' }, keep_images_without_gt=False)
def _main_(args):
    """Train an SSD detector (ssd300 / ssd512 / ssd7 backend) from a JSON config.

    Reads the configuration file given by ``args.conf``, builds a fresh model
    (or reloads a previously saved one), wires up train/validation generators
    from Pascal-VOC-style XML annotations, and trains with ``fit_generator``.

    Arguments:
        args: parsed CLI namespace; only ``args.conf`` (path to the JSON
            configuration file) is used here.
    """
    config_path = args.conf

    with open(config_path) as config_buffer:
        config = json.loads(config_buffer.read())

    ###############################
    #   Parse the annotations
    ###############################
    path_imgs_training = config['train']['train_image_folder']
    path_anns_training = config['train']['train_annot_folder']
    path_imgs_val = config['valid']['valid_image_folder']
    path_anns_val = config['valid']['valid_annot_folder']

    labels = config['model']['labels']
    categories = {}
    #categories = {"Razor": 1, "Gun": 2, "Knife": 3, "Shuriken": 4}
    # Class id 0 is reserved for the background class, so real labels start at 1.
    for i in range(len(labels)):
        categories[labels[i]] = i + 1
    print('\nTraining on: \t' + str(categories) + '\n')

    ####################################
    #   Parameters
    ###################################
    #%%
    # Defaults below are the original SSD300 anchor configuration; they are
    # overridden further down for the ssd512 / ssd7 backends.
    img_height = config['model']['input']  # Height of the model input images
    img_width = config['model']['input']  # Width of the model input images
    img_channels = 3  # Number of color channels of the model input images
    mean_color = [
        123, 117, 104
    ]  # The per-channel mean of the images in the dataset. Do not change this value if you're using any of the pre-trained weights.
    swap_channels = [
        2, 1, 0
    ]  # The color channel order in the original SSD is BGR, so we'll have the model reverse the color channel order of the input images.
    n_classes = len(
        labels
    )  # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO
    scales_pascal = [
        0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05
    ]  # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets
    #scales_coco = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]  # The anchor box scaling factors used in the original SSD300 for the MS COCO datasets
    scales = scales_pascal
    aspect_ratios = [
        [1.0, 2.0, 0.5],
        [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
        [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
        [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
        [1.0, 2.0, 0.5],
        [1.0, 2.0, 0.5]
    ]  # The anchor box aspect ratios used in the original SSD300; the order matters
    two_boxes_for_ar1 = True
    steps = [
        8, 16, 32, 64, 100, 300
    ]  # The space between two adjacent anchor box center points for each predictor layer.
    offsets = [
        0.5, 0.5, 0.5, 0.5, 0.5, 0.5
    ]  # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer.
    clip_boxes = False  # Whether or not to clip the anchor boxes to lie entirely within the image boundaries
    variances = [
        0.1, 0.1, 0.2, 0.2
    ]  # The variances by which the encoded target coordinates are divided as in the original implementation
    normalize_coords = True

    K.clear_session()  # Clear previous models from memory.

    model_path = config['train']['saved_weights_name']

    # 3: Instantiate an optimizer and the SSD loss function and compile the model.
    # If you want to follow the original Caffe implementation, use the preset SGD
    # optimizer, otherwise I'd recommend the commented-out Adam optimizer.

    # Backend-specific anchor overrides (must run before the model is built below).
    if config['model']['backend'] == 'ssd512':
        aspect_ratios = [[1.0, 2.0, 0.5],
                         [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                         [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                         [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                         [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                         [1.0, 2.0, 0.5],
                         [1.0, 2.0, 0.5]]
        steps = [
            8, 16, 32, 64, 100, 200, 300
        ]  # The space between two adjacent anchor box center points for each predictor layer.
        offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
        scales = [0.07, 0.15, 0.3, 0.45, 0.6, 0.75, 0.9, 1.05]
    elif config['model']['backend'] == 'ssd7':
        #weights_path = 'VGG_ILSVRC_16_layers_fc_reduced.h5'
        scales = [
            0.08, 0.16, 0.32, 0.64, 0.96
        ]  # An explicit list of anchor box scaling factors. If this is passed, it will override `min_scale` and `max_scale`.
        aspect_ratios = [0.5, 1.0, 2.0]  # The list of aspect ratios for the anchor boxes
        two_boxes_for_ar1 = True  # Whether or not you want to generate two anchor boxes for aspect ratio 1
        steps = None  # In case you'd like to set the step sizes for the anchor box grids manually; not recommended
        offsets = None

    if os.path.exists(model_path):
        # Resume: a full saved model exists, so reload it (architecture + weights).
        print("\nLoading pretrained weights.\n")
        # We need to create an SSDLoss object in order to pass that to the model loader.
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

        K.clear_session()  # Clear previous models from memory.

        model = load_model(model_path,
                           custom_objects={
                               'AnchorBoxes': AnchorBoxes,
                               'L2Normalization': L2Normalization,
                               'compute_loss': ssd_loss.compute_loss
                           })
    else:
        ####################################
        #   Build the Keras model.
        ###################################
        if config['model']['backend'] == 'ssd300':
            #weights_path = 'VGG_VOC0712Plus_SSD_300x300_ft_iter_160000.h5'
            from models.keras_ssd300 import ssd_300 as ssd
            model = ssd_300(image_size=(img_height, img_width, img_channels),
                            n_classes=n_classes,
                            mode='training',
                            l2_regularization=0.0005,
                            scales=scales,
                            aspect_ratios_per_layer=aspect_ratios,
                            two_boxes_for_ar1=two_boxes_for_ar1,
                            steps=steps,
                            offsets=offsets,
                            clip_boxes=clip_boxes,
                            variances=variances,
                            normalize_coords=normalize_coords,
                            subtract_mean=mean_color,
                            swap_channels=swap_channels)
        elif config['model']['backend'] == 'ssd512':
            #weights_path = 'VGG_VOC0712Plus_SSD_512x512_ft_iter_160000.h5'
            from models.keras_ssd512 import ssd_512 as ssd
            # 2: Load some weights into the model.
            # NOTE(review): unlike the ssd300 branch, no `subtract_mean` is
            # passed here — confirm whether that omission is intentional.
            model = ssd(image_size=(img_height, img_width, img_channels),
                        n_classes=n_classes,
                        mode='training',
                        l2_regularization=0.0005,
                        scales=scales,
                        aspect_ratios_per_layer=aspect_ratios,
                        two_boxes_for_ar1=two_boxes_for_ar1,
                        steps=steps,
                        offsets=offsets,
                        clip_boxes=clip_boxes,
                        variances=variances,
                        normalize_coords=normalize_coords,
                        swap_channels=swap_channels)
        elif config['model']['backend'] == 'ssd7':
            #weights_path = 'VGG_ILSVRC_16_layers_fc_reduced.h5'
            from models.keras_ssd7 import build_model as ssd
            scales = [
                0.08, 0.16, 0.32, 0.64, 0.96
            ]  # An explicit list of anchor box scaling factors. If this is passed, it will override `min_scale` and `max_scale`.
            aspect_ratios = [
                0.5, 1.0, 2.0
            ]  # The list of aspect ratios for the anchor boxes
            two_boxes_for_ar1 = True  # Whether or not you want to generate two anchor boxes for aspect ratio 1
            steps = None  # In case you'd like to set the step sizes for the anchor box grids manually; not recommended
            offsets = None
            model = ssd(image_size=(img_height, img_width, img_channels),
                        n_classes=n_classes,
                        mode='training',
                        l2_regularization=0.0005,
                        scales=scales,
                        aspect_ratios_global=aspect_ratios,
                        aspect_ratios_per_layer=None,
                        two_boxes_for_ar1=two_boxes_for_ar1,
                        steps=steps,
                        offsets=offsets,
                        clip_boxes=clip_boxes,
                        variances=variances,
                        normalize_coords=normalize_coords,
                        subtract_mean=None,
                        divide_by_stddev=None)
        else:
            # NOTE(review): an unknown backend only prints a warning; `model`
            # stays unbound and the load_weights call below would raise.
            print('Wrong Backend')

        print('OK create model')
        #sgd = SGD(lr=config['train']['learning_rate'], momentum=0.9, decay=0.0, nesterov=False)

        # TODO: Set the path to the weights you want to load. only for ssd300 or ssd512
        weights_path = 'VGG_ILSVRC_16_layers_fc_reduced.h5'
        print("\nLoading pretrained weights VGG.\n")
        model.load_weights(weights_path, by_name=True)

    # 3: Instantiate an optimizer and the SSD loss function and compile the model.
    # If you want to follow the original Caffe implementation, use the preset SGD
    # optimizer, otherwise I'd recommend the commented-out Adam optimizer.
    #adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    #sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False)
    optimizer = Adam(lr=config['train']['learning_rate'],
                     beta_1=0.9,
                     beta_2=0.999,
                     epsilon=1e-08,
                     decay=0.0)
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    model.compile(optimizer=optimizer, loss=ssd_loss.compute_loss)
    model.summary()

    #####################################################################
    #  Instantiate two `DataGenerator` objects: One for training, one for validation.
    ######################################################################
    # Optional: If you have enough memory, consider loading the images into memory for the reasons explained above.
    train_dataset = DataGenerator(load_images_into_memory=False,
                                  hdf5_dataset_path=None)
    val_dataset = DataGenerator(load_images_into_memory=False,
                                hdf5_dataset_path=None)

    # 2: Parse the image and label lists for the training and validation datasets. This can take a while.
    # The XML parser needs to now what object class names to look for and in which order to map them to integers.
    classes = ['background'] + labels

    train_dataset.parse_xml(
        images_dirs=[config['train']['train_image_folder']],
        image_set_filenames=[config['train']['train_image_set_filename']],
        annotations_dirs=[config['train']['train_annot_folder']],
        classes=classes,
        include_classes='all',
        exclude_truncated=False,
        exclude_difficult=False,
        ret=False)

    val_dataset.parse_xml(
        images_dirs=[config['valid']['valid_image_folder']],
        image_set_filenames=[config['valid']['valid_image_set_filename']],
        annotations_dirs=[config['valid']['valid_annot_folder']],
        classes=classes,
        include_classes='all',
        exclude_truncated=False,
        exclude_difficult=False,
        ret=False)

    #########################
    # 3: Set the batch size.
    #########################
    batch_size = config['train'][
        'batch_size']  # Change the batch size if you like, or if you run into GPU memory issues.

    ##########################
    # 4: Set the image transformations for pre-processing and data augmentation options.
    ##########################
    # For the training generator:
    # For the validation generator:
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)

    ######################################3
    # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.
    #########################################
    # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes.
    if config['model']['backend'] == 'ssd512':
        predictor_sizes = [
            model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
            model.get_layer('fc7_mbox_conf').output_shape[1:3],
            model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv9_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv10_2_mbox_conf').output_shape[1:3]
        ]
        ssd_input_encoder = SSDInputEncoder(
            img_height=img_height,
            img_width=img_width,
            n_classes=n_classes,
            predictor_sizes=predictor_sizes,
            scales=scales,
            aspect_ratios_per_layer=aspect_ratios,
            two_boxes_for_ar1=two_boxes_for_ar1,
            steps=steps,
            offsets=offsets,
            clip_boxes=clip_boxes,
            variances=variances,
            matching_type='multi',
            pos_iou_threshold=0.5,
            neg_iou_limit=0.5,
            normalize_coords=normalize_coords)
    elif config['model']['backend'] == 'ssd300':
        predictor_sizes = [
            model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
            model.get_layer('fc7_mbox_conf').output_shape[1:3],
            model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv9_2_mbox_conf').output_shape[1:3]
        ]
        ssd_input_encoder = SSDInputEncoder(
            img_height=img_height,
            img_width=img_width,
            n_classes=n_classes,
            predictor_sizes=predictor_sizes,
            scales=scales,
            aspect_ratios_per_layer=aspect_ratios,
            two_boxes_for_ar1=two_boxes_for_ar1,
            steps=steps,
            offsets=offsets,
            clip_boxes=clip_boxes,
            variances=variances,
            matching_type='multi',
            pos_iou_threshold=0.5,
            neg_iou_limit=0.5,
            normalize_coords=normalize_coords)
    elif config['model']['backend'] == 'ssd7':
        predictor_sizes = [
            model.get_layer('classes4').output_shape[1:3],
            model.get_layer('classes5').output_shape[1:3],
            model.get_layer('classes6').output_shape[1:3],
            model.get_layer('classes7').output_shape[1:3]
        ]
        # NOTE: ssd7 uses a global aspect-ratio list and a looser negative
        # IoU limit (0.3) than the VGG backends (0.5).
        ssd_input_encoder = SSDInputEncoder(
            img_height=img_height,
            img_width=img_width,
            n_classes=n_classes,
            predictor_sizes=predictor_sizes,
            scales=scales,
            aspect_ratios_global=aspect_ratios,
            two_boxes_for_ar1=two_boxes_for_ar1,
            steps=steps,
            offsets=offsets,
            clip_boxes=clip_boxes,
            variances=variances,
            matching_type='multi',
            pos_iou_threshold=0.5,
            neg_iou_limit=0.3,
            normalize_coords=normalize_coords)

    #######################
    # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.
    #######################
    train_generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        transformations=[
            SSDDataAugmentation(img_height=img_height, img_width=img_width)
        ],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    val_generator = val_dataset.generate(
        batch_size=batch_size,
        shuffle=False,
        transformations=[convert_to_3_channels, resize],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    # Get the number of samples in the training and validations datasets.
    train_dataset_size = train_dataset.get_dataset_size()
    val_dataset_size = val_dataset.get_dataset_size()

    print("Number of images in the training dataset:\t{:>6}".format(
        train_dataset_size))
    print("Number of images in the validation dataset:\t{:>6}".format(
        val_dataset_size))

    ##########################
    # Define model callbacks.
    #########################
    # TODO: Set the filepath under which you want to save the model.
    model_checkpoint = ModelCheckpoint(
        filepath=config['train']['saved_weights_name'],
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        period=1)
    #model_checkpoint.best =

    csv_logger = CSVLogger(filename='log.csv', separator=',', append=True)

    learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule,
                                                    verbose=1)

    terminate_on_nan = TerminateOnNaN()

    callbacks = [
        model_checkpoint, csv_logger, learning_rate_scheduler,
        terminate_on_nan
    ]

    #print(model.summary())
    # Pull one batch up-front; useful for sanity-checking the generator output.
    batch_images, batch_labels = next(train_generator)

    # i = 0  # Which batch item to look at
    # # print("Image:", batch_filenames[i])
    # print()
    # print("Ground truth boxes:\n")
    # print(batch_labels[i])

    initial_epoch = 0
    final_epoch = config['train']['nb_epochs']
    #final_epoch = 20
    steps_per_epoch = 500  # fixed number of batches per epoch, independent of dataset size

    history = model.fit_generator(generator=train_generator,
                                  steps_per_epoch=steps_per_epoch,
                                  epochs=final_epoch,
                                  callbacks=callbacks,
                                  validation_data=val_generator,
                                  validation_steps=ceil(val_dataset_size /
                                                        batch_size),
                                  initial_epoch=initial_epoch,
                                  verbose=1 if config['train']['debug'] else 2)
model.get_layer('P4').output_shape[1:3], model.get_layer('P5').output_shape[1:3], model.get_layer('P6').output_shape[1:3], model.get_layer('P7').output_shape[1:3] ] # predictor_sizes = [[40,40],[20,20],[10,10],[5,5],[3,3]] ssd_input_encoder = FCOSInputEncoder(img_height=cfgs.IMG_SHORT_SIDE_LEN, img_width=cfgs.IMG_SHORT_SIDE_LEN, n_classes=cfgs.CLASS_NUM, predictor_sizes=predictor_sizes) train_generator = train_dataset.generate( batch_size=cfgs.BATCH_SIZE, shuffle=True, transformations=[data_augmentation_chain], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) val_generator = val_dataset.generate( batch_size=cfgs.BATCH_SIZE, shuffle=False, transformations=[], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) visual_generator = test_dataset.generate( batch_size=cfgs.BATCH_SIZE, shuffle=False, transformations=[],
# plt.figure(figsize=(20,12)) # plt.plot(history.history['loss'], label='loss') # plt.plot(history.history['val_loss'], label='val_loss') # plt.legend(loc='upper right', prop={'size': 24}) # Predict predict_generator = val_dataset.generate(batch_size=1, shuffle=False, transformations=[], label_encoder=ssd_input_encoder, returns={'processed_images', 'processed_labels', 'filenames', 'encoded_labels', 'matched_anchors', 'image_ids', 'original_images', 'original_labels'}, keep_images_without_gt=False) processed_images, encoded_labels,matched_anchors, processed_labels, batch_filenames, image_ids, original_images, original_labels = next(predict_generator) i = 0 # Which batch item to look at while not batch_filenames[i]== "../datasets/udacity_driving_datasets/1478899063275530566.jpg":
aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.5, normalize_coords=normalize_coords) # Create the generator handles that will be passed to Keras' `fit_generator()` function. test_generator = test_dataset.generate( batch_size=batch_size, shuffle=False, transformations=[], label_encoder=ssd_input_encoder, returns={'processed_images', 'processed_labels', 'filenames'}, keep_images_without_gt=False) val_generator = val_dataset.generate( batch_size=batch_size, shuffle=False, transformations=[], label_encoder=ssd_input_encoder, returns={'processed_images', 'processed_labels', 'filenames'}, keep_images_without_gt=False) test_dataset_size = test_dataset.get_dataset_size() val_dataset_size = val_dataset.get_dataset_size()
def run(train_dir, valid_dir, set_dir, model_dir):
    """Fine-tune an SSD512 vehicle detector and visualize one prediction.

    Builds an SSD512 model, loads initial weights from ``model_dir``, trains
    on Pascal-VOC-style XML annotations found under ``train_dir`` /
    ``valid_dir``, saves the final model as ``trained.h5``, then decodes and
    plots predictions for one validation image.

    Arguments:
        train_dir: dataset root for training; must contain ``annot/`` and
            ``img_set.txt``.
        valid_dir: dataset root for validation; must contain ``annot/`` and
            ``valid_set.txt``.
        set_dir: unused here; kept for interface compatibility with callers.
        model_dir: path to the ``.h5`` weights file used to initialize the model.
    """
    # train_dir = arguments.train_dir
    # valid_dir = arguments.valid_dir
    train_dataset_dir = train_dir
    train_annot_dir = train_dir + '/annot/'
    train_set = train_dir + '/img_set.txt'

    valid_dataset_dir = valid_dir
    valid_annot_dir = valid_dir + '/annot/'
    valid_set = valid_dir + '/valid_set.txt'

    # Set Training and Validation dataset paths
    batch_size = 16
    print('Using batch size of: {}'.format(batch_size))

    #model_path = 'COCO_512.h5'
    model_path = model_dir
    # model_path = 'saved_model.h5'

    # Needs to know classes and order to map to integers
    classes = ['background', 'car', 'bus', 'truck']

    # Set required parameters for training of SSD
    img_height = 512
    img_width = 512
    img_channels = 3  # Colour image
    mean_color = [123, 117, 104]  # DO NOT CHANGE
    swap_channels = [2, 1, 0]  # Original SSD used BGR
    n_classes = 3  # 80 for COCO
    scales_coco = [0.04, 0.1, 0.26, 0.42, 0.58, 0.74, 0.9, 1.06]
    scales = scales_coco
    aspect_ratios = [[1.0, 2.0, 0.5],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5],
                     [1.0, 2.0, 0.5]]
    two_boxes_for_ar1 = True
    steps = [8, 16, 32, 64, 128, 256, 512]
    offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
    clip_boxes = False
    variances = [0.1, 0.1, 0.2, 0.2]
    normalize_coords = True

    K.clear_session()

    model = ssd_512(image_size=(img_height, img_width, img_channels),
                    n_classes=n_classes,
                    mode='training',
                    l2_regularization=0.0005,
                    scales=scales,
                    aspect_ratios_per_layer=aspect_ratios,
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    steps=steps,
                    offsets=offsets,
                    clip_boxes=clip_boxes,
                    variances=variances,
                    normalize_coords=normalize_coords,
                    subtract_mean=mean_color,
                    swap_channels=swap_channels)

    model.load_weights(model_path, by_name=True)

    sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False)
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    model.compile(optimizer=sgd, loss=ssd_loss.compute_loss)

    # model = load_model(model_path,
    #                    custom_objects={'AnchorBoxes': AnchorBoxes,
    #                                    'L2Normalization': L2Normalization,
    #                                    'compute_loss': ssd_loss.compute_loss})

    # Create Data Generators for train and valid sets
    train_dataset = DataGenerator(load_images_into_memory=False,
                                  hdf5_dataset_path=None)
    valid_dataset = DataGenerator(load_images_into_memory=False,
                                  hdf5_dataset_path=None)

    train_dataset.parse_xml(images_dirs=[train_dataset_dir],
                            image_set_filenames=[train_set],
                            annotations_dirs=[train_annot_dir],
                            classes=classes,
                            include_classes='all',
                            exclude_truncated=False,
                            exclude_difficult=False,
                            ret=False)

    valid_dataset.parse_xml(images_dirs=[valid_dataset_dir],
                            image_set_filenames=[valid_set],
                            annotations_dirs=[valid_annot_dir],
                            classes=classes,
                            include_classes='all',
                            exclude_truncated=False,
                            exclude_difficult=False,
                            ret=False)

    # Will speed up training but requires more memory
    # Can comment out to avoid memory requirements
    '''
    train_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07+12_trainval.h5',
                                      resize=False,
                                      variable_image_size=True,
                                      verbose=True)

    valid_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07_test.h5',
                                      resize=False,
                                      variable_image_size=True,
                                      verbose=True)
    '''

    ssd_data_augmentation = SSDDataAugmentation(img_height=img_height,
                                                img_width=img_width,
                                                background=mean_color)

    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)

    # The encoder needs the spatial dimensions of the model's predictor layers
    # to create the anchor boxes.
    predictor_sizes = [
        model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
        model.get_layer('fc7_mbox_conf').output_shape[1:3],
        model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv9_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv10_2_mbox_conf').output_shape[1:3]
    ]

    ssd_input_encoder = SSDInputEncoder(img_height=img_height,
                                        img_width=img_width,
                                        n_classes=n_classes,
                                        predictor_sizes=predictor_sizes,
                                        scales=scales,
                                        aspect_ratios_per_layer=aspect_ratios,
                                        two_boxes_for_ar1=two_boxes_for_ar1,
                                        steps=steps,
                                        offsets=offsets,
                                        clip_boxes=clip_boxes,
                                        variances=variances,
                                        matching_type='multi',
                                        pos_iou_threshold=0.5,
                                        neg_iou_limit=0.5,
                                        normalize_coords=normalize_coords)

    train_generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        transformations=[ssd_data_augmentation],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    val_generator = valid_dataset.generate(
        batch_size=batch_size,
        shuffle=False,
        transformations=[convert_to_3_channels, resize],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    # Get the number of samples in the training and validations datasets.
    train_dataset_size = train_dataset.get_dataset_size()
    valid_dataset_size = valid_dataset.get_dataset_size()

    print("Number of images in the training dataset:\t{:>6}".format(
        train_dataset_size))
    print("Number of images in the validation dataset:\t{:>6}".format(
        valid_dataset_size))

    model_checkpoint = ModelCheckpoint(
        filepath=
        'ssd_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        period=1)

    # BUG FIX: `csv_logger` was referenced in the `callbacks` list below while
    # its construction was commented out, which raised a NameError at runtime.
    csv_logger = CSVLogger(filename='ssd512_training_log.csv',
                           separator=',',
                           append=True)

    learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule,
                                                    verbose=1)

    terminate_on_nan = TerminateOnNaN()

    callbacks = [
        model_checkpoint, csv_logger, learning_rate_scheduler,
        terminate_on_nan
    ]
    #callbacks = [learning_rate_scheduler,
    #             terminate_on_nan]

    initial_epoch = 0
    final_epoch = 150  # 150
    # ceil(num_samples/batch_size); previously hard-coded as ceil(119/batch_size)
    # for one specific dataset — derive it from the parsed training set instead.
    steps_per_epoch = math.ceil(train_dataset_size / batch_size)

    # Training
    history = model.fit_generator(generator=train_generator,
                                  steps_per_epoch=steps_per_epoch,
                                  epochs=final_epoch,
                                  callbacks=callbacks,
                                  validation_data=val_generator,
                                  validation_steps=math.ceil(
                                      valid_dataset_size / batch_size),
                                  initial_epoch=initial_epoch)

    # Save final trained model
    model.save('trained.h5')

    # Make predictions
    predict_generator = valid_dataset.generate(
        batch_size=1,
        shuffle=True,
        transformations=[convert_to_3_channels, resize],
        label_encoder=None,
        returns={
            'processed_images', 'filenames', 'inverse_transform',
            'original_images', 'original_labels'
        },
        keep_images_without_gt=False)

    batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next(
        predict_generator)

    i = 0  # Which batch item to look at

    print("Image:", batch_filenames[i])
    print()
    print("Ground truth boxes:\n")
    print(np.array(batch_original_labels[i]))

    y_pred = model.predict(batch_images)

    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.2,
                                       iou_threshold=0.4,
                                       top_k=200,
                                       normalize_coords=normalize_coords,
                                       img_height=img_height,
                                       img_width=img_width)

    # Map the decoded boxes back to the original (un-resized) image coordinates.
    y_pred_decoded_inv = apply_inverse_transforms(y_pred_decoded,
                                                  batch_inverse_transforms)

    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    print("Predicted boxes:\n")
    print(' class conf xmin ymin xmax ymax')
    print(y_pred_decoded_inv[i])

    # Set the colors for the bounding boxes
    colors = plt.cm.hsv(np.linspace(0, 1, n_classes + 1)).tolist()
    # classes = ['background', 'car', 'bus', 'truck', 'motorbike']  # Already set at start

    plt.figure(figsize=(20, 12))
    plt.imshow(batch_original_images[i])

    current_axis = plt.gca()

    # Ground-truth boxes in green; label format is [class, xmin, ymin, xmax, ymax].
    for box in batch_original_labels[i]:
        xmin = box[1]
        ymin = box[2]
        xmax = box[3]
        ymax = box[4]
        label = '{}'.format(classes[int(box[0])])
        current_axis.add_patch(
            plt.Rectangle((xmin, ymin),
                          xmax - xmin,
                          ymax - ymin,
                          color='green',
                          fill=False,
                          linewidth=2))
        current_axis.text(xmin,
                          ymin,
                          label,
                          size='x-large',
                          color='white',
                          bbox={
                              'facecolor': 'green',
                              'alpha': 1.0
                          })

    # Predicted boxes, one hue per class; format is
    # [class, confidence, xmin, ymin, xmax, ymax].
    for box in y_pred_decoded_inv[i]:
        xmin = box[2]
        ymin = box[3]
        xmax = box[4]
        ymax = box[5]
        color = colors[int(box[0])]
        label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
        current_axis.add_patch(
            plt.Rectangle((xmin, ymin),
                          xmax - xmin,
                          ymax - ymin,
                          color=color,
                          fill=False,
                          linewidth=2))
        current_axis.text(xmin,
                          ymin,
                          label,
                          size='x-large',
                          color='white',
                          bbox={
                              'facecolor': color,
                              'alpha': 1.0
                          })
    plt.show()
    return
def main():
    """Evaluate a trained SSD300 polyp detector on the test set.

    Builds the model in inference mode from the global ``Config``, loads the
    weights named by the global ``args.model_name``, saves an annotated image
    for every test sample under ``val_ssd300/``, then runs the ``Evaluator``
    and prints precision/recall plus per-class precision-recall curves saved
    under ``test_out/``.

    Relies on module-level globals: ``Config``, ``args``, ``ssd_300``,
    ``SSDLoss``, ``DataGenerator``, ``Evaluator``.
    """
    model_mode = 'inference'
    K.clear_session()  # Clear previous models from memory.
    model = ssd_300(image_size=(Config.img_height, Config.img_width,
                                Config.img_channels),
                    n_classes=Config.n_classes,
                    mode=model_mode,
                    l2_regularization=Config.l2_regularization,
                    scales=Config.scales,
                    aspect_ratios_per_layer=Config.aspect_ratios,
                    two_boxes_for_ar1=True,
                    steps=Config.steps,
                    offsets=Config.offsets,
                    clip_boxes=False,
                    variances=Config.variances,
                    normalize_coords=Config.normalize_coords,
                    subtract_mean=Config.mean_color,
                    swap_channels=[2, 1, 0],
                    confidence_thresh=0.01,
                    iou_threshold=0.45,
                    top_k=200,
                    nms_max_output_size=400)

    # 2: Load the trained weights into the model.
    weights_path = os.getcwd() + '/weights/' + args.model_name + ".h5"
    model.load_weights(weights_path, by_name=True)

    # Compile so the model is usable; the optimizer is irrelevant in inference mode.
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

    test_dataset = DataGenerator(load_images_into_memory=True,
                                 hdf5_dataset_path=os.getcwd() + "/data/" +
                                 args.dataset + '/polyp_test.h5')

    test_dataset_size = test_dataset.get_dataset_size()
    print("Number of images in the test dataset:\t{:>6}".format(
        test_dataset_size))

    classes = ['background', 'polyp']

    # NOTE(review): shuffle=True on an evaluation generator makes the saved
    # image ordering non-deterministic — confirm this is intended.
    generator = test_dataset.generate(batch_size=1,
                                      shuffle=True,
                                      transformations=[],
                                      returns={
                                          'processed_images', 'filenames',
                                          'inverse_transform',
                                          'original_images', 'original_labels'
                                      },
                                      keep_images_without_gt=False)

    # Generate a batch and make predictions.
    i = 0  # index of the (single) item in each batch of size 1
    confidence_threshold = Config.confidence_threshold

    for val in range(test_dataset_size):
        batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next(
            generator)

        print("Ground truth boxes:\n")
        print(np.array(batch_original_labels[i]))

        y_pred = model.predict(batch_images)

        # Perform confidence thresholding: keep boxes whose class confidence
        # (column 1) exceeds the configured threshold.
        y_pred_thresh = [
            y_pred[k][y_pred[k, :, 1] > confidence_threshold]
            for k in range(y_pred.shape[0])
        ]

        # Convert the predictions for the original image.
        # y_pred_thresh_inv = apply_inverse_transforms(y_pred_thresh, batch_inverse_transforms)

        np.set_printoptions(precision=2, suppress=True, linewidth=90)
        print("Predicted boxes:\n")
        print(' class conf xmin ymin xmax ymax')
        print(y_pred_thresh[i])

        plt.figure(figsize=(20, 12))
        plt.imshow(batch_images[i])

        current_axis = plt.gca()

        colors = plt.cm.hsv(np.linspace(
            0, 1,
            Config.n_classes + 1)).tolist()  # Set the colors for the bounding boxes
        # NOTE(review): this rebinding shadows the earlier
        # classes = ['background', 'polyp'] ('polyp' vs 'polyps').
        classes = [
            'background', 'polyps'
        ]  # Just so we can print class names onto the image instead of IDs

        # Ground-truth boxes in green; label format is [class, xmin, ymin, xmax, ymax].
        for box in batch_original_labels[i]:
            xmin = box[1]
            ymin = box[2]
            xmax = box[3]
            ymax = box[4]
            label = '{}'.format(classes[int(box[0])])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin),
                              xmax - xmin,
                              ymax - ymin,
                              color='green',
                              fill=False,
                              linewidth=2))
            current_axis.text(xmin,
                              ymin,
                              label,
                              size='x-large',
                              color='white',
                              bbox={
                                  'facecolor': 'green',
                                  'alpha': 1.0
                              })

        # Predicted boxes; format is [class, confidence, xmin, ymin, xmax, ymax].
        for box in y_pred_thresh[i]:
            xmin = box[2]
            ymin = box[3]
            xmax = box[4]
            ymax = box[5]
            color = colors[int(box[0])]
            label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin),
                              xmax - xmin,
                              ymax - ymin,
                              color=color,
                              fill=False,
                              linewidth=2))
            current_axis.text(xmin,
                              ymin,
                              label,
                              size='x-large',
                              color='white',
                              bbox={
                                  'facecolor': color,
                                  'alpha': 1.0
                              })

        image = plt.gcf()
        # plt.show()
        plt.draw()
        image.savefig(os.getcwd() + "/val_ssd300/val_" + str(val) + ".png",
                      dpi=100)

    evaluator = Evaluator(model=model,
                          n_classes=Config.n_classes,
                          data_generator=test_dataset,
                          model_mode=model_mode)

    results = evaluator(img_height=Config.img_height,
                        img_width=Config.img_width,
                        batch_size=args.batch_size,
                        data_generator_mode='resize',
                        round_confidences=False,
                        matching_iou_threshold=0.3,
                        border_pixels='include',
                        sorting_algorithm='quicksort',
                        average_precision_mode='sample',
                        num_recall_points=11,
                        ignore_neutral_boxes=True,
                        return_precisions=True,
                        return_recalls=True,
                        return_average_precisions=True,
                        verbose=True)

    # NOTE(review): assumes a project-specific Evaluator returning 9 values
    # (TP/FP/FN counts and polyp precision/recall in addition to the standard
    # mAP outputs) — confirm against the Evaluator implementation.
    mean_average_precision, average_precisions, precisions, recalls, tp_count, fp_count, fn_count, polyp_precision, polyp_recall = results

    print("TP : %d, FP : %d, FN : %d " % (tp_count, fp_count, fn_count))
    print("{:<14}{:<6}{}".format('polyp', 'Precision ',
                                 round(polyp_precision, 3)))
    print("{:<14}{:<6}{}".format('polyp', 'Recall ', round(polyp_recall, 3)))

    # for i in range(1, len(average_precisions)):
    #     print("{:<14}{:<6}{}".format(classes[i], 'AP', round(average_precisions[i], 3)))
    #
    # print("{:<14}{:<6}{}".format('', 'mAP', round(mean_average_precision, 3)))
    # print('Precisions', np.mean(precisions[1]))
    # print('Recalls', np.mean(recalls[1]))

    # Lay out one precision-recall subplot per positive class on an m x n grid.
    m = max((Config.n_classes + 1) // 2, 2)
    n = 2

    fig, cells = plt.subplots(m, n, figsize=(n * 8, m * 8))
    val = 0
    for i in range(m):
        for j in range(n):
            if n * i + j + 1 > Config.n_classes:
                break
            cells[i, j].plot(recalls[n * i + j + 1],
                             precisions[n * i + j + 1],
                             color='blue',
                             linewidth=1.0)
            cells[i, j].set_xlabel('recall', fontsize=14)
            cells[i, j].set_ylabel('precision', fontsize=14)
            cells[i, j].grid(True)
            cells[i, j].set_xticks(np.linspace(0, 1, 11))
            cells[i, j].set_yticks(np.linspace(0, 1, 11))
            cells[i, j].set_title("{}, AP: {:.3f}".format(
                classes[n * i + j + 1], average_precisions[n * i + j + 1]),
                                  fontsize=16)
            # Save the (whole) current figure once per populated cell.
            image = plt.gcf()
            # plt.show()
            plt.draw()
            image.savefig(os.getcwd() + "/test_out/test_" + str(val) + ".png",
                          dpi=100)
            val += 1
scales=scales, aspect_ratios_per_layer=aspect_ratios, angles_global=angles, offsets=offsets, variances=variances, matching_type='multi', pos_ariou_threshold=pos_ariou_threshold, neg_ariou_limit=neg_ariou_limit, normalize_coords=normalize_coords) # Create the generator handles that will be passed to Keras' `fit_generator()` function. train_generator = train_dataset.generate(batch_size=batch_size, shuffle=True, transformations=[data_augmentation], label_encoder=drbox_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) val_generator = val_dataset.generate(batch_size=batch_size, shuffle=True, transformations=[], label_encoder=drbox_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) # Get the number of samples in the training and validations datasets. train_dataset_size = train_dataset.get_dataset_size() val_dataset_size = val_dataset.get_dataset_size()
def main(job_dir, **args):
    """Fine-tune a VGG19-BN SSD300 on Pascal VOC 2007 staged from GCS.

    Arguments:
        job_dir: job directory; `<job_dir>logs/` is the log location.
        **args: additional CLI arguments (unused here).

    Side effects: copies the dataset next to this file via gsutil, downloads
    the pretrained model from the bucket, trains epochs 120..200 and uploads
    the continued model back to the bucket.
    """
    ## Setting up the path for saving logs (computed but not used below —
    ## NOTE(review): confirm whether TensorBoard logging was intended).
    logs_dir = job_dir + 'logs/'
    data_dir = "gs://deeplearningteam11/data"
    print("Current Directory: " + os.path.dirname(__file__))
    print("Lets copy the data to: " + os.path.dirname(__file__))
    # Stage the dataset locally next to this script; output silenced.
    os.system("gsutil -m cp -r " + data_dir + " " + os.path.dirname(__file__) + "  > /dev/null 2>&1 ")
    #exit(0)
    with tf.device('/device:GPU:0'):
        # 1: Build the Keras model (template only; replaced by load_model below).
        K.clear_session()  # Clear previous models from memory.
        model = ssd_300(image_size=(img_height, img_width, img_channels),
                        n_classes=n_classes,
                        mode='training',
                        l2_regularization=0.0005,
                        scales=scales,
                        aspect_ratios_per_layer=aspect_ratios,
                        two_boxes_for_ar1=two_boxes_for_ar1,
                        steps=steps,
                        offsets=offsets,
                        clip_boxes=clip_boxes,
                        variances=variances,
                        normalize_coords=normalize_coords,
                        subtract_mean=mean_color,
                        swap_channels=swap_channels)

        # Download the `.h5` file of the model to be loaded and store it
        # locally on the instance.
        model_file = file_io.FileIO('gs://deeplearningteam11/vgg19BNmodel.h5',
                                    mode='rb')
        model_path = 'model.h5'
        with open(model_path, 'wb') as f:
            f.write(model_file.read())
        model_file.close()

        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
        model = load_model(model_path,
                           custom_objects={
                               'AnchorBoxes': AnchorBoxes,
                               'L2Normalization': L2Normalization,
                               'DecodeDetections': DecodeDetections,
                               'compute_loss': ssd_loss.compute_loss
                           })

        # Fine-tune every layer.
        for layer in model.layers:
            layer.trainable = True
        model.summary()

        # 1: Instantiate two `DataGenerator` objects: one for training, one
        # for validation.
        train_dataset = DataGenerator(load_images_into_memory=True,
                                      hdf5_dataset_path=None)
        val_dataset = DataGenerator(load_images_into_memory=True,
                                    hdf5_dataset_path=None)

        # 2: Parse the image and label lists for the training and validation
        # datasets (VOC 2007). This can take a while.
        # The directories that contain the images.
        VOC_2007_train_images_dir = 'data/data/VOC2007/train/JPEGImages/'
        VOC_2007_test_images_dir = 'data/data/VOC2007/test/JPEGImages/'
        VOC_2007_train_anns_dir = 'data/data/VOC2007/train/Annotations/'
        VOC_2007_test_anns_dir = 'data/data/VOC2007/test/Annotations/'
        # The paths to the image sets.
        VOC_2007_trainval_image_set_dir = 'data/data/VOC2007/train/ImageSets/Main/'
        VOC_2007_test_image_set_dir = 'data/data/VOC2007/test/ImageSets/Main/'
        # Make all dataset paths absolute relative to this file.
        VOC_2007_train_images_dir = os.path.dirname(
            __file__) + "/" + VOC_2007_train_images_dir
        VOC_2007_test_images_dir = os.path.dirname(
            __file__) + "/" + VOC_2007_test_images_dir
        VOC_2007_train_anns_dir = os.path.dirname(
            __file__) + "/" + VOC_2007_train_anns_dir
        VOC_2007_test_anns_dir = os.path.dirname(
            __file__) + "/" + VOC_2007_test_anns_dir
        VOC_2007_trainval_image_set_dir = os.path.dirname(
            __file__) + "/" + VOC_2007_trainval_image_set_dir
        VOC_2007_test_image_set_dir = os.path.dirname(
            __file__) + "/" + VOC_2007_test_image_set_dir
        VOC_2007_trainval_image_set_filename = VOC_2007_trainval_image_set_dir + '/trainval.txt'
        VOC_2007_test_image_set_filename = VOC_2007_test_image_set_dir + '/test.txt'

        # The XML parser needs to know what object class names to look for
        # and in which order to map them to integers.
        classes = [
            'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
            'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
            'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa',
            'train', 'tvmonitor'
        ]
        print("Parsing Training Data ...")
        train_dataset.parse_xml(
            images_dirs=[VOC_2007_train_images_dir],
            image_set_filenames=[VOC_2007_trainval_image_set_filename],
            annotations_dirs=[VOC_2007_train_anns_dir],
            classes=classes,
            include_classes='all',
            exclude_truncated=False,
            exclude_difficult=False,
            ret=False,
            verbose=False)
        print("Done")
        print(
            "================================================================")
        print("Parsing Test Data ...")
        val_dataset.parse_xml(
            images_dirs=[VOC_2007_test_images_dir],
            image_set_filenames=[VOC_2007_test_image_set_filename],
            annotations_dirs=[VOC_2007_test_anns_dir],
            classes=classes,
            include_classes='all',
            exclude_truncated=False,
            exclude_difficult=True,
            ret=False,
            verbose=False)
        print("Done")
        print(
            "================================================================")

        # 3: Set the batch size.
        batch_size = 32  # Change the batch size if you like, or if you run into GPU memory issues.

        # 4: Image transformations: augmentation chain for training, plain
        # channel conversion + resize for validation.
        ssd_data_augmentation = SSDDataAugmentation(img_height=img_height,
                                                    img_width=img_width,
                                                    background=mean_color)
        convert_to_3_channels = ConvertTo3Channels()
        resize = Resize(height=img_height, width=img_width)

        # 5: Instantiate an encoder that can encode ground truth labels into
        # the format needed by the SSD loss function. The encoder constructor
        # needs the spatial dimensions of the model's predictor layers to
        # create the anchor boxes.
        predictor_sizes = [
            model.get_layer('conv4_4_norm_mbox_conf').output_shape[1:3],
            model.get_layer('fc7_mbox_conf').output_shape[1:3],
            model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv9_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv10_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv11_2_mbox_conf').output_shape[1:3]
        ]
        ssd_input_encoder = SSDInputEncoder(
            img_height=img_height,
            img_width=img_width,
            n_classes=n_classes,
            predictor_sizes=predictor_sizes,
            scales=scales,
            aspect_ratios_per_layer=aspect_ratios,
            two_boxes_for_ar1=two_boxes_for_ar1,
            steps=steps,
            offsets=offsets,
            clip_boxes=clip_boxes,
            variances=variances,
            matching_type='multi',
            pos_iou_threshold=0.5,
            neg_iou_limit=0.5,
            normalize_coords=normalize_coords)

        # 6: Create the generator handles that will be passed to Keras'
        # `fit_generator()` function.
        train_generator = train_dataset.generate(
            batch_size=batch_size,
            shuffle=True,
            transformations=[ssd_data_augmentation],
            label_encoder=ssd_input_encoder,
            returns={'processed_images', 'encoded_labels'},
            keep_images_without_gt=False)
        val_generator = val_dataset.generate(
            batch_size=batch_size,
            shuffle=False,
            transformations=[convert_to_3_channels, resize],
            label_encoder=ssd_input_encoder,
            returns={'processed_images', 'encoded_labels'},
            keep_images_without_gt=False)

        # Get the number of samples in the training and validations datasets.
        train_dataset_size = train_dataset.get_dataset_size()
        val_dataset_size = val_dataset.get_dataset_size()
        print("Number of images in the training dataset:\t{:>6}".format(
            train_dataset_size))
        print("Number of images in the validation dataset:\t{:>6}".format(
            val_dataset_size))

        # Define a learning rate schedule: a constant 1e-6 for this
        # fine-tuning run (the staged schedule is kept for reference).
        def lr_schedule(epoch):
            return 1e-6
            # if epoch < 80:
            #     return 0.001
            # elif epoch < 100:
            #     return 0.0001
            # else:
            #     return 0.00001

        learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule,
                                                        verbose=1)
        terminate_on_nan = TerminateOnNaN()
        callbacks = [learning_rate_scheduler, terminate_on_nan]

        # If you're resuming a previous training, set `initial_epoch` and
        # `final_epoch` accordingly.
        initial_epoch = 120
        final_epoch = 200
        steps_per_epoch = 500
        history = model.fit_generator(generator=train_generator,
                                      steps_per_epoch=steps_per_epoch,
                                      epochs=final_epoch,
                                      callbacks=callbacks,
                                      validation_data=val_generator,
                                      validation_steps=ceil(val_dataset_size /
                                                            batch_size),
                                      initial_epoch=initial_epoch)

        # Save the continued model locally, then upload it to the bucket.
        model_name = "vgg19BNmodel_cont.h5"
        model.save(model_name)
        with file_io.FileIO(model_name, mode='rb') as input_f:
            # BUGFIX: the upload handle must be opened in *binary* mode
            # ('wb+', not 'w+') — the .h5 payload is binary bytes read with
            # 'rb', and a text-mode write fails/corrupts under Python 3.
            with file_io.FileIO("gs://deeplearningteam11/" + model_name,
                                mode='wb+') as output_f:
                output_f.write(input_f.read())
def _main_(args):
    """Train an SSD300 on Pascal VOC 2007, then run one sample prediction
    and plot ground-truth vs. predicted boxes.

    Arguments:
        args: parsed CLI namespace; only `args.desc` is read here.
    """
    print('Hello World! This is {:s}'.format(args.desc))

    # config_path = args.conf
    # with open(config_path) as config_buffer:
    #     config = json.loads(config_buffer.read())

    #############################################################
    # Set model parameters
    #############################################################
    img_height = 300  # Height of the model input images
    img_width = 300  # Width of the model input images
    img_channels = 3  # Number of color channels of the model input images
    mean_color = [123, 117, 104]  # The per-channel mean of the images in the dataset. Do not change this value if you're using any of the pre-trained weights.
    swap_channels = [2, 1, 0]  # The color channel order in the original SSD is BGR, so we'll have the model reverse the color channel order of the input images.
    n_classes = 20  # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO
    scales_pascal = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]  # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets
    scales_coco = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]  # The anchor box scaling factors used in the original SSD300 for the MS COCO datasets
    scales = scales_pascal
    aspect_ratios = [[1.0, 2.0, 0.5],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5],
                     [1.0, 2.0, 0.5]]  # The anchor box aspect ratios used in the original SSD300; the order matters
    two_boxes_for_ar1 = True
    steps = [8, 16, 32, 64, 100, 300]  # The space between two adjacent anchor box center points for each predictor layer.
    offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]  # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer.
    clip_boxes = False  # Whether or not to clip the anchor boxes to lie entirely within the image boundaries
    variances = [0.1, 0.1, 0.2, 0.2]  # The variances by which the encoded target coordinates are divided as in the original implementation
    normalize_coords = True

    #############################################################
    # Create the model
    #############################################################
    # 1: Build the Keras model.
    model = ssd_300(image_size=(img_height, img_width, img_channels),
                    n_classes=n_classes,
                    mode='training',
                    l2_regularization=0.0005,
                    scales=scales,
                    aspect_ratios_per_layer=aspect_ratios,
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    steps=steps,
                    offsets=offsets,
                    clip_boxes=clip_boxes,
                    variances=variances,
                    normalize_coords=normalize_coords,
                    subtract_mean=mean_color,
                    swap_channels=swap_channels)

    # 2: Load some weights into the model.
    # NOTE(review): no weights are actually loaded here — training starts
    # from the freshly initialized model; confirm this is intended.

    # 3: Instantiate an optimizer and the SSD loss function and compile the model.
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

    #############################################################
    # Prepare the data
    #############################################################
    # 1: Instantiate two `DataGenerator` objects: One for training, one for validation.
    train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)
    val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)

    # 2: Parse the image and label lists for the training and validation datasets. This can take a while.
    VOC_2007_images_dir = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages'
    VOC_2007_annotations_dir = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/Annotations'
    VOC_2007_train_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/train.txt'
    VOC_2007_val_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/val.txt'
    # VOC_2007_trainval_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt'
    # VOC_2007_test_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtest_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/test.txt'

    # The XML parser maps these class names to integer ids in this order.
    classes = ['background',
               'aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat',
               'chair', 'cow', 'diningtable', 'dog',
               'horse', 'motorbike', 'person', 'pottedplant',
               'sheep', 'sofa', 'train', 'tvmonitor']

    train_dataset.parse_xml(images_dirs=[VOC_2007_images_dir],
                            image_set_filenames=[VOC_2007_train_image_set_filename],
                            annotations_dirs=[VOC_2007_annotations_dir],
                            classes=classes,
                            include_classes='all',
                            exclude_truncated=False,
                            exclude_difficult=False,
                            ret=False)
    val_dataset.parse_xml(images_dirs=[VOC_2007_images_dir],
                          image_set_filenames=[VOC_2007_val_image_set_filename],
                          annotations_dirs=[VOC_2007_annotations_dir],
                          classes=classes,
                          include_classes='all',
                          exclude_truncated=False,
                          exclude_difficult=True,
                          ret=False)
    # Cache both datasets as HDF5 for faster loading on subsequent runs.
    train_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07+12_trainval.h5',
                                      resize=False,
                                      variable_image_size=True,
                                      verbose=True)
    val_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07_test.h5',
                                    resize=False,
                                    variable_image_size=True,
                                    verbose=True)

    # 3: Set the batch size.
    batch_size = 8  # Change the batch size if you like, or if you run into GPU memory issues.

    # 4: Set the image transformations for pre-processing and data augmentation options.
    ssd_data_augmentation = SSDDataAugmentation(img_height=img_height,
                                                img_width=img_width,
                                                background=mean_color)
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)

    # 5: Instantiate an encoder that can encode ground truth labels into the
    # format needed by the SSD loss function. The encoder constructor needs
    # the spatial dimensions of the model's predictor layers to create the
    # anchor boxes.
    predictor_sizes = [model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
                       model.get_layer('fc7_mbox_conf').output_shape[1:3],
                       model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
                       model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
                       model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
                       model.get_layer('conv9_2_mbox_conf').output_shape[1:3]]
    ssd_input_encoder = SSDInputEncoder(img_height=img_height,
                                        img_width=img_width,
                                        n_classes=n_classes,
                                        predictor_sizes=predictor_sizes,
                                        scales=scales,
                                        aspect_ratios_per_layer=aspect_ratios,
                                        two_boxes_for_ar1=two_boxes_for_ar1,
                                        steps=steps,
                                        offsets=offsets,
                                        clip_boxes=clip_boxes,
                                        variances=variances,
                                        matching_type='multi',
                                        pos_iou_threshold=0.5,
                                        neg_iou_limit=0.5,
                                        normalize_coords=normalize_coords)

    # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.
    train_generator = train_dataset.generate(batch_size=batch_size,
                                             shuffle=True,
                                             transformations=[ssd_data_augmentation],
                                             label_encoder=ssd_input_encoder,
                                             returns={'processed_images', 'encoded_labels'},
                                             keep_images_without_gt=False)
    val_generator = val_dataset.generate(batch_size=batch_size,
                                         shuffle=False,
                                         transformations=[convert_to_3_channels, resize],
                                         label_encoder=ssd_input_encoder,
                                         returns={'processed_images', 'encoded_labels'},
                                         keep_images_without_gt=False)

    # Get the number of samples in the training and validations datasets.
    train_dataset_size = train_dataset.get_dataset_size()
    val_dataset_size = val_dataset.get_dataset_size()
    print("Number of images in the training dataset:\t{:>6}".format(train_dataset_size))
    print("Number of images in the validation dataset:\t{:>6}".format(val_dataset_size))

    #############################################################
    # Kick off the training
    #############################################################
    # Define model callbacks.
    model_checkpoint = ModelCheckpoint(
        filepath='ssd300_pascal_07+12_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        period=1)
    csv_logger = CSVLogger(filename='ssd300_pascal_07+12_training_log.csv',
                           separator=',',
                           append=True)
    # NOTE(review): `lr_schedule` is not defined inside this function —
    # presumably a module-level schedule function; verify it exists before
    # running, otherwise this line raises NameError.
    learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule,
                                                    verbose=1)
    terminate_on_nan = TerminateOnNaN()
    callbacks = [model_checkpoint, csv_logger, learning_rate_scheduler, terminate_on_nan]

    # Train
    initial_epoch = 0
    final_epoch = 120
    steps_per_epoch = 1000
    history = model.fit_generator(generator=train_generator,
                                  steps_per_epoch=steps_per_epoch,
                                  epochs=final_epoch,
                                  callbacks=callbacks,
                                  validation_data=val_generator,
                                  validation_steps=ceil(val_dataset_size / batch_size),
                                  initial_epoch=initial_epoch)

    #############################################################
    # Run the evaluation
    #############################################################
    # 1: Set the generator for the predictions.
    predict_generator = val_dataset.generate(batch_size=1,
                                             shuffle=True,
                                             transformations=[convert_to_3_channels,
                                                              resize],
                                             label_encoder=None,
                                             returns={'processed_images',
                                                      'filenames',
                                                      'inverse_transform',
                                                      'original_images',
                                                      'original_labels'},
                                             keep_images_without_gt=False)

    # 2: Generate samples.
    batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next(
        predict_generator)
    i = 0  # Which batch item to look at
    print("Image:", batch_filenames[i])
    print()
    print("Ground truth boxes:\n")
    print(np.array(batch_original_labels[i]))

    # 3: Make predictions.
    y_pred = model.predict(batch_images)

    # 4: Decode the raw predictions in `y_pred`.
    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.5,
                                       iou_threshold=0.4,
                                       top_k=200,
                                       normalize_coords=normalize_coords,
                                       img_height=img_height,
                                       img_width=img_width)

    # 5: Convert the predictions for the original image.
    y_pred_decoded_inv = apply_inverse_transforms(y_pred_decoded, batch_inverse_transforms)
    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    print("Predicted boxes:\n")
    print(' class conf xmin ymin xmax ymax')
    print(y_pred_decoded_inv[i])

    # 6: Draw the predicted boxes onto the image
    # Set the colors for the bounding boxes
    colors = plt.cm.hsv(np.linspace(0, 1, n_classes + 1)).tolist()
    classes = ['background',
               'aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat',
               'chair', 'cow', 'diningtable', 'dog',
               'horse', 'motorbike', 'person', 'pottedplant',
               'sheep', 'sofa', 'train', 'tvmonitor']

    plt.figure(figsize=(20, 12))
    plt.imshow(batch_original_images[i])
    current_axis = plt.gca()

    # Ground-truth boxes in green; label format is
    # [class_id, xmin, ymin, xmax, ymax].
    for box in batch_original_labels[i]:
        xmin = box[1]
        ymin = box[2]
        xmax = box[3]
        ymax = box[4]
        label = '{}'.format(classes[int(box[0])])
        current_axis.add_patch(
            plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color='green', fill=False, linewidth=2))
        current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor': 'green', 'alpha': 1.0})

    # Predicted boxes in per-class colors; decoded format is
    # [class_id, confidence, xmin, ymin, xmax, ymax].
    for box in y_pred_decoded_inv[i]:
        xmin = box[2]
        ymin = box[3]
        xmax = box[4]
        ymax = box[5]
        color = colors[int(box[0])]
        label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
        current_axis.add_patch(
            plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color=color, fill=False, linewidth=2))
        current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor': color, 'alpha': 1.0})
# Images images_dir = './data/' out_dir = './predict_error' # Ground truth eval_labels_filename = './data/labels_eval.csv' eval_dataset.parse_csv(images_dir=images_dir, labels_filename=eval_labels_filename, input_format=['image_name', 'xmin', 'xmax', 'ymin', 'ymax', 'class_id'], include_classes='all') # 得到训练和validation数据集的数据的量. eval_dataset_size = eval_dataset.get_dataset_size() print("evaluate集的图像数量\t{:>6}".format(eval_dataset_size)) predict_generator = eval_dataset.generate(batch_size=eval_dataset_size, shuffle=True, transformations=[], label_encoder=None, returns={'processed_images', 'processed_labels', 'filenames'}, keep_images_without_gt=False) batch_images, batch_labels, batch_filenames = next(predict_generator) # 3: 作预测 y_pred = model.predict(batch_images) # 4: 解码 `y_pred` # 如果我们训练是设置的是 'inference' 或者 'inference_fast' mode, 那么模型的最后一层为 `DecodeDetections` 层, # `y_pred` 就无需解码了. 但是我们选择了 'training' mode, 模型的原始输出需要解码. 这就是 `decode_detections()` # 这个函数的功能. 这个函数的功能和 `DecodeDetections` 层做的事情一样, 只是使用 Numpy 而不是 TensorFlow 实现. # (Nunpy 只能使用CPU, 而不是GPU). y_pred_decoded = decode_detections(y_pred, confidence_thresh=0.5, iou_threshold=0.45, top_k=200,
# # The paths to the image sets. fire_imagesets = params["image_sets"] file_names=params["image_names"] classes = (loads(params["classes"])) test_dataset = DataGenerator(load_images_into_memory=False, images_dir=fire_img, filenames=file_names) # 1: Set the generator for the predictions. predict_generator = test_dataset.generate(batch_size=test_dataset.get_dataset_size(), shuffle=True, transformations=val_preprocessing(img_height, img_width), label_encoder=None, returns={'processed_images', 'filenames', 'inverse_transform', 'original_images', }, keep_images_without_gt=True) # 2: Generate samples. test_dataset_size = test_dataset.get_dataset_size() print("Number of images in the validation dataset:\t{:>6}".format(test_dataset_size)) batch_images, batch_filenames, batch_inverse_transforms, batch_original_images = next( predict_generator) for i in range(test_dataset.get_dataset_size()): print("Image:", batch_filenames[i])
def set_generator(self,
                  train_images_dir,
                  train_annotation_path,
                  batch_size,
                  val_images_dir=None,
                  val_annotation_path=None):
    """Build the training (and optionally validation) batch generators.

    Parses JSON annotations, picks the data-augmentation chain and
    predictor-layer sizes matching `self.model_name`, and stores the
    resulting generator plus step counts on `self` (`self.generator`,
    `self.steps_per_epoch`, `self.validation_data`, `self.validation_steps`).

    Arguments:
        train_images_dir: directory containing the training images.
        train_annotation_path: JSON annotations file for training.
        batch_size: batch size used for both generators.
        val_images_dir, val_annotation_path: optional validation data; when
            either is None, `self.validation_data` and
            `self.validation_steps` are set to None.

    Raises:
        ValueError: if `self.model_name` is not one of
            'ssd_7', 'ssd_300', 'ssd_512'.
    """
    train_dataset = DataGenerator(load_images_into_memory=True,
                                  hdf5_dataset_path=None)
    train_dataset.parse_json(images_dirs=[train_images_dir],
                             annotations_filenames=[train_annotation_path],
                             ground_truth_available=True,
                             include_classes='all',
                             ret=False,
                             verbose=True)
    train_dataset_size = train_dataset.get_dataset_size()

    if self.model_name == 'ssd_7':
        # Define the image processing chain.
        ssd_data_augmentation = DataAugmentationConstantInputSize(
            random_brightness=(-48, 48, 0.5),
            random_contrast=(0.5, 1.8, 0.5),
            random_saturation=(0.5, 1.8, 0.5),
            random_hue=(18, 0.5),
            random_flip=0.5,
            random_translate=((0.03, 0.5), (0.03, 0.5), 0.5),
            random_scale=(0.5, 2.0, 0.5),
            n_trials_max=3,
            clip_boxes=True,
            overlap_criterion='area',
            bounds_box_filter=(0.3, 1.0),
            bounds_validator=(0.5, 1.0),
            n_boxes_min=1,
            background=(0, 0, 0))
        # The encoder constructor needs the spatial dimensions of the
        # model's predictor layers to create the anchor boxes.
        predictor_sizes = [
            self.model.get_layer('classes4').output_shape[1:3],
            self.model.get_layer('classes5').output_shape[1:3],
            self.model.get_layer('classes6').output_shape[1:3],
            self.model.get_layer('classes7').output_shape[1:3]
        ]
    elif self.model_name == 'ssd_300':
        # For the training generator:
        ssd_data_augmentation = SSDDataAugmentation(
            img_height=self.image_size[0],
            img_width=self.image_size[1],
            background=self.mean_color)
        # Predictor layer sizes of the SSD300 architecture.
        predictor_sizes = [
            self.model.get_layer(
                'conv4_3_norm_mbox_conf').output_shape[1:3],
            self.model.get_layer('fc7_mbox_conf').output_shape[1:3],
            self.model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
            self.model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
            self.model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
            self.model.get_layer('conv9_2_mbox_conf').output_shape[1:3]
        ]
    elif self.model_name == 'ssd_512':
        # For the training generator:
        ssd_data_augmentation = SSDDataAugmentation(
            img_height=self.image_size[0],
            img_width=self.image_size[1],
            background=self.mean_color)
        # Predictor layer sizes of the SSD512 architecture (one extra layer).
        predictor_sizes = [
            self.model.get_layer(
                'conv4_3_norm_mbox_conf').output_shape[1:3],
            self.model.get_layer('fc7_mbox_conf').output_shape[1:3],
            self.model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
            self.model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
            self.model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
            self.model.get_layer('conv9_2_mbox_conf').output_shape[1:3],
            self.model.get_layer('conv10_2_mbox_conf').output_shape[1:3]
        ]
    else:
        # BUGFIX: an unrecognized model name previously fell through the
        # if/elif chain and crashed later with a NameError on
        # `predictor_sizes`; fail fast with a clear message instead.
        raise ValueError(
            "Unsupported model_name: {!r}; expected 'ssd_7', 'ssd_300' or "
            "'ssd_512'".format(self.model_name))

    ssd_input_encoder = SSDInputEncoder(
        img_height=self.image_size[0],
        img_width=self.image_size[1],
        n_classes=self.n_classes,
        predictor_sizes=predictor_sizes,
        scales=self.scales,
        aspect_ratios_per_layer=self.aspect_ratios_per_layer,
        two_boxes_for_ar1=self.two_boxes_for_ar1,
        steps=self.steps,
        offsets=self.offsets,
        clip_boxes=self.clip_boxes,
        variances=self.variances,
        matching_type='multi',
        pos_iou_threshold=0.5,
        neg_iou_limit=0.5,
        normalize_coords=self.normalize_coords)

    self.generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        transformations=[ssd_data_augmentation],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)
    self.steps_per_epoch = ceil(train_dataset_size / batch_size)

    if val_images_dir is not None and val_annotation_path is not None:
        val_dataset = DataGenerator(load_images_into_memory=True,
                                    hdf5_dataset_path=None)
        val_dataset.parse_json(images_dirs=[val_images_dir],
                               annotations_filenames=[val_annotation_path],
                               ground_truth_available=True,
                               include_classes='all',
                               ret=False,
                               verbose=True)
        val_dataset_size = val_dataset.get_dataset_size()
        if self.model_name == 'ssd_300' or self.model_name == 'ssd_512':
            # For the validation generator: no augmentation, only channel
            # conversion and resizing to the model input size.
            convert_to_3_channels = ConvertTo3Channels()
            resize = Resize(height=self.image_size[0],
                            width=self.image_size[1])
            transformations = [convert_to_3_channels, resize]
        else:
            transformations = []
        self.validation_data = val_dataset.generate(
            batch_size=batch_size,
            shuffle=False,
            transformations=transformations,
            label_encoder=ssd_input_encoder,
            returns={'processed_images', 'encoded_labels'},
            keep_images_without_gt=False)
        self.validation_steps = ceil(val_dataset_size / batch_size)
    else:
        self.validation_data = None
        self.validation_steps = None
image_set_filenames=[VOC_2007_test_image_set_filename], annotations_dirs=[VOC_2007_annotations_dir], classes=classes, include_classes='all', exclude_truncated=False, exclude_difficult=True, ret=False) convert_to_3_channels = ConvertTo3Channels() resize = Resize(height=img_height, width=img_width) generator = dataset.generate(batch_size=1, shuffle=True, transformations=[convert_to_3_channels, resize], returns={ 'processed_images', 'filenames', 'inverse_transform', 'original_images', 'original_labels', 'masks' }, keep_images_without_gt=False) # In[13]: #print(generator) # Generate a batch and make predictions. confidence_threshold = 0.5 count_arr = [] acc_arr = [] plt.figure(figsize=(8, 6)) for epoch in range(1000):
two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.3, normalize_coords=normalize_coords) # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function. train_generator = train_dataset.generate( batch_size=batch_size, shuffle=True, transformations=[data_augmentation_chain], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) val_generator = val_dataset.generate( batch_size=batch_size, shuffle=False, transformations=[], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) # ## 4. Set the remaining training parameters and train the model # # We've already chosen an optimizer and a learning rate and set the batch size above, now let's set the remaining training parameters.
# 这里 clip 的是 anchor boxes clip_boxes=clip_boxes, variances=variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.3, border_pixels='half', coords='centroids', normalize_coords=normalize_coords, ) # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function. train_generator = train_dataset.generate( batch_size=batch_size, shuffle=True, transformations=(data_augmentation_chain, ), label_encoder=ssd_input_encoder, returns=('processed_images', 'encoded_labels'), keep_images_without_gt=False) val_generator = val_dataset.generate(batch_size=batch_size, shuffle=False, transformations=(), label_encoder=ssd_input_encoder, returns=('processed_images', 'encoded_labels'), keep_images_without_gt=False) # Define model callbacks. # TODO: Set the filepath under which you want to save the weights. model_checkpoint = ModelCheckpoint( filepath=
val_dataset = DataGenerator() if labels: val_dataset = DataGenerator() val_dataset.parse_csv(images_dir=images_folder, labels_filename=labels, include_classes='all') ret = {'processed_images', 'original_labels', 'filenames'} else: filenames = os.listdir(images_folder) val_dataset = DataGenerator(filenames = filenames) ret = {'processed_images', 'filenames'} predict_generator = val_dataset.generate(batch_size=1, shuffle=True, transformations=[], label_encoder=None, returns=ret, keep_images_without_gt=False) val_dataset_size = val_dataset.get_dataset_size() print('dataset size : ', val_dataset_size) # load model drbox_loss = DRBoxLoss(neg_pos_ratio=3, alpha=1.0) # Set the path to the model you want to evaluate model_drbox = load_model(model_name, custom_objects={'L2Normalization': L2Normalization, 'AnchorBoxes': AnchorBoxes, 'compute_loss': drbox_loss.compute_loss}) # Predict the boxes for every image in the validation dataset
classes = (loads(params["classes"])) test_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=params["hdf5_test_path"], images_dir=fire_img, filenames=fire_test) train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=params["hdf5_train_path"], images_dir=fire_img, filenames=fire_train) # 1: Set the generator for the predictions. train_size = int(params["view_train"]) test_size=int(params["view_test"]) predict_generator = test_dataset.generate(batch_size=test_size, shuffle=True, transformations=val_preprocessing(img_height, img_width), label_encoder=None, returns={'processed_images', 'filenames', 'inverse_transform', 'original_images', 'original_labels'}, keep_images_without_gt=False) data_generator = train_dataset.generate(batch_size=train_size, shuffle=True, transformations=training_preprocessing(img_height, img_width, mean_color), label_encoder=None, returns={'processed_images', 'processed_labels', 'filenames', 'original_images', 'original_labels'}, keep_images_without_gt=False)
input_format=['image_name', 'xmin', 'xmax', 'ymin', 'ymax', 'class_id'], # This is the order of the first six columns in the CSV file that contains the labels for your dataset. If your labels are in XML format, maybe the XML parser will be helpful, check the documentation. include_classes='all', random_sample=False) print("Number of images in the dataset:", dataset.get_dataset_size()) convert_to_3_channels = ConvertTo3Channels() random_max_crop = RandomMaxCropFixedAR(patch_aspect_ratio=img_width/img_height) resize = Resize(height=img_height, width=img_width) generator = dataset.generate(batch_size=1, shuffle=True, transformations=[convert_to_3_channels, random_max_crop, resize], returns={'processed_images', 'processed_labels', 'filenames'}, keep_images_without_gt=False) # Generate samples batch_images, batch_labels, batch_filenames = next(generator) i = 0 # Which batch item to look at print("Image:", batch_filenames[i]) print()
labels_filename=cfgs.TEST_LABEL_FILENAME, input_format=['image_name', 'xmin', 'xmax', 'ymin', 'ymax', 'class_id'], include_classes='all') if cfgs.CREATE_IMAGE_H5: val_dataset.create_hdf5_dataset(file_path=cfgs.TEST_HDF_DATASET, resize=(cfgs.IMG_SHORT_SIDE_LEN, cfgs.IMG_SHORT_SIDE_LEN), variable_image_size=True, verbose=True) val_dataset_size = val_dataset.get_dataset_size() predict_generator = val_dataset.generate( batch_size=1, shuffle=False, transformations=[convert_to_3_channels, resize], label_encoder=None, returns={ 'processed_images', 'filenames', 'inverse_transform', 'original_images', 'original_labels' }, keep_images_without_gt=False) n_classes = 7 count = 0 for i in range(val_dataset_size): batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next( predict_generator) count += 1 print(count) pos = batch_filenames[0].rfind('/') image_name = batch_filenames[0][pos + 1:len(batch_filenames[0])] time_start = time.time()
def load_VOC_IMG_generators(self, model):
    '''
    Build the train/val/test data generators for a Pascal-VOC style dataset.

    Arguments:
        model : a built SSD300 Keras model; the spatial output shapes of its
            six `*_mbox_conf` predictor layers are queried to size the
            anchor-box grids for the label encoder.

    Returns:
        Three ``[generator, dataset_size]`` pairs, in the order train, val,
        test. Each generator yields ``(processed_images, encoded_labels)``
        batches.
    '''
    print('Making VOC image generators')
    datadir = self.datas['DATA_PATH']

    # One lazy (images stay on disk) DataGenerator per split.
    train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)
    val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)
    test_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)

    images_dir = os.path.join(datadir, 'Images')
    annotations_dir = os.path.join(datadir, 'Annotations')
    train_image_set_filename = os.path.join(datadir, 'ImageSets', 'train.txt')
    val_image_set_filename = os.path.join(datadir, 'ImageSets', 'val.txt')
    test_image_set_filename = os.path.join(datadir, 'ImageSets', 'test.txt')
    generator_options = self.datas['GENERATOR']

    # Parse the VOC-style XML annotations for each split (identical options,
    # only the image-set file differs).
    for dataset, image_set_filename in ((train_dataset, train_image_set_filename),
                                        (val_dataset, val_image_set_filename),
                                        (test_dataset, test_image_set_filename)):
        dataset.parse_xml(images_dirs=[images_dir],
                          image_set_filenames=[image_set_filename],
                          annotations_dirs=[annotations_dir],
                          classes=self.datas['CLASSES'],
                          include_classes='all',
                          exclude_truncated=False,
                          exclude_difficult=False,
                          ret=False)

    convert_to_3_channels = ConvertTo3Channels()
    target_size = generator_options['TARGET_SIZE']
    resize = Resize(height=target_size[0], width=target_size[1])

    # Spatial sizes of the six SSD300 predictor layers, required by the
    # input encoder to lay out the anchor-box grids.
    predictor_sizes = [model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
                       model.get_layer('fc7_mbox_conf').output_shape[1:3],
                       model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
                       model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
                       model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
                       model.get_layer('conv9_2_mbox_conf').output_shape[1:3]]

    # Anchor-box configuration matching the original SSD300 / Pascal VOC setup.
    scales_pascal = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]  # anchor scaling factors
    scales = scales_pascal
    aspect_ratios = [[1.0, 2.0, 0.5],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5],
                     [1.0, 2.0, 0.5]]  # per-layer aspect ratios; the order matters
    steps = [8, 16, 32, 64, 100, 300]  # anchor-grid cell stride per predictor layer
    two_boxes_for_ar1 = True
    mean_color = [123, 117, 104]  # TODO : add this as a parameter (currently unused here)
    offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]  # anchor centre offsets as fraction of step
    clip_boxes = False
    variances = [0.1, 0.1, 0.2, 0.2]
    normalize_coords = True

    ssd_input_encoder = SSDInputEncoder(img_height=target_size[0],
                                        img_width=target_size[1],
                                        n_classes=20,  # TODO : handle subsampling
                                        predictor_sizes=predictor_sizes,
                                        scales=scales,
                                        aspect_ratios_per_layer=aspect_ratios,
                                        two_boxes_for_ar1=two_boxes_for_ar1,
                                        steps=steps,
                                        offsets=offsets,
                                        clip_boxes=clip_boxes,
                                        variances=variances,
                                        matching_type='multi',
                                        pos_iou_threshold=0.5,
                                        neg_iou_limit=0.5,
                                        normalize_coords=normalize_coords)

    def _make_generator(dataset):
        # One (processed_images, encoded_labels) generator over `dataset`;
        # all three splits share identical preprocessing and label encoding.
        return dataset.generate(batch_size=generator_options['BATCH_SIZE'],
                                shuffle=True,
                                transformations=[convert_to_3_channels, resize],
                                label_encoder=ssd_input_encoder,
                                returns={'processed_images', 'encoded_labels'},
                                keep_images_without_gt=False)

    train_generator = _make_generator(train_dataset)
    val_generator = _make_generator(val_dataset)
    test_generator = _make_generator(test_dataset)

    # Bug fix: the test pair previously reported train_dataset's size.
    return ([train_generator, train_dataset.get_dataset_size()],
            [val_generator, val_dataset.get_dataset_size()],
            [test_generator, test_dataset.get_dataset_size()])