Example #1
def get_model(weights_path):
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)

    K.clear_session()
    model = ssd_300(image_size=(img_height, img_width, img_channels),
                    n_classes=n_classes,
                    mode='training',
                    l2_regularization=0.0005,
                    scales=scales,
                    aspect_ratios_per_layer=aspect_ratios,
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    steps=steps,
                    offsets=offsets,
                    clip_boxes=clip_boxes,
                    variances=variances,
                    normalize_coords=normalize_coords,
                    subtract_mean=mean_color,
                    swap_channels=swap_channels)

    # 2: Load some weights into the model.
    model.load_weights(weights_path, by_name=True)

    sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False)

    model.compile(optimizer=sgd, loss=ssd_loss.compute_loss)

    return model
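For reference, a minimal usage sketch (not part of the original example): `get_model` reads the module-level parameters (img_height, img_width, img_channels, n_classes, scales, aspect_ratios, two_boxes_for_ar1, steps, offsets, clip_boxes, variances, normalize_coords, mean_color, swap_channels), defined as in Example #3, and the weights path below is only illustrative.

model = get_model('VGG_VOC0712_SSD_300x300_iter_120000.h5')
model.summary()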
Example #2
    def __init__(self,
                 resize_height,
                 resize_width,
                 random_brightness=(-20, 20, 0.5),
                 random_contrast=(0.8, 1.0, 0.5),
                 random_saturation=(0.8, 1.8, 0.5),
                 random_hue=(10, 0.5),
                 random_flip=0.5,
                 random_rotate_small=([np.pi / 40, np.pi / 30], 0.5),
                 random_rotate_big=([np.pi / 2, np.pi, 3 * np.pi / 2], 0.5),
                 min_scale=0.8,
                 max_scale=1.05,
                 min_aspect_ratio=0.8,
                 max_aspect_ratio=1.2,
                 n_trials_max=3,
                 overlap_criterion='center_point',
                 bounds_box_filter=(0.3, 1.0),
                 bounds_validator=(0.5, 1.0),
                 n_boxes_min=1,
                 random_translate=((0.03, 0.05), (0.03, 0.05), 0.5),
                 random_scale=(0.9, 1.1, 0.5),
                 proba_no_aug=1 / 3):

        self.n_trials_max = n_trials_max
        self.overlap_criterion = overlap_criterion
        self.bounds_box_filter = bounds_box_filter
        self.bounds_validator = bounds_validator
        self.n_boxes_min = n_boxes_min

        self.proba_no_aug = proba_no_aug  # the probability of not performing any transformations

        # Determines which boxes are kept in an image after the transformations have been applied.
        self.box_filter = BoxFilter(check_overlap=True,
                                    check_min_area=False,
                                    check_degenerate=False,
                                    overlap_criterion=self.overlap_criterion,
                                    overlap_bounds=self.bounds_box_filter)

        self.box_filter_resize = BoxFilter(check_overlap=False,
                                           check_min_area=True,
                                           check_degenerate=True,
                                           min_area=16)

        # Determines whether the result of the transformations is a valid training image.
        self.image_validator = ImageValidator(
            overlap_criterion=self.overlap_criterion,
            bounds=self.bounds_validator,
            n_boxes_min=self.n_boxes_min)

        # Utility transformations
        self.convert_to_3_channels = ConvertTo3Channels()  # Make sure all images end up having 3 channels.
        self.convert_RGB_to_HSV = ConvertColor(current='RGB', to='HSV')
        self.convert_HSV_to_RGB = ConvertColor(current='HSV', to='RGB')
        self.convert_to_float32 = ConvertDataType(to='float32')
        self.convert_to_uint8 = ConvertDataType(to='uint8')
        self.resize = Resize(height=resize_height,
                             width=resize_width,
                             box_filter=self.box_filter_resize)

        # Photometric transformations
        self.random_brightness = RandomBrightness(lower=random_brightness[0],
                                                  upper=random_brightness[1],
                                                  prob=random_brightness[2])
        self.random_contrast = RandomContrast(lower=random_contrast[0],
                                              upper=random_contrast[1],
                                              prob=random_contrast[2])
        self.random_saturation = RandomSaturation(lower=random_saturation[0],
                                                  upper=random_saturation[1],
                                                  prob=random_saturation[2])
        self.random_hue = RandomHue(max_delta=random_hue[0],
                                    prob=random_hue[1])

        # Geometric transformations
        self.random_horizontal_flip = RandomFlip(dim='horizontal',
                                                 prob=random_flip)
        self.random_vertical_flip = RandomFlip(dim='vertical',
                                               prob=random_flip)
        self.random_translate = RandomTranslate(
            dy_minmax=random_translate[0],
            dx_minmax=random_translate[1],
            prob=random_translate[2],
            box_filter=self.box_filter,
            image_validator=self.image_validator,
            n_trials_max=self.n_trials_max)

        self.random_rotate_small = RandomRotate(
            angles=random_rotate_small[0],
            prob=random_rotate_small[1],
            box_filter=self.box_filter,
            image_validator=self.image_validator,
            n_trials_max=self.n_trials_max)

        self.random_rotate_big = RandomRotate(
            angles=random_rotate_big[0],
            prob=random_rotate_big[1],
            box_filter=self.box_filter,
            image_validator=self.image_validator,
            n_trials_max=self.n_trials_max)

        self.random_zoom_in = RandomScale(min_factor=1.0,
                                          max_factor=random_scale[1],
                                          prob=random_scale[2],
                                          box_filter=self.box_filter,
                                          image_validator=self.image_validator,
                                          n_trials_max=self.n_trials_max)

        # Note that `min_factor` and `max_factor` are both `random_scale[0]`
        # here, so the zoom-out factor is fixed rather than sampled from a range.
        self.random_zoom_out = RandomScale(
            min_factor=random_scale[0],
            max_factor=random_scale[0],
            prob=random_scale[2],
            box_filter=self.box_filter,
            image_validator=self.image_validator,
            n_trials_max=self.n_trials_max)

        # The random patch generator is not used at the moment, but it could be useful in your project.
        self.patch_coord_generator = PatchCoordinateGenerator(
            must_match='h_w',
            min_scale=min_scale,
            max_scale=max_scale,
            scale_uniformly=False,
            min_aspect_ratio=min_aspect_ratio,
            max_aspect_ratio=max_aspect_ratio)

        self.random_patch = RandomPatch(
            patch_coord_generator=self.patch_coord_generator,
            box_filter=self.box_filter,
            image_validator=self.image_validator,
            n_trials_max=self.n_trials_max,
            prob=0.5,
            can_fail=False)

        # If we zoom in, do translation before scaling.
        self.sequence1 = [
            self.convert_to_3_channels, self.convert_to_float32,
            self.random_brightness, self.random_contrast,
            self.convert_to_uint8, self.convert_RGB_to_HSV,
            self.convert_to_float32, self.random_saturation, self.random_hue,
            self.convert_to_uint8, self.convert_HSV_to_RGB,
            self.random_horizontal_flip, self.random_vertical_flip,
            self.random_translate, self.random_rotate_big,
            self.random_rotate_small, self.random_zoom_in, self.random_patch,
            self.resize
        ]

        # If we zoom out, do translation after scaling.
        self.sequence2 = [
            self.convert_to_3_channels, self.convert_to_float32,
            self.random_brightness, self.random_contrast,
            self.convert_to_uint8, self.convert_RGB_to_HSV,
            self.convert_to_float32, self.random_saturation, self.random_hue,
            self.convert_to_uint8, self.convert_HSV_to_RGB,
            self.random_horizontal_flip, self.random_vertical_flip,
            self.random_zoom_out, self.random_translate,
            self.random_rotate_big, self.random_rotate_small,
            self.random_patch, self.resize
        ]

        self.sequence3 = [
            self.convert_to_3_channels, self.convert_to_uint8,
            self.random_horizontal_flip, self.random_vertical_flip,
            self.random_translate, self.random_rotate_big,
            self.random_rotate_small, self.resize
        ]
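
This constructor only builds the three transformation chains; the excerpt omits the method that applies them. A minimal dispatcher sketch (not part of the original class), assuming each transform follows the ssd_keras convention of mapping (image, labels) to (image, labels):

    def __call__(self, image, labels):
        # With probability `proba_no_aug`, use only the light geometric chain
        # (channel fixing, flips, translation, rotation, resize).
        if np.random.uniform() < self.proba_no_aug:
            sequence = self.sequence3
        else:
            # Otherwise pick the zoom-in or the zoom-out chain at random.
            sequence = self.sequence1 if np.random.uniform() < 0.5 else self.sequence2
        for transform in sequence:
            image, labels = transform(image, labels)
        return image, labels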
Example #3
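These excerpts omit their import statements. For this example, assuming the directory layout of the pierluigiferrari/ssd_keras repository the code is based on, they would look roughly like:

from math import ceil
import numpy as np
from matplotlib import pyplot as plt
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, CSVLogger, LearningRateScheduler, TerminateOnNaN
from models.keras_ssd300 import ssd_300
from keras_loss_function.keras_ssd_loss import SSDLoss
from ssd_encoder_decoder.ssd_input_encoder import SSDInputEncoder
from ssd_encoder_decoder.ssd_output_decoder import decode_detections
from data_generator.object_detection_2d_data_generator import DataGenerator
from data_generator.data_augmentation_chain_original_ssd import SSDDataAugmentation
from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels
from data_generator.object_detection_2d_geometric_ops import Resize
from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms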
def _main_(args):
    print('Hello World! This is {:s}'.format(args.desc))

    # config_path = args.conf
    # with open(config_path) as config_buffer:    
    #     config = json.loads(config_buffer.read())
    #############################################################
    #   Set model parameters
    #############################################################
    img_height          = 300  # Height of the model input images
    img_width           = 300  # Width of the model input images
    img_channels        = 3  # Number of color channels of the model input images
    mean_color          = [123, 117, 104]  # The per-channel mean of the images in the dataset. Do not change this value if you're using any of the pre-trained weights.
    swap_channels       = [2, 1, 0]  # The color channel order in the original SSD is BGR, so we'll have the model reverse the color channel order of the input images.
    n_classes           = 20  # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO
    scales_pascal       = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]  # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets
    scales_coco         = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]  # The anchor box scaling factors used in the original SSD300 for the MS COCO datasets
    scales              = scales_pascal
    aspect_ratios       = [[1.0, 2.0, 0.5],
                           [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                           [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                           [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                           [1.0, 2.0, 0.5],
                           [1.0, 2.0, 0.5]]  # The anchor box aspect ratios used in the original SSD300; the order matters
    two_boxes_for_ar1   = True
    steps               = [8, 16, 32, 64, 100, 300]  # The space between two adjacent anchor box center points for each predictor layer.
    offsets             = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]  # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer.
    clip_boxes          = False  # Whether or not to clip the anchor boxes to lie entirely within the image boundaries
    variances           = [0.1, 0.1, 0.2, 0.2]  # The variances by which the encoded target coordinates are divided as in the original implementation
    normalize_coords    = True

    #############################################################
    #   Create the model
    #############################################################
    # 1: Build the Keras model.
    model = ssd_300(image_size=(img_height, img_width, img_channels),
                    n_classes=n_classes,
                    mode='training',
                    l2_regularization=0.0005,
                    scales=scales,
                    aspect_ratios_per_layer=aspect_ratios,
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    steps=steps,
                    offsets=offsets,
                    clip_boxes=clip_boxes,
                    variances=variances,
                    normalize_coords=normalize_coords,
                    subtract_mean=mean_color,
                    swap_channels=swap_channels)
    # 2: Load some weights into the model (omitted in this excerpt).

    # 3: Instantiate an optimizer and the SSD loss function and compile the model.
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

    #############################################################
    #   Prepare the data
    #############################################################
    # 1: Instantiate two `DataGenerator` objects: One for training, one for validation.
    train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)
    val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)

    # 2: Parse the image and label lists for the training and validation datasets. This can take a while.
    VOC_2007_images_dir = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages'
    VOC_2007_annotations_dir = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/Annotations'
    VOC_2007_train_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/train.txt'
    VOC_2007_val_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/val.txt'
    # VOC_2007_trainval_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt'
    # VOC_2007_test_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtest_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/test.txt'

    classes = ['background',
               'aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat',
               'chair', 'cow', 'diningtable', 'dog',
               'horse', 'motorbike', 'person', 'pottedplant',
               'sheep', 'sofa', 'train', 'tvmonitor']

    train_dataset.parse_xml(images_dirs=[VOC_2007_images_dir],
                            image_set_filenames=[VOC_2007_train_image_set_filename],
                            annotations_dirs=[VOC_2007_annotations_dir],
                            classes=classes,
                            include_classes='all',
                            exclude_truncated=False,
                            exclude_difficult=False,
                            ret=False)
    val_dataset.parse_xml(images_dirs=[VOC_2007_images_dir],
                          image_set_filenames=[VOC_2007_val_image_set_filename],
                          annotations_dirs=[VOC_2007_annotations_dir],
                          classes=classes,
                          include_classes='all',
                          exclude_truncated=False,
                          exclude_difficult=True,
                          ret=False)

    train_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07+12_trainval.h5',
                                      resize=False,
                                      variable_image_size=True,
                                      verbose=True)

    val_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07_test.h5',
                                    resize=False,
                                    variable_image_size=True,
                                    verbose=True)
    # 3: Set the batch size.
    batch_size = 8  # Change the batch size if you like, or if you run into GPU memory issues.

    # 4: Set the image transformations for pre-processing and data augmentation options.
    ssd_data_augmentation = SSDDataAugmentation(img_height=img_height,
                                                img_width=img_width,
                                                background=mean_color)
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)

    # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.
    predictor_sizes = [model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
                       model.get_layer('fc7_mbox_conf').output_shape[1:3],
                       model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
                       model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
                       model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
                       model.get_layer('conv9_2_mbox_conf').output_shape[1:3]]

    ssd_input_encoder = SSDInputEncoder(img_height=img_height,
                                        img_width=img_width,
                                        n_classes=n_classes,
                                        predictor_sizes=predictor_sizes,
                                        scales=scales,
                                        aspect_ratios_per_layer=aspect_ratios,
                                        two_boxes_for_ar1=two_boxes_for_ar1,
                                        steps=steps,
                                        offsets=offsets,
                                        clip_boxes=clip_boxes,
                                        variances=variances,
                                        matching_type='multi',
                                        pos_iou_threshold=0.5,
                                        neg_iou_limit=0.5,
                                        normalize_coords=normalize_coords)

    # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.
    train_generator = train_dataset.generate(batch_size=batch_size,
                                             shuffle=True,
                                             transformations=[ssd_data_augmentation],
                                             label_encoder=ssd_input_encoder,
                                             returns={'processed_images',
                                                      'encoded_labels'},
                                             keep_images_without_gt=False)

    val_generator = val_dataset.generate(batch_size=batch_size,
                                         shuffle=False,
                                         transformations=[convert_to_3_channels,
                                                          resize],
                                         label_encoder=ssd_input_encoder,
                                         returns={'processed_images',
                                                  'encoded_labels'},
                                         keep_images_without_gt=False)

    # Get the number of samples in the training and validations datasets.
    train_dataset_size = train_dataset.get_dataset_size()
    val_dataset_size = val_dataset.get_dataset_size()

    print("Number of images in the training dataset:\t{:>6}".format(train_dataset_size))
    print("Number of images in the validation dataset:\t{:>6}".format(val_dataset_size))

    #############################################################
    #   Kick off the training
    #############################################################
    # Define model callbacks.
    model_checkpoint = ModelCheckpoint(
        filepath='ssd300_pascal_07+12_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        period=1)

    csv_logger = CSVLogger(filename='ssd300_pascal_07+12_training_log.csv',
                           separator=',',
                           append=True)
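
    # NOTE: `lr_schedule` is not defined in this excerpt; as a plausible
    # stand-in, here is the schedule used in Example #5 below.
    def lr_schedule(epoch):
        if epoch < 80:
            return 0.001
        elif epoch < 100:
            return 0.0001
        else:
            return 0.00001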

    learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule,
                                                    verbose=1)

    terminate_on_nan = TerminateOnNaN()

    callbacks = [model_checkpoint,
                 csv_logger,
                 learning_rate_scheduler,
                 terminate_on_nan]

    # Train
    initial_epoch = 0
    final_epoch = 120
    steps_per_epoch = 1000

    history = model.fit_generator(generator=train_generator,
                                  steps_per_epoch=steps_per_epoch,
                                  epochs=final_epoch,
                                  callbacks=callbacks,
                                  validation_data=val_generator,
                                  validation_steps=ceil(val_dataset_size / batch_size),
                                  initial_epoch=initial_epoch)

    #############################################################
    #   Run the evaluation
    #############################################################
    # 1: Set the generator for the predictions.
    predict_generator = val_dataset.generate(batch_size=1,
                                             shuffle=True,
                                             transformations=[convert_to_3_channels,
                                                              resize],
                                             label_encoder=None,
                                             returns={'processed_images',
                                                      'filenames',
                                                      'inverse_transform',
                                                      'original_images',
                                                      'original_labels'},
                                             keep_images_without_gt=False)

    # 2: Generate samples.
    batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next(
        predict_generator)

    i = 0  # Which batch item to look at

    print("Image:", batch_filenames[i])
    print()
    print("Ground truth boxes:\n")
    print(np.array(batch_original_labels[i]))

    # 3: Make predictions.
    y_pred = model.predict(batch_images)

    # 4: Decode the raw predictions in `y_pred`.
    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.5,
                                       iou_threshold=0.4,
                                       top_k=200,
                                       normalize_coords=normalize_coords,
                                       img_height=img_height,
                                       img_width=img_width)
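
    # Each decoded prediction is a 1D array of the form
    # [class_id, confidence, xmin, ymin, xmax, ymax], which is why the
    # column header below is printed in that order.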

    # 5: Convert the predictions for the original image.
    y_pred_decoded_inv = apply_inverse_transforms(y_pred_decoded, batch_inverse_transforms)
    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    print("Predicted boxes:\n")
    print('   class   conf xmin   ymin   xmax   ymax')
    print(y_pred_decoded_inv[i])

    # 6: Draw the predicted boxes onto the image
    # Set the colors for the bounding boxes
    colors = plt.cm.hsv(np.linspace(0, 1, n_classes + 1)).tolist()
    classes = ['background',
               'aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat',
               'chair', 'cow', 'diningtable', 'dog',
               'horse', 'motorbike', 'person', 'pottedplant',
               'sheep', 'sofa', 'train', 'tvmonitor']

    plt.figure(figsize=(20, 12))
    plt.imshow(batch_original_images[i])

    current_axis = plt.gca()
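
    # Ground truth boxes are stored as [class_id, xmin, ymin, xmax, ymax];
    # decoded predictions as [class_id, confidence, xmin, ymin, xmax, ymax].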

    for box in batch_original_labels[i]:
        xmin = box[1]
        ymin = box[2]
        xmax = box[3]
        ymax = box[4]
        label = '{}'.format(classes[int(box[0])])
        current_axis.add_patch(
            plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color='green', fill=False, linewidth=2))
        current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor': 'green', 'alpha': 1.0})

    for box in y_pred_decoded_inv[i]:
        xmin = box[2]
        ymin = box[3]
        xmax = box[4]
        ymax = box[5]
        color = colors[int(box[0])]
        label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
        current_axis.add_patch(
            plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color=color, fill=False, linewidth=2))
        current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor': color, 'alpha': 1.0})
Example #4
                          exclude_difficult=True,
                          ret=False)

    with open('val_dataset.pickle', mode='wb') as f:
        pickle.dump(val_dataset, f)

batch_size = 8  # Change the batch size if you like, or if you run into GPU memory issues.

# For the training generator:
ssd_data_augmentation = SSDDataAugmentation(img_height=img_height,
                                            img_width=img_width,
                                            background=mean_color)

# For the validation generator:
convert_to_3_channels = ConvertTo3Channels()
resize = Resize(img_height, img_width)

ssd_input_encoder = SSDInputEncoder(img_height=img_height,
                                    img_width=img_width,
                                    n_classes=n_classes,
                                    predictor_sizes=predictor_sizes,
                                    scales=scales,
                                    aspect_ratios_per_layer=aspect_ratios,
                                    two_boxes_for_ar1=two_boxes_for_ar1,
                                    steps=steps,
                                    offsets=offsets,
                                    clip_boxes=clip_boxes,
                                    variances=variances,
                                    matching_type='multi',
                                    pos_iou_threshold=0.5,
                                    neg_iou_limit=0.5,
                                    normalize_coords=normalize_coords)
Example #5
def train_VOC(config):
    '''
    Train the given configuration; the configuration must be constructed
    according to the utility script found in utils/generateconfig.py.

    Arguments:
        config : the configuration of the model to use; it should already
            be loaded

    '''
    ###################################
    ### PATHS AND PARAMETERS
    ##################################
    datadir = config.DATA_DIR
    local_dir = config.ROOT_FOLDER
    img_shape = config.IMG_SHAPE
    classes = config.CLASSES
    checkpoint_output = os.path.join(local_dir, 'models',
                                     config.CHECKPOINT_NAME)
    model_output = os.path.join(local_dir, 'models', config.MODEL_NAME)
    img_height = img_shape[0]  # Height of the model input images
    img_width = img_shape[1]  # Width of the model input images
    img_channels = img_shape[2]  # Number of color channels of the model input images
    mean_color = [123, 117, 104]  # The per-channel mean of the images in the dataset. Do not change this value if you're using any of the pre-trained weights.
    swap_channels = [2, 1, 0]  # The color channel order in the original SSD is BGR, so we'll have the model reverse the color channel order of the input images.
    n_classes = 20  # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO
    scales_pascal = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]  # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets
    scales = scales_pascal
    aspect_ratios = [[1.0, 2.0, 0.5],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5],
                     [1.0, 2.0, 0.5]]  # The anchor box aspect ratios used in the original SSD300; the order matters
    two_boxes_for_ar1 = True
    steps = [8, 16, 32, 64, 100, 300]  # The space between two adjacent anchor box center points for each predictor layer.
    offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]  # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer.
    clip_boxes = False  # Whether or not to clip the anchor boxes to lie entirely within the image boundaries
    variances = [0.1, 0.1, 0.2, 0.2]  # The variances by which the encoded target coordinates are divided as in the original implementation
    normalize_coords = True
    batch_size = config.BATCH_SIZE  # Change the batch size if you like, or if you run into GPU memory issues.

    ###################################
    ### BUILDING MODEL
    ##################################
    K.clear_session()  # Clear previous models from memory.

    model = ssd_300(image_size=(img_height, img_width, img_channels),
                    n_classes=n_classes,
                    mode='training',
                    l2_regularization=0.0005,
                    scales=scales,
                    aspect_ratios_per_layer=aspect_ratios,
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    steps=steps,
                    offsets=offsets,
                    clip_boxes=clip_boxes,
                    variances=variances,
                    normalize_coords=normalize_coords,
                    subtract_mean=mean_color,
                    swap_channels=swap_channels)

    weights_path = os.path.join(local_dir, 'weights',
                                'VGG_VOC0712_SSD_300x300_iter_120000.h5')
    model.load_weights(weights_path, by_name=True)

    #adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False)
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    model.compile(optimizer=sgd, loss=ssd_loss.compute_loss)

    ###################################
    ### LOADING DATA
    ##################################
    train_dataset = DataGenerator(load_images_into_memory=False,
                                  hdf5_dataset_path=None)
    val_dataset = DataGenerator(load_images_into_memory=False,
                                hdf5_dataset_path=None)
    images_dir = os.path.join(datadir, 'Images')
    annotations_dir = os.path.join(datadir, 'Annotations')
    trainval_image_set_filename = os.path.join(datadir, 'ImageSets',
                                               'train.txt')
    test_image_set_filename = os.path.join(datadir, 'ImageSets', 'val.txt')

    # The XML parser needs to know what object class names to look for
    # and in which order to map them to integers.

    train_dataset.parse_xml(images_dirs=[images_dir],
                            image_set_filenames=[trainval_image_set_filename],
                            annotations_dirs=[annotations_dir],
                            classes=classes,
                            include_classes='all',
                            exclude_truncated=False,
                            exclude_difficult=False,
                            ret=False)

    val_dataset.parse_xml(images_dirs=[images_dir],
                          image_set_filenames=[test_image_set_filename],
                          annotations_dirs=[annotations_dir],
                          classes=classes,
                          include_classes='all',
                          exclude_truncated=False,
                          exclude_difficult=True,
                          ret=False)

    train_dataset.create_hdf5_dataset(file_path='flowers_train.h5',
                                      resize=False,
                                      variable_image_size=True,
                                      verbose=True)

    val_dataset.create_hdf5_dataset(file_path='flowers_val.h5',
                                    resize=False,
                                    variable_image_size=True,
                                    verbose=True)

    ssd_data_augmentation = SSDDataAugmentation(img_height=img_height,
                                                img_width=img_width,
                                                background=mean_color)
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)

    # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes.
    predictor_sizes = [
        model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
        model.get_layer('fc7_mbox_conf').output_shape[1:3],
        model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv9_2_mbox_conf').output_shape[1:3]
    ]

    ssd_input_encoder = SSDInputEncoder(img_height=img_height,
                                        img_width=img_width,
                                        n_classes=n_classes,
                                        predictor_sizes=predictor_sizes,
                                        scales=scales,
                                        aspect_ratios_per_layer=aspect_ratios,
                                        two_boxes_for_ar1=two_boxes_for_ar1,
                                        steps=steps,
                                        offsets=offsets,
                                        clip_boxes=clip_boxes,
                                        variances=variances,
                                        matching_type='multi',
                                        pos_iou_threshold=0.5,
                                        neg_iou_limit=0.5,
                                        normalize_coords=normalize_coords)

    train_generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        transformations=[ssd_data_augmentation],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    val_generator = val_dataset.generate(
        batch_size=batch_size,
        shuffle=False,
        transformations=[convert_to_3_channels, resize],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    # Get the number of samples in the training and validations datasets.
    train_dataset_size = train_dataset.get_dataset_size()
    val_dataset_size = val_dataset.get_dataset_size()

    print("Number of images in the training dataset:\t{:>6}".format(
        train_dataset_size))
    print("Number of images in the validation dataset:\t{:>6}".format(
        val_dataset_size))

    ###################################
    ### PREPARE TRAINING
    ##################################

    def lr_schedule(epoch):
        if epoch < 80:
            return 0.001
        elif epoch < 100:
            return 0.0001
        else:
            return 0.00001

    model_checkpoint = ModelCheckpoint(filepath=checkpoint_output,
                                       monitor='val_loss',
                                       verbose=1,
                                       save_best_only=True,
                                       save_weights_only=False,
                                       mode='auto',
                                       period=1)

    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0.0,
                                   patience=10,
                                   verbose=1)

    learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule,
                                                    verbose=1)

    terminate_on_nan = TerminateOnNaN()

    callbacks = [
        model_checkpoint, learning_rate_scheduler, terminate_on_nan,
        early_stopping
    ]

    ###################################
    ### TRAINING
    ##################################
    epochs = config.EPOCHS
    steps_per_epoch = ceil(train_dataset_size / batch_size)
    model.summary()
    history = model.fit_generator(generator=train_generator,
                                  steps_per_epoch=steps_per_epoch,
                                  epochs=epochs,
                                  callbacks=callbacks,
                                  validation_data=val_generator,
                                  validation_steps=ceil(val_dataset_size /
                                                        batch_size))

    model.save(model_output)
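
A hedged usage sketch for `train_VOC`: it expects a config object as produced by utils/generateconfig.py. The attribute names below are the ones the function actually reads; all values are hypothetical.

from types import SimpleNamespace

config = SimpleNamespace(
    DATA_DIR='/path/to/dataset',       # hypothetical; must contain Images/, Annotations/, ImageSets/
    ROOT_FOLDER='/path/to/project',    # hypothetical; must contain models/ and weights/
    IMG_SHAPE=(300, 300, 3),
    CLASSES=['background', 'flower'],  # hypothetical class list
    CHECKPOINT_NAME='ssd300_checkpoint.h5',
    MODEL_NAME='ssd300_final.h5',
    BATCH_SIZE=8,
    EPOCHS=120)
train_VOC(config)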
Example #6
        verbose=True)

    val_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07_test.h5',
                                    resize=False,
                                    variable_image_size=True,
                                    verbose=True)

    # 4: Set the image transformations for pre-processing and data augmentation options.

    # For the training generator:
    ssd_data_augmentation = SSDDataAugmentation(img_height=img_height,
                                                img_width=img_width,
                                                background=mean_color)
    # For the validation generator:
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)

    # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.

    # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes.
    predictor_sizes = [
        model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
        model.get_layer('fc7_mbox_conf').output_shape[1:3],
        model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv9_2_mbox_conf').output_shape[1:3]
    ]

    ssd_input_encoder = SSDInputEncoder(img_height=img_height,
                                        img_width=img_width,
Example #7
def main():
    create_new_model = (args.model_name == 'default')

    if create_new_model:
        K.clear_session()  # Clear previous models from memory.
        model = ssd_512(image_size=(Config.img_height, Config.img_width,
                                    Config.img_channels),
                        n_classes=Config.n_classes,
                        mode='training',
                        l2_regularization=Config.l2_regularization,
                        scales=Config.scales,
                        aspect_ratios_per_layer=Config.aspect_ratios,
                        two_boxes_for_ar1=Config.two_boxes_for_ar1,
                        steps=Config.steps,
                        offsets=Config.offsets,
                        clip_boxes=Config.clip_boxes,
                        variances=Config.variances,
                        normalize_coords=Config.normalize_coords,
                        subtract_mean=Config.mean_color,
                        swap_channels=Config.swap_channels)

        adam = Adam(lr=0.001,
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-08,
                    decay=0.0)
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
        model.compile(optimizer=adam, loss=ssd_loss.compute_loss)
    else:

        model_path = "weights/" + args.model_name + ".h5"
        # We need to create an SSDLoss object in order to pass that to the model loader.
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

        K.clear_session()  # Clear previous models from memory.

        model = load_model(model_path,
                           custom_objects={
                               'AnchorBoxes': AnchorBoxes,
                               'L2Normalization': L2Normalization,
                               'compute_loss': ssd_loss.compute_loss
                           })

    # Load the data
    train_dataset = DataGenerator(load_images_into_memory=True,
                                  hdf5_dataset_path=os.getcwd() + "/data/" +
                                  args.dataset + '/polyp_train.h5')
    val_dataset = DataGenerator(load_images_into_memory=True,
                                hdf5_dataset_path=os.getcwd() + "/data/" +
                                args.dataset + '/polyp_val.h5')
    train_dataset_size = train_dataset.get_dataset_size()
    val_dataset_size = val_dataset.get_dataset_size()
    print("Number of images in the training dataset:\t{:>6}".format(
        train_dataset_size))
    print("Number of images in the validation dataset:\t{:>6}".format(
        val_dataset_size))

    batch_size = args.batch_size

    # For the training generator:
    ssd_data_augmentation = SSDDataAugmentation(img_height=Config.img_height,
                                                img_width=Config.img_width,
                                                background=Config.mean_color)

    # For the validation generator:
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=Config.img_height, width=Config.img_width)

    # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.

    # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes.
    predictor_sizes = [
        model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
        model.get_layer('fc7_mbox_conf').output_shape[1:3],
        model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv9_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv10_2_mbox_conf').output_shape[1:3]
    ]

    ssd_input_encoder = SSDInputEncoder(
        img_height=Config.img_height,
        img_width=Config.img_width,
        n_classes=Config.n_classes,
        predictor_sizes=predictor_sizes,
        scales=Config.scales,
        aspect_ratios_per_layer=Config.aspect_ratios,
        two_boxes_for_ar1=Config.two_boxes_for_ar1,
        steps=Config.steps,
        offsets=Config.offsets,
        clip_boxes=Config.clip_boxes,
        variances=Config.variances,
        matching_type='multi',
        pos_iou_threshold=0.5,
        neg_iou_limit=0.5,
        normalize_coords=Config.normalize_coords)

    # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.
    train_generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        transformations=[ssd_data_augmentation],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    val_generator = val_dataset.generate(
        batch_size=batch_size,
        shuffle=False,
        transformations=[convert_to_3_channels, resize],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    model_checkpoint = ModelCheckpoint(
        filepath=os.getcwd() +
        '/weights/ssd512_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        period=30)

    csv_logger = CSVLogger(filename='ssd512_training_log.csv',
                           separator=',',
                           append=True)
    learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule)
    terminate_on_nan = TerminateOnNaN()

    tf_log = keras.callbacks.TensorBoard(log_dir=TF_LOG_PATH + args.tf_logs,
                                         histogram_freq=0,
                                         batch_size=batch_size,
                                         write_graph=True,
                                         write_grads=False,
                                         write_images=False)
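
    # NOTE: `TF_LOG_PATH`, `args`, `Config`, and `lr_schedule` are defined at
    # module level in the original script and are not part of this excerpt.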

    callbacks = [
        model_checkpoint, csv_logger, learning_rate_scheduler,
        terminate_on_nan, tf_log
    ]

    # If you're resuming a previous training, set `initial_epoch` and `final_epoch` accordingly.
    initial_epoch = 0
    final_epoch = args.final_epoch
    steps_per_epoch = 500

    # Train/Fit the model
    if args.predict_mode == 'train':
        history = model.fit_generator(generator=train_generator,
                                      steps_per_epoch=steps_per_epoch,
                                      epochs=final_epoch,
                                      callbacks=callbacks,
                                      validation_data=val_generator,
                                      validation_steps=ceil(val_dataset_size /
                                                            batch_size),
                                      initial_epoch=initial_epoch)

    # Prediction Output
    predict_generator = val_dataset.generate(
        batch_size=1,
        shuffle=True,
        transformations=[convert_to_3_channels, resize],
        label_encoder=None,
        returns={
            'processed_images', 'filenames', 'inverse_transform',
            'original_images', 'original_labels'
        },
        keep_images_without_gt=False)

    i = 0
    for val in range(val_dataset_size):
        batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next(
            predict_generator)

        y_pred = model.predict(batch_images)

        y_pred_decoded = decode_detections(
            y_pred,
            confidence_thresh=0.5,
            iou_threshold=0.4,
            top_k=200,
            normalize_coords=Config.normalize_coords,
            img_height=Config.img_height,
            img_width=Config.img_width)

        # 5: Convert the predictions for the original image.
        y_pred_decoded_inv = apply_inverse_transforms(
            y_pred_decoded, batch_inverse_transforms)

        np.set_printoptions(precision=2, suppress=True, linewidth=90)
        print("Predicted boxes:\n")
        print('   class   conf xmin   ymin   xmax   ymax')
        print(y_pred_decoded_inv[i])

        plt.figure(figsize=(20, 12))
        plt.imshow(batch_images[i])

        current_axis = plt.gca()

        # Set the colors for the bounding boxes.
        colors = plt.cm.hsv(np.linspace(0, 1, Config.n_classes + 1)).tolist()
        # Just so we can print class names onto the image instead of IDs.
        classes = ['background', 'polyps']

        for box in batch_original_labels[i]:
            xmin = box[1]
            ymin = box[2]
            xmax = box[3]
            ymax = box[4]
            label = '{}'.format(classes[int(box[0])])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin),
                              xmax - xmin,
                              ymax - ymin,
                              color='green',
                              fill=False,
                              linewidth=2))
            current_axis.text(xmin,
                              ymin,
                              label,
                              size='x-large',
                              color='white',
                              bbox={
                                  'facecolor': 'green',
                                  'alpha': 1.0
                              })

        for box in y_pred_decoded_inv[i]:
            xmin = box[2]
            ymin = box[3]
            xmax = box[4]
            ymax = box[5]
            color = colors[int(box[0])]
            label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin),
                              xmax - xmin,
                              ymax - ymin,
                              color=color,
                              fill=False,
                              linewidth=2))
            current_axis.text(xmin,
                              ymin,
                              label,
                              size='x-large',
                              color='white',
                              bbox={
                                  'facecolor': color,
                                  'alpha': 1.0
                              })
        image = plt.gcf()
        plt.draw()
        image.savefig(os.getcwd() + "/val_ssd512val_" + str(val) + ".png",
                      dpi=100)
Example #8
                      exclude_difficult=True,
                      ret=False)


batch_size = args.batch_size # Change the batch size if you like, or if you run into GPU memory issues.

# 4: Set the image transformations for pre-processing and data augmentation options.
# For the training generator:
ssd_data_augmentation = SSDDataAugmentation(img_height=img_height,
                                            img_width=img_width,
                                            background=mean_color)

# For the validation generator:
convert_to_3_channels = ConvertTo3Channels()
resize = Resize(height=img_height,
                width=img_width,
                interpolation_mode=cv2.INTER_AREA)
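
# Note: cv2.INTER_AREA is generally preferred for downscaling; ssd_keras's
# Resize defaults to cv2.INTER_LINEAR, so it is overridden here.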

# 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.
# The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes.

predictor_sizes = [model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
                   model.get_layer('fc7_mbox_conf').output_shape[1:3],
                   model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
                   model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
                   model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
                   model.get_layer('conv9_2_mbox_conf').output_shape[1:3],
                   model.get_layer('conv10_2_mbox_conf').output_shape[1:3]]

ssd_input_encoder = SSDInputEncoder(img_height=img_height,
                                    img_width=img_width,
Example #9
    def predict_on_dataset(self,
                           img_height,
                           img_width,
                           batch_size,
                           data_generator_mode='resize',
                           decoding_confidence_thresh=0.01,
                           decoding_iou_threshold=0.45,
                           decoding_top_k=200,
                           decoding_pred_coords='centroids',
                           decoding_normalize_coords=True,
                           decoding_border_pixels='include',
                           round_confidences=False,
                           verbose=True,
                           ret=False):

        class_id_pred = self.pred_format['class_id']
        conf_pred = self.pred_format['conf']
        xmin_pred = self.pred_format['xmin']
        ymin_pred = self.pred_format['ymin']
        xmax_pred = self.pred_format['xmax']
        ymax_pred = self.pred_format['ymax']

        convert_to_3_channels = ConvertTo3Channels()
        resize = Resize(height=img_height,
                        width=img_width,
                        labels_format=self.gt_format)
        if data_generator_mode == 'resize':
            transformations = [convert_to_3_channels, resize]
        elif data_generator_mode == 'pad':
            random_pad = RandomPadFixedAR(
                patch_aspect_ratio=img_width / img_height,
                labels_format=self.gt_format)
            transformations = [convert_to_3_channels, random_pad, resize]
        else:
            raise ValueError(
                "`data_generator_mode` can be either of 'resize' or 'pad', but received '{}'."
                .format(data_generator_mode))

        # Set the generator parameters.
        generator = self.data_generator.generate(
            batch_size=batch_size,
            shuffle=False,
            transformations=transformations,
            label_encoder=None,
            returns={
                'processed_images', 'image_ids', 'evaluation-neutral',
                'inverse_transform', 'original_labels'
            },
            keep_images_without_gt=True,
            degenerate_box_handling='remove')

        if self.data_generator.image_ids is None:
            self.data_generator.image_ids = list(
                range(self.data_generator.get_dataset_size()))

        #############################################################################################
        # Predict over all batches of the dataset and store the predictions.
        #############################################################################################

        # We have to generate a separate results list for each class.
        results = [list() for _ in range(self.n_classes + 1)]

        # Create a dictionary that maps image IDs to ground truth annotations.
        # We'll need it below.
        image_ids_to_labels = {}

        # Compute the number of batches to iterate over the dataset. Note
        # that `floor(...) - 1` skips the final batch (and any partial
        # remainder).
        n_images = self.data_generator.get_dataset_size()
        n_batches = int(floor(n_images / batch_size)) - 1
        if verbose:
            print("Number of images in the evaluation dataset: {}".format(
                n_images))
            print()
            tr = trange(n_batches, file=sys.stdout)
            tr.set_description('Producing predictions batch-wise')
        else:
            tr = range(n_batches)

        # Loop over all batches.
        for j in tr:
            # Generate batch.
            batch_X, batch_image_ids, batch_eval_neutral, batch_inverse_transforms, batch_orig_labels = next(
                generator)
            # Predict.
            y_pred = self.model.predict(batch_X)
            if self.model_mode == 'training':
                # Decode.
                y_pred = decode_detections(
                    y_pred,
                    confidence_thresh=decoding_confidence_thresh,
                    iou_threshold=decoding_iou_threshold,
                    top_k=decoding_top_k,
                    input_coords=decoding_pred_coords,
                    normalize_coords=decoding_normalize_coords,
                    img_height=img_height,
                    img_width=img_width,
                    border_pixels=decoding_border_pixels)
            else:
                # Filter out the all-zeros dummy elements of `y_pred`.
                y_pred_filtered = []
                for i in range(len(y_pred)):
                    y_pred_filtered.append(y_pred[i][y_pred[i, :, 0] != 0])
                y_pred = y_pred_filtered
            # Convert the predicted box coordinates for the original images.
            y_pred = apply_inverse_transforms(y_pred, batch_inverse_transforms)

            # Iterate over all batch items.
            for k, batch_item in enumerate(y_pred):

                image_id = batch_image_ids[k]

                for box in batch_item:
                    class_id = int(box[class_id_pred])
                    # Round the box coordinates to reduce the required memory.
                    if round_confidences:
                        confidence = round(box[conf_pred], round_confidences)
                    else:
                        confidence = box[conf_pred]
                    xmin = round(box[xmin_pred], 1)
                    ymin = round(box[ymin_pred], 1)
                    xmax = round(box[xmax_pred], 1)
                    ymax = round(box[ymax_pred], 1)
                    prediction = (image_id, confidence, xmin, ymin, xmax, ymax)
                    # Append the predicted box to the results list for its class.
                    results[class_id].append(prediction)

        self.prediction_results = results

        if ret:
            return results
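
The method stores one result list per class; each entry is an (image_id, confidence, xmin, ymin, xmax, ymax) tuple. A hedged usage sketch, assuming `evaluator` is an instance of the surrounding evaluator class wrapping a trained model and a DataGenerator:

results = evaluator.predict_on_dataset(img_height=300,
                                       img_width=300,
                                       batch_size=8,
                                       data_generator_mode='resize',
                                       ret=True)
person_detections = results[15]  # e.g. class ID 15 ('person' in Pascal VOC)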
Example #10
def _main_(args):

    config_path = args.conf

    with open(config_path) as config_buffer:
        config = json.loads(config_buffer.read())

    ###############################
    #   Parse the annotations
    ###############################
    path_imgs_training = config['train']['train_image_folder']
    path_anns_training = config['train']['train_annot_folder']
    path_imgs_val = config['valid']['valid_image_folder']
    path_anns_val = config['valid']['valid_annot_folder']
    labels = config['model']['labels']
    categories = {}
    #categories = {"Razor": 1, "Gun": 2, "Knife": 3, "Shuriken": 4}  # category 0 is the background
    for i in range(len(labels)):
        categories[labels[i]] = i + 1
    print('\nTraining on: \t' + str(categories) + '\n')

    ####################################
    #   Parameters
    ####################################

    img_height = config['model']['input']  # Height of the model input images
    img_width = config['model']['input']  # Width of the model input images
    img_channels = 3  # Number of color channels of the model input images
    mean_color = [123, 117, 104]  # The per-channel mean of the images in the dataset. Do not change this value if you're using any of the pre-trained weights.
    swap_channels = [2, 1, 0]  # The color channel order in the original SSD is BGR, so we'll have the model reverse the color channel order of the input images.
    n_classes = len(labels)  # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO
    scales_pascal = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]  # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets
    #scales_coco = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]  # The anchor box scaling factors used in the original SSD300 for the MS COCO datasets
    scales = scales_pascal
    # The anchor box aspect ratios used in the original SSD300; the order matters.
    aspect_ratios = [[1.0, 2.0, 0.5],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5],
                     [1.0, 2.0, 0.5]]
    two_boxes_for_ar1 = True
    steps = [8, 16, 32, 64, 100, 300]  # The space between two adjacent anchor box center points for each predictor layer
    offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]  # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer
    clip_boxes = False  # Whether or not to clip the anchor boxes to lie entirely within the image boundaries
    variances = [0.1, 0.1, 0.2, 0.2]  # The variances by which the encoded target coordinates are divided as in the original implementation
    normalize_coords = True

    K.clear_session()  # Clear previous models from memory.

    model_path = config['train']['saved_weights_name']

    if config['model']['backend'] == 'ssd512':
        aspect_ratios = [[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                         [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                         [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                         [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5],
                         [1.0, 2.0, 0.5]]
        steps = [
            8, 16, 32, 64, 100, 200, 300
        ]  # The space between two adjacent anchor box center points for each predictor layer.
        offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
        scales = [0.07, 0.15, 0.3, 0.45, 0.6, 0.75, 0.9, 1.05]

    elif config['model']['backend'] == 'ssd7':
        #weights_path = 'VGG_ILSVRC_16_layers_fc_reduced.h5'
        scales = [
            0.08, 0.16, 0.32, 0.64, 0.96
        ]  # An explicit list of anchor box scaling factors. If this is passed, it will override `min_scale` and `max_scale`.
        aspect_ratios = [0.5, 1.0,
                         2.0]  # The list of aspect ratios for the anchor boxes
        two_boxes_for_ar1 = True  # Whether or not you want to generate two anchor boxes for aspect ratio 1
        steps = None  # In case you'd like to set the step sizes for the anchor box grids manually; not recommended
        offsets = None

    if os.path.exists(model_path):
        print("\nLoading pretrained weights.\n")
        # We need to create an SSDLoss object in order to pass that to the model loader.
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

        K.clear_session()  # Clear previous models from memory.
        model = load_model(model_path,
                           custom_objects={
                               'AnchorBoxes': AnchorBoxes,
                               'L2Normalization': L2Normalization,
                               'compute_loss': ssd_loss.compute_loss
                           })
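
        # `custom_objects` is required here because the saved model contains the
        # custom `AnchorBoxes` and `L2Normalization` layers and was compiled with
        # the custom SSD loss, none of which Keras can deserialize on its own.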

    else:
        ####################################
        #   Build the Keras model.
        ###################################

        if config['model']['backend'] == 'ssd300':
            #weights_path = 'VGG_VOC0712Plus_SSD_300x300_ft_iter_160000.h5'
            from models.keras_ssd300 import ssd_300 as ssd

            model = ssd_300(image_size=(img_height, img_width, img_channels),
                            n_classes=n_classes,
                            mode='training',
                            l2_regularization=0.0005,
                            scales=scales,
                            aspect_ratios_per_layer=aspect_ratios,
                            two_boxes_for_ar1=two_boxes_for_ar1,
                            steps=steps,
                            offsets=offsets,
                            clip_boxes=clip_boxes,
                            variances=variances,
                            normalize_coords=normalize_coords,
                            subtract_mean=mean_color,
                            swap_channels=swap_channels)

        elif config['model']['backend'] == 'ssd512':
            #weights_path = 'VGG_VOC0712Plus_SSD_512x512_ft_iter_160000.h5'
            from models.keras_ssd512 import ssd_512 as ssd

            # 2: Load some weights into the model.
            model = ssd(image_size=(img_height, img_width, img_channels),
                        n_classes=n_classes,
                        mode='training',
                        l2_regularization=0.0005,
                        scales=scales,
                        aspect_ratios_per_layer=aspect_ratios,
                        two_boxes_for_ar1=two_boxes_for_ar1,
                        steps=steps,
                        offsets=offsets,
                        clip_boxes=clip_boxes,
                        variances=variances,
                        normalize_coords=normalize_coords,
                        swap_channels=swap_channels)

        elif config['model']['backend'] == 'ssd7':
            #weights_path = 'VGG_ILSVRC_16_layers_fc_reduced.h5'
            from models.keras_ssd7 import build_model as ssd
            scales = [
                0.08, 0.16, 0.32, 0.64, 0.96
            ]  # An explicit list of anchor box scaling factors. If this is passed, it will override `min_scale` and `max_scale`.
            aspect_ratios = [
                0.5, 1.0, 2.0
            ]  # The list of aspect ratios for the anchor boxes
            two_boxes_for_ar1 = True  # Whether or not you want to generate two anchor boxes for aspect ratio 1
            steps = None  # In case you'd like to set the step sizes for the anchor box grids manually; not recommended
            offsets = None
            model = ssd(image_size=(img_height, img_width, img_channels),
                        n_classes=n_classes,
                        mode='training',
                        l2_regularization=0.0005,
                        scales=scales,
                        aspect_ratios_global=aspect_ratios,
                        aspect_ratios_per_layer=None,
                        two_boxes_for_ar1=two_boxes_for_ar1,
                        steps=steps,
                        offsets=offsets,
                        clip_boxes=clip_boxes,
                        variances=variances,
                        normalize_coords=normalize_coords,
                        subtract_mean=None,
                        divide_by_stddev=None)

        else:
            raise ValueError(
                "Unsupported backend '{}'; expected 'ssd300', 'ssd512' or 'ssd7'."
                .format(config['model']['backend']))

        print('Model created successfully.')
        #sgd = SGD(lr=config['train']['learning_rate'], momentum=0.9, decay=0.0, nesterov=False)

        # TODO: Set the path to the weights you want to load. The VGG-16 weights
        # apply only to the ssd300 and ssd512 backends.
        if config['model']['backend'] in ('ssd300', 'ssd512'):
            weights_path = 'VGG_ILSVRC_16_layers_fc_reduced.h5'
            print("\nLoading pretrained VGG weights.\n")
            model.load_weights(weights_path, by_name=True)

        # 3: Instantiate an optimizer and the SSD loss function and compile the model.
        #    If you want to follow the original Caffe implementation, use the preset SGD
        #    optimizer, otherwise I'd recommend the commented-out Adam optimizer.

        #adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
        #sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False)
        optimizer = Adam(lr=config['train']['learning_rate'],
                         beta_1=0.9,
                         beta_2=0.999,
                         epsilon=1e-08,
                         decay=0.0)
        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
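        # `neg_pos_ratio=3` caps the number of hard-negative background boxes at
        # three per positive box in the confidence loss, following the original
        # SSD paper; `alpha` weights the localization term.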
        model.compile(optimizer=optimizer, loss=ssd_loss.compute_loss)

        model.summary()

    #####################################################################
    #  Instantiate two `DataGenerator` objects: one for training, one for validation.
    #####################################################################
    # Optional: If you have enough memory, consider loading the images into memory.

    train_dataset = DataGenerator(load_images_into_memory=False,
                                  hdf5_dataset_path=None)
    val_dataset = DataGenerator(load_images_into_memory=False,
                                hdf5_dataset_path=None)

    # 2: Parse the image and label lists for the training and validation datasets. This can take a while.

    # The XML parser needs to know what object class names to look for and in which order to map them to integers.
    classes = ['background'] + labels

    train_dataset.parse_xml(
        images_dirs=[config['train']['train_image_folder']],
        image_set_filenames=[config['train']['train_image_set_filename']],
        annotations_dirs=[config['train']['train_annot_folder']],
        classes=classes,
        include_classes='all',
        exclude_truncated=False,
        exclude_difficult=False,
        ret=False)

    val_dataset.parse_xml(
        images_dirs=[config['valid']['valid_image_folder']],
        image_set_filenames=[config['valid']['valid_image_set_filename']],
        annotations_dirs=[config['valid']['valid_annot_folder']],
        classes=classes,
        include_classes='all',
        exclude_truncated=False,
        exclude_difficult=False,
        ret=False)

    #########################
    # 3: Set the batch size.
    #########################
    batch_size = config['train'][
        'batch_size']  # Change the batch size if you like, or if you run into GPU memory issues.

    ##########################
    # 4: Set the image transformations for pre-processing and data augmentation options.
    ##########################
    # The training generator gets the full `SSDDataAugmentation` chain inline
    # where it is created below; the validation generator only needs these two
    # deterministic transformations:
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)

    #########################################
    # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.
    #########################################
    # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes.
    if config['model']['backend'] == 'ssd512':
        predictor_sizes = [
            model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
            model.get_layer('fc7_mbox_conf').output_shape[1:3],
            model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv9_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv10_2_mbox_conf').output_shape[1:3]
        ]

        ssd_input_encoder = SSDInputEncoder(
            img_height=img_height,
            img_width=img_width,
            n_classes=n_classes,
            predictor_sizes=predictor_sizes,
            scales=scales,
            aspect_ratios_per_layer=aspect_ratios,
            two_boxes_for_ar1=two_boxes_for_ar1,
            steps=steps,
            offsets=offsets,
            clip_boxes=clip_boxes,
            variances=variances,
            matching_type='multi',
            pos_iou_threshold=0.5,
            neg_iou_limit=0.5,
            normalize_coords=normalize_coords)

    elif config['model']['backend'] == 'ssd300':
        predictor_sizes = [
            model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
            model.get_layer('fc7_mbox_conf').output_shape[1:3],
            model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv9_2_mbox_conf').output_shape[1:3]
        ]
        ssd_input_encoder = SSDInputEncoder(
            img_height=img_height,
            img_width=img_width,
            n_classes=n_classes,
            predictor_sizes=predictor_sizes,
            scales=scales,
            aspect_ratios_per_layer=aspect_ratios,
            two_boxes_for_ar1=two_boxes_for_ar1,
            steps=steps,
            offsets=offsets,
            clip_boxes=clip_boxes,
            variances=variances,
            matching_type='multi',
            pos_iou_threshold=0.5,
            neg_iou_limit=0.5,
            normalize_coords=normalize_coords)

    elif config['model']['backend'] == 'ssd7':
        predictor_sizes = [
            model.get_layer('classes4').output_shape[1:3],
            model.get_layer('classes5').output_shape[1:3],
            model.get_layer('classes6').output_shape[1:3],
            model.get_layer('classes7').output_shape[1:3]
        ]
        ssd_input_encoder = SSDInputEncoder(
            img_height=img_height,
            img_width=img_width,
            n_classes=n_classes,
            predictor_sizes=predictor_sizes,
            scales=scales,
            aspect_ratios_global=aspect_ratios,
            two_boxes_for_ar1=two_boxes_for_ar1,
            steps=steps,
            offsets=offsets,
            clip_boxes=clip_boxes,
            variances=variances,
            matching_type='multi',
            pos_iou_threshold=0.5,
            neg_iou_limit=0.3,
            normalize_coords=normalize_coords)

    #######################
    # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.
    #######################

    train_generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        transformations=[
            SSDDataAugmentation(img_height=img_height, img_width=img_width)
        ],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    val_generator = val_dataset.generate(
        batch_size=batch_size,
        shuffle=False,
        transformations=[convert_to_3_channels, resize],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)
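    # Each draw from these generators yields the items named in `returns`: here
    # `(processed_images, encoded_labels)`, i.e. exactly the `(X, y)` pair that
    # `fit_generator()` expects.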

    # Get the number of samples in the training and validations datasets.
    train_dataset_size = train_dataset.get_dataset_size()
    val_dataset_size = val_dataset.get_dataset_size()

    print("Number of images in the training dataset:\t{:>6}".format(
        train_dataset_size))
    print("Number of images in the validation dataset:\t{:>6}".format(
        val_dataset_size))

    ##########################
    # Define model callbacks.
    #########################

    # TODO: Set the filepath under which you want to save the model.
    model_checkpoint = ModelCheckpoint(
        filepath=config['train']['saved_weights_name'],
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        period=1)

    csv_logger = CSVLogger(filename='log.csv', separator=',', append=True)
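
    # `lr_schedule` is not defined anywhere in this example. A minimal sketch,
    # assuming a step decay of the kind commonly used for SSD training (the
    # breakpoints and rates below are assumptions, not the author's values):
    def lr_schedule(epoch):
        if epoch < 80:
            return 0.001
        elif epoch < 100:
            return 0.0001
        else:
            return 0.00001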

    learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule,
                                                    verbose=1)

    terminate_on_nan = TerminateOnNaN()

    callbacks = [
        model_checkpoint, csv_logger, learning_rate_scheduler, terminate_on_nan
    ]

    #print(model.summary())

    # Optional sanity check: draw one batch from the training generator and
    # inspect the encoded labels for one batch item. (Note that this generator
    # does not return filenames, only processed images and encoded labels.)
    #batch_images, batch_labels = next(train_generator)
    #i = 0  # Which batch item to look at
    #print("Ground truth boxes:\n")
    #print(batch_labels[i])

    initial_epoch = 0
    final_epoch = config['train']['nb_epochs']
    #final_epoch     = 20
    steps_per_epoch = 500

    history = model.fit_generator(generator=train_generator,
                                  steps_per_epoch=steps_per_epoch,
                                  epochs=final_epoch,
                                  callbacks=callbacks,
                                  validation_data=val_generator,
                                  validation_steps=ceil(val_dataset_size /
                                                        batch_size),
                                  initial_epoch=initial_epoch,
                                  verbose=1 if config['train']['debug'] else 2)
    def predict_on_dataset(self,
                           img_height,
                           img_width,
                           batch_size,
                           data_generator_mode='resize',
                           decoding_confidence_thresh=0.01,
                           decoding_iou_threshold=0.45,
                           decoding_top_k=200,
                           decoding_pred_coords='centroids',
                           decoding_normalize_coords=True,
                           decoding_border_pixels='include',
                           round_confidences=False,
                           verbose=True,
                           ret=False):
        '''
        Runs predictions for the given model over the entire dataset given by `data_generator`.

        Arguments:
            img_height (int): The input image height for the model.
            img_width (int): The input image width for the model.
            batch_size (int): The batch size for the evaluation.
            data_generator_mode (str, optional): Either of 'resize' or 'pad'. If 'resize', the input images will
                be resized (i.e. warped) to `(img_height, img_width)`. This mode does not preserve the aspect ratios of the images.
                If 'pad', the input images will be first padded so that they have the aspect ratio defined by `img_height`
                and `img_width` and then resized to `(img_height, img_width)`. This mode preserves the aspect ratios of the images.
            decoding_confidence_thresh (float, optional): Only relevant if the model is in 'training' mode.
                A float in [0,1), the minimum classification confidence in a specific positive class in order to be considered
                for the non-maximum suppression stage for the respective class. A lower value will result in a larger part of the
                selection process being done by the non-maximum suppression stage, while a larger value will result in a larger
                part of the selection process happening in the confidence thresholding stage.
            decoding_iou_threshold (float, optional): Only relevant if the model is in 'training' mode. A float in [0,1].
                All boxes with a Jaccard similarity of greater than `iou_threshold` with a locally maximal box will be removed
                from the set of predictions for a given class, where 'maximal' refers to the box score.
            decoding_top_k (int, optional): Only relevant if the model is in 'training' mode. The number of highest scoring
                predictions to be kept for each batch item after the non-maximum suppression stage.
            decoding_pred_coords (str, optional): Only relevant if the model is in 'training' mode. The box coordinate format
                that the model outputs. Can be either 'centroids' for the format `(cx, cy, w, h)` (box center coordinates, width, and height),
                'minmax' for the format `(xmin, xmax, ymin, ymax)`, or 'corners' for the format `(xmin, ymin, xmax, ymax)`.
            decoding_normalize_coords (bool, optional): Only relevant if the model is in 'training' mode. Set to `True` if the model
                outputs relative coordinates. Do not set this to `True` if the model already outputs absolute coordinates,
                as that would result in incorrect coordinates.
            round_confidences (int, optional): `False` or an integer that is the number of decimals that the prediction
                confidences will be rounded to. If `False`, the confidences will not be rounded.
            verbose (bool, optional): If `True`, will print out the progress during runtime.
            ret (bool, optional): If `True`, returns the predictions.

        Returns:
            None by default. Optionally, a nested list containing the predictions for each class.
        '''

        class_id_pred = self.pred_format['class_id']
        conf_pred     = self.pred_format['conf']
        xmin_pred     = self.pred_format['xmin']
        ymin_pred     = self.pred_format['ymin']
        xmax_pred     = self.pred_format['xmax']
        ymax_pred     = self.pred_format['ymax']

        #############################################################################################
        # Configure the data generator for the evaluation.
        #############################################################################################

        convert_to_3_channels = ConvertTo3Channels()
        resize = Resize(height=img_height, width=img_width, labels_format=self.gt_format)
        if data_generator_mode == 'resize':
            transformations = [convert_to_3_channels,
                               resize]
        elif data_generator_mode == 'pad':
            random_pad = RandomPadFixedAR(patch_aspect_ratio=img_width/img_height, labels_format=self.gt_format)
            transformations = [convert_to_3_channels,
                               random_pad,
                               resize]
        else:
            raise ValueError("`data_generator_mode` can be either of 'resize' or 'pad', but received '{}'.".format(data_generator_mode))

        # Set the generator parameters.
        generator = self.data_generator.generate(batch_size=batch_size,
                                                 shuffle=False,
                                                 transformations=transformations,
                                                 label_encoder=None,
                                                 returns={'processed_images',
                                                          'image_ids',
                                                          'evaluation-neutral',
                                                          'inverse_transform',
                                                          'original_labels'},
                                                 keep_images_without_gt=True,
                                                 degenerate_box_handling='remove')

        if self.data_generator.image_ids is None:
            self.data_generator.image_ids = list(range(self.data_generator.get_dataset_size()))

        #############################################################################################
        # Predict over all batches of the dataset and store the predictions.
        #############################################################################################

        # We have to generate a separate results list for each class.
        results = [list() for _ in range(self.n_classes + 1)]
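        # `results[k]` will collect the predictions for class ID `k`; index 0
        # corresponds to the background class and stays empty.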

        # Compute the number of batches to iterate over the entire dataset.
        n_images = self.data_generator.get_dataset_size()
        n_batches = int(ceil(n_images / batch_size))
        if verbose:
            print("Number of images in the evaluation dataset: {}".format(n_images))
            print()
            tr = trange(n_batches, file=sys.stdout)
            tr.set_description('Producing predictions batch-wise')
        else:
            tr = range(n_batches)

        for j in tr:
            # Generate batch.
            batch_X, batch_image_ids, batch_eval_neutral, batch_inverse_transforms, batch_orig_labels = next(generator)
            # Predict.
            y_pred = self.model.predict(batch_X)
            # If the model was created in 'training' mode, the raw predictions need to
            # be decoded and filtered, otherwise that's already taken care of.
            if self.model_mode == 'training':
                # Decode.
                y_pred = decode_detections(y_pred,
                                           confidence_thresh=decoding_confidence_thresh,
                                           iou_threshold=decoding_iou_threshold,
                                           top_k=decoding_top_k,
                                           input_coords=decoding_pred_coords,
                                           normalize_coords=decoding_normalize_coords,
                                           img_height=img_height,
                                           img_width=img_width,
                                           border_pixels=decoding_border_pixels)
            else:
                # Filter out the all-zeros dummy elements of `y_pred`.
                y_pred_filtered = []
                for i in range(len(y_pred)):
                    y_pred_filtered.append(y_pred[i][y_pred[i,:,0] != 0])
                y_pred = y_pred_filtered
            # Convert the predicted box coordinates for the original images.
            y_pred = apply_inverse_transforms(y_pred, batch_inverse_transforms)

            # Iterate over all batch items.
            for k, batch_item in enumerate(y_pred):
                image_id = batch_image_ids[k]
                path = '/data/deeplearn/SWEIPENet/dataset/Detections/detection' + str(self.modelindex)
                if not os.path.exists(path):
                    os.makedirs(path)
                txtpath = os.path.join(path, image_id + '.txt')
                # `open(..., 'w')` creates the file if it does not exist yet,
                # so there is no need for `os.mknod` here.
                file_fid = open(txtpath, 'w')
                for box in batch_item:
                    class_id = int(box[class_id_pred])
                    # Round the box coordinates to reduce the required memory.
                    if round_confidences:
                        confidence = round(box[conf_pred], round_confidences)
                    else:
                        confidence = box[conf_pred]
                    xmin = round(box[xmin_pred], 1)
                    ymin = round(box[ymin_pred], 1)
                    xmax = round(box[xmax_pred], 1)
                    ymax = round(box[ymax_pred], 1)
                    prediction = (image_id, confidence, xmin, ymin, xmax, ymax)
                    # write detections of each image into Detections/imname.txt
                    # Write the detections for each image into Detections/<image_id>.txt.
                    if class_id == 1:
                        class_name = 'seacucumber'
                    elif class_id == 2:
                        class_name = 'seaurchin'
                    elif class_id == 3:
                        class_name = 'scallop'
                    else:
                        raise ValueError('Unexpected class ID: {}'.format(class_id))
                    boxstr = '{} {} {} {} {} {}'.format(class_name, confidence, xmin, ymin, xmax, ymax)
                    file_fid.write(boxstr + '\n')

                    results[class_id].append(prediction)
                file_fid.close()
        self.prediction_results = results
        if ret:
            return results
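
# A minimal usage sketch for `predict_on_dataset` (assuming, as in the
# `Evaluator` class of the ssd_keras project, that the enclosing class was
# constructed with a `model`, a `DataGenerator`, `n_classes`, and the
# pred/gt format dictionaries):
#
# evaluator.predict_on_dataset(img_height=300,
#                              img_width=300,
#                              batch_size=8,
#                              data_generator_mode='resize',
#                              ret=True)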
Ejemplo n.º 12
def predicting(images,
               image_path,
               labels_output_format=('class_id', 'xmin', 'ymin', 'xmax',
                                     'ymax')):

    labels_format = {
        'class_id': labels_output_format.index('class_id'),
        'xmin': labels_output_format.index('xmin'),
        'ymin': labels_output_format.index('ymin'),
        'xmax': labels_output_format.index('xmax'),
        'ymax': labels_output_format.index('ymax')
    }

    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)

    generate_pre = Data_Generator(image=images,
                                  image_path=image_path,
                                  labels_format=labels_format)
    predict_generator = generate_pre.generate(
        batch_size=1,
        transformations=[convert_to_3_channels, resize],
        label_encoder=None,
        returns={
            'processed_images', 'filenames', 'inverse_transform',
            'original_images', 'original_labels'
        },
        keep_images_without_gt=False)

    batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next(
        predict_generator)

    i = 0  # Which batch item to look at

    print("Image:", batch_filenames[i])

    global graph
    with graph.as_default():
        y_pred = model.predict(batch_images)

    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.25,
                                       iou_threshold=0.4,
                                       top_k=200,
                                       normalize_coords=normalize_coords,
                                       img_height=img_height,
                                       img_width=img_width)

    print(y_pred_decoded)

    y_pred_decoded_inv = apply_inverse_transforms(y_pred_decoded,
                                                  batch_inverse_transforms)
    # print(y_pred_decoded_inv)
    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    # print("Predicted boxes:\n")
    # print('   class   conf xmin   ymin   xmax   ymax')
    # print(y_pred_decoded_inv[i])

    # Set the colors for the bounding boxes
    # plt.figure(figsize=(20, 12))
    # plt.imshow(batch_original_images[i])
    # colors = plt.cm.hsv(np.linspace(0, 1, n_classes + 1)).tolist()
    # current_axis = plt.gca()
    #
    # for box in y_pred_decoded_inv[i]:
    #     xmin = box[2]
    #     ymin = box[3]
    #     xmax = box[4]
    #     ymax = box[5]
    #     # color = colors[int(box[0])]
    #     # plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color=color, fill=False, linewidth=2)
    #     color = colors[int(box[0])]
    #     label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
    #     current_axis.add_patch(
    #         plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color=color, fill=False, linewidth=2))
    #     current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor': color, 'alpha': 1.0})
    #
    # # plt.show()
    # plt.xticks([])
    # plt.yticks([])
    # plt.savefig(self.image_path)

    # K.clear_session()
    # gc.collect()

    return y_pred_decoded_inv
Ejemplo n.º 13
def run(train_dir, valid_dir, set_dir, model_dir):
    # train_dir = arguments.train_dir
    # valid_dir = arguments.valid_dir

    train_dataset_dir = train_dir
    train_annot_dir = train_dir + '/annot/'
    train_set = train_dir + '/img_set.txt'

    valid_dataset_dir = valid_dir
    valid_annot_dir = valid_dir + '/annot/'
    valid_set = valid_dir + '/valid_set.txt'

    # Set Training and Validation dataset paths
    batch_size = 16
    print('Using batch size of: {}'.format(batch_size))
    #model_path = 'COCO_512.h5'
    model_path = model_dir
    # model_path = 'saved_model.h5'
    # Needs to know classes and order to map to integers
    classes = ['background', 'car', 'bus', 'truck']
    # Set required parameters for training of SSD
    img_height = 512
    img_width = 512
    img_channels = 3  # Colour image
    mean_color = [123, 117, 104]  # DO NOT CHANGE
    swap_channels = [2, 1, 0]  # Original SSD used BGR
    n_classes = 3  # 80 for COCO
    scales_coco = [0.04, 0.1, 0.26, 0.42, 0.58, 0.74, 0.9, 1.06]
    scales = scales_coco
    aspect_ratios = [[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5],
                     [1.0, 2.0, 0.5]]
    two_boxes_for_ar1 = True
    steps = [8, 16, 32, 64, 128, 256, 512]
    offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
    clip_boxes = False
    variances = [0.1, 0.1, 0.2, 0.2]
    normalize_coords = True
    K.clear_session()

    model = ssd_512(image_size=(img_height, img_width, img_channels),
                    n_classes=n_classes,
                    mode='training',
                    l2_regularization=0.0005,
                    scales=scales,
                    aspect_ratios_per_layer=aspect_ratios,
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    steps=steps,
                    offsets=offsets,
                    clip_boxes=clip_boxes,
                    variances=variances,
                    normalize_coords=normalize_coords,
                    subtract_mean=mean_color,
                    swap_channels=swap_channels)
    model.load_weights(model_path, by_name=True)

    sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False)
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

    model.compile(optimizer=sgd, loss=ssd_loss.compute_loss)

    # model = load_model(model_path, custom_objects={'AnchorBoxes': AnchorBoxes,
    #                                   'L2Normalization': L2Normalization,

    #                                   'compute_loss': ssd_loss.compute_loss})
    # Create Data Generators for train and valid sets
    train_dataset = DataGenerator(load_images_into_memory=False,
                                  hdf5_dataset_path=None)
    valid_dataset = DataGenerator(load_images_into_memory=False,
                                  hdf5_dataset_path=None)
    train_dataset.parse_xml(images_dirs=[train_dataset_dir],
                            image_set_filenames=[train_set],
                            annotations_dirs=[train_annot_dir],
                            classes=classes,
                            include_classes='all',
                            exclude_truncated=False,
                            exclude_difficult=False,
                            ret=False)

    valid_dataset.parse_xml(images_dirs=[valid_dataset_dir],
                            image_set_filenames=[valid_set],
                            annotations_dirs=[valid_annot_dir],
                            classes=classes,
                            include_classes='all',
                            exclude_truncated=False,
                            exclude_difficult=False,
                            ret=False)

    # Creating HDF5 datasets will speed up training but requires more memory.
    # You can leave this block commented out to avoid the extra memory use.
    '''
    train_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07+12_trainval.h5',
                                      resize=False,
                                      variable_image_size=True,
                                      verbose=True)

    valid_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07_test.h5',
                                      resize=False,
                                      variable_image_size=True,
                                      verbose=True)
    '''

    ssd_data_augmentation = SSDDataAugmentation(img_height=img_height,
                                                img_width=img_width,
                                                background=mean_color)

    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)

    predictor_sizes = [
        model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
        model.get_layer('fc7_mbox_conf').output_shape[1:3],
        model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv9_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv10_2_mbox_conf').output_shape[1:3]
    ]

    ssd_input_encoder = SSDInputEncoder(img_height=img_height,
                                        img_width=img_width,
                                        n_classes=n_classes,
                                        predictor_sizes=predictor_sizes,
                                        scales=scales,
                                        aspect_ratios_per_layer=aspect_ratios,
                                        two_boxes_for_ar1=two_boxes_for_ar1,
                                        steps=steps,
                                        offsets=offsets,
                                        clip_boxes=clip_boxes,
                                        variances=variances,
                                        matching_type='multi',
                                        pos_iou_threshold=0.5,
                                        neg_iou_limit=0.5,
                                        normalize_coords=normalize_coords)

    train_generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        transformations=[ssd_data_augmentation],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    val_generator = valid_dataset.generate(
        batch_size=batch_size,
        shuffle=False,
        transformations=[convert_to_3_channels, resize],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    # Get the number of samples in the training and validations datasets.
    train_dataset_size = train_dataset.get_dataset_size()
    valid_dataset_size = valid_dataset.get_dataset_size()

    print("Number of images in the training dataset:\t{:>6}".format(
        train_dataset_size))
    print("Number of images in the validation dataset:\t{:>6}".format(
        valid_dataset_size))

    model_checkpoint = ModelCheckpoint(
        filepath=
        'ssd_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        period=1)
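
    # Keras substitutes the `{epoch}`, `{loss}` and `{val_loss}` placeholders in
    # the checkpoint filename at the end of each epoch.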

    csv_logger = CSVLogger(filename='ssd512_training_log.csv',
                           separator=',',
                           append=True)
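
    # As in the training example above, `lr_schedule` must exist at module
    # level for the `LearningRateScheduler` below; it is not defined in this
    # snippet.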

    learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule,
                                                    verbose=1)

    terminate_on_nan = TerminateOnNaN()

    callbacks = [
        model_checkpoint, csv_logger, learning_rate_scheduler, terminate_on_nan
    ]

    #callbacks = [learning_rate_scheduler,
    #             terminate_on_nan]

    initial_epoch = 0
    final_epoch = 150  # 150
    steps_per_epoch = math.ceil(train_dataset_size /
                                batch_size)  # ceil(num_samples / batch_size)

    # Training
    history = model.fit_generator(generator=train_generator,
                                  steps_per_epoch=steps_per_epoch,
                                  epochs=final_epoch,
                                  callbacks=callbacks,
                                  validation_data=val_generator,
                                  validation_steps=math.ceil(
                                      valid_dataset_size / batch_size),
                                  initial_epoch=initial_epoch)

    # Save final trained model
    model.save('trained.h5')

    # Make predictions
    predict_generator = valid_dataset.generate(
        batch_size=1,
        shuffle=True,
        transformations=[convert_to_3_channels, resize],
        label_encoder=None,
        returns={
            'processed_images', 'filenames', 'inverse_transform',
            'original_images', 'original_labels'
        },
        keep_images_without_gt=False)

    batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next(
        predict_generator)

    i = 0  # Which batch item to look at

    print("Image:", batch_filenames[i])
    print()
    print("Ground truth boxes:\n")
    print(np.array(batch_original_labels[i]))

    y_pred = model.predict(batch_images)
    y_pred_decoded = decode_detections(y_pred,
                                       confidence_thresh=0.2,
                                       iou_threshold=0.4,
                                       top_k=200,
                                       normalize_coords=normalize_coords,
                                       img_height=img_height,
                                       img_width=img_width)

    y_pred_decoded_inv = apply_inverse_transforms(y_pred_decoded,
                                                  batch_inverse_transforms)

    np.set_printoptions(precision=2, suppress=True, linewidth=90)
    print("Predicted boxes:\n")
    print('   class   conf xmin   ymin   xmax   ymax')
    print(y_pred_decoded_inv[i])

    # Set the colors for the bounding boxes
    colors = plt.cm.hsv(np.linspace(0, 1, n_classes + 1)).tolist()
    # classes = ['background', 'car', 'bus', 'truck', 'motorbike'] # Already set at start

    plt.figure(figsize=(20, 12))
    plt.imshow(batch_original_images[i])

    current_axis = plt.gca()

    for box in batch_original_labels[i]:
        xmin = box[1]
        ymin = box[2]
        xmax = box[3]
        ymax = box[4]
        label = '{}'.format(classes[int(box[0])])
        current_axis.add_patch(
            plt.Rectangle((xmin, ymin),
                          xmax - xmin,
                          ymax - ymin,
                          color='green',
                          fill=False,
                          linewidth=2))
        current_axis.text(xmin,
                          ymin,
                          label,
                          size='x-large',
                          color='white',
                          bbox={
                              'facecolor': 'green',
                              'alpha': 1.0
                          })

    for box in y_pred_decoded_inv[i]:
        xmin = box[2]
        ymin = box[3]
        xmax = box[4]
        ymax = box[5]
        color = colors[int(box[0])]
        label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
        current_axis.add_patch(
            plt.Rectangle((xmin, ymin),
                          xmax - xmin,
                          ymax - ymin,
                          color=color,
                          fill=False,
                          linewidth=2))
        current_axis.text(xmin,
                          ymin,
                          label,
                          size='x-large',
                          color='white',
                          bbox={
                              'facecolor': color,
                              'alpha': 1.0
                          })

    plt.show()

    return
Ejemplo n.º 14
def predict_all_to_txt(model,
                       img_height,
                       img_width,
                       data_generator,
                       batch_size,
                       data_generator_mode='resize',
                       classes=[
                           'background', 'aeroplane', 'bicycle', 'bird',
                           'boat', 'bottle', 'bus', 'car', 'cat', 'chair',
                           'cow', 'diningtable', 'dog', 'horse', 'motorbike',
                           'person', 'pottedplant', 'sheep', 'sofa', 'train',
                           'tvmonitor'
                       ],
                       out_file_prefix='comp3_det_test_',
                       model_mode='training',
                       confidence_thresh=0.01,
                       iou_threshold=0.45,
                       top_k=200,
                       pred_coords='centroids',
                       normalize_coords=True):
    '''
    Runs detection predictions over the whole dataset given a model and saves them in a text file
    in the Pascal VOC detection results format, i.e. the format in which the Pascal VOC test server
    expects results.

    This will result in `n_classes` text files, where each file contains the predictions for one class.

    Arguments:
        model (Keras model): A Keras SSD model object.
        img_height (int): The input image height for the model.
        img_width (int): The input image width for the model.
        data_generator (DataGenerator): A `DataGenerator` object with the evaluation dataset.
        batch_size (int): The batch size for the evaluation.
        data_generator_mode (str, optional): Either of 'resize' or 'pad'. If 'resize', the input images will
            be resized (i.e. warped) to `(img_height, img_width)`. This mode does not preserve the aspect ratios of the images.
            If 'pad', the input images will be first padded so that they have the aspect ratio defined by `img_height`
            and `img_width` and then resized to `(img_height, img_width)`. This mode preserves the aspect ratios of the images.
        classes (list or dict, optional): A list or dictionary that maps the consecutive class IDs predicted by the model
            to their respective name strings. The list must contain the background class for class ID zero.
        out_file_prefix (str, optional): A prefix for the output text file names. The suffix to each output text file name will
            be the respective class name followed by the `.txt` file extension. This string is also how you specify the directory
            in which the results are to be saved.
        model_mode (str, optional): The mode in which the model was created, i.e. 'training', 'inference' or 'inference_fast'.
            This is needed in order to know whether the model output is already decoded or still needs to be decoded. Refer to
            the model documentation for the meaning of the individual modes.
        confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific
            positive class in order to be considered for the non-maximum suppression stage for the respective class.
            A lower value will result in a larger part of the selection process being done by the non-maximum suppression
            stage, while a larger value will result in a larger part of the selection process happening in the confidence
            thresholding stage.
        iou_threshold (float, optional): A float in [0,1]. All boxes with a Jaccard similarity of greater than `iou_threshold`
            with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers
            to the box score.
        top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the
            non-maximum suppression stage. Defaults to 200, following the paper.
        pred_coords (str, optional): The box coordinate format that the model outputs. Can be either 'centroids'
            for the format `(cx, cy, w, h)` (box center coordinates, width, and height), 'minmax' for the format
            `(xmin, xmax, ymin, ymax)`, or 'corners' for the format `(xmin, ymin, xmax, ymax)`.
        normalize_coords (bool, optional): Set to `True` if the model outputs relative coordinates (i.e. coordinates in [0,1])
            and you wish to transform these relative coordinates back to absolute coordinates. If the model outputs
            relative coordinates, but you do not want to convert them back to absolute coordinates, set this to `False`.
            Do not set this to `True` if the model already outputs absolute coordinates, as that would result in incorrect
            coordinates. Requires `img_height` and `img_width` if set to `True`.

    Returns:
        None.
    '''

    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)
    if data_generator_mode == 'resize':
        transformations = [convert_to_3_channels, resize]
    elif data_generator_mode == 'pad':
        random_pad = RandomPadFixedAR(patch_aspect_ratio=img_width /
                                      img_height,
                                      clip_boxes=False)
        transformations = [convert_to_3_channels, random_pad, resize]
    else:
        raise ValueError(
            "Unexpected argument value: `data_generator_mode` can be either of 'resize' or 'pad', but received '{}'."
            .format(data_generator_mode))

    # Set the generator parameters.
    generator = data_generator.generate(
        batch_size=batch_size,
        shuffle=False,
        transformations=transformations,
        label_encoder=None,
        returns={'processed_images', 'image_ids', 'inverse_transform'},
        keep_images_without_gt=True)

    # We have to generate a separate results file for each class.
    results = []
    for i in range(1, len(classes)):
        # Create one text file per class and put it in our results list.
        results.append(
            open('{}{}.txt'.format(out_file_prefix, classes[i]), 'w'))

    # Compute the number of batches to iterate over the entire dataset.
    n_images = data_generator.get_dataset_size()
    print("Number of images in the evaluation dataset: {}".format(n_images))
    n_batches = int(ceil(n_images / batch_size))
    # Loop over all batches.
    tr = trange(n_batches, file=sys.stdout)
    tr.set_description('Producing results files')
    for j in tr:
        # Generate batch.
        batch_X, batch_image_ids, batch_inverse_transforms = next(generator)
        # Predict.
        y_pred = model.predict(batch_X)
        # If the model was created in 'training' mode, the raw predictions need to
        # be decoded and filtered, otherwise that's already taken care of.
        if model_mode == 'training':
            # Decode.
            y_pred = decode_y(y_pred,
                              confidence_thresh=confidence_thresh,
                              iou_threshold=iou_threshold,
                              top_k=top_k,
                              input_coords=pred_coords,
                              normalize_coords=normalize_coords,
                              img_height=img_height,
                              img_width=img_width)
        else:
            # Filter out the all-zeros dummy elements of `y_pred`.
            y_pred_filtered = []
            for i in range(len(y_pred)):
                y_pred_filtered.append(y_pred[i][y_pred[i, :, 0] != 0])
            y_pred = y_pred_filtered
        # Convert the predicted box coordinates for the original images.
        y_pred = apply_inverse_transforms(y_pred, batch_inverse_transforms)

        # Convert each predicted box into the results format.
        for k, batch_item in enumerate(y_pred):
            for box in batch_item:
                image_id = batch_image_ids[k]
                class_id = int(box[0])
                # Round the box coordinates to reduce the file size.
                confidence = str(round(box[1], 4))
                xmin = str(round(box[2], 1))
                ymin = str(round(box[3], 1))
                xmax = str(round(box[4], 1))
                ymax = str(round(box[5], 1))
                prediction = [image_id, confidence, xmin, ymin, xmax, ymax]
                prediction_txt = ' '.join(prediction) + '\n'
                # Write the predicted box to the text file for its class.
                results[class_id - 1].write(prediction_txt)

    # Close all the files.
    for results_file in results:
        results_file.close()

    print("All results files saved.")
def ssd_model(config: Dict, train_dataset, val_dataset, callbacks_list):
    """Train an SSD model.

    Parameters
    ----------
    config : Dict
        Config yaml/json containing all parameters
    train_dataset, val_dataset : DataGenerator
        Training and validation datasets
    callbacks_list : list
        Keras callbacks to use during training
    """
    start_train = timer()

    img_height = config['training']['img_height']  # Height of the input images
    img_width = config['training']['img_width']  # Width of the input images
    img_channels = config['training']['img_channels']  # Number of color channels
    n_classes = config['training']['n_classes']  # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO
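    # NOTE: `scales`, `aspect_ratios`, `two_boxes_for_ar1`, `steps`, `offsets`,
    # `clip_boxes`, `variances`, `normalize_coords`, `mean_color` and
    # `swap_channels` are not defined in this snippet; they are assumed to be
    # module-level constants like those set up in the SSD300 example above.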

    model = ssd_300(image_size=(img_height, img_width, img_channels),
                    n_classes=n_classes,
                    mode='training',
                    l2_regularization=config['training']['l2_regularization'],
                    scales=scales,
                    aspect_ratios_per_layer=aspect_ratios,
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    steps=steps,
                    offsets=offsets,
                    clip_boxes=clip_boxes,
                    variances=variances,
                    normalize_coords=normalize_coords,
                    subtract_mean=mean_color,
                    swap_channels=swap_channels)

    weights_path = './weights/VGG_ILSVRC_16_layers_fc_reduced.h5'
    model.load_weights(weights_path, by_name=True)

    adam = Adam(lr=config['training']['learning_rate'],
                beta_1=0.9,
                beta_2=0.999,
                epsilon=1e-08,
                decay=0.0)

    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

    model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

    batch_size = config['training']['batch_size']

    ssd_data_augmentation = SSDDataAugmentation(img_height=img_height,
                                                img_width=img_width,
                                                background=mean_color)

    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)

    predictor_sizes = [
        model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
        model.get_layer('fc7_mbox_conf').output_shape[1:3],
        model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv9_2_mbox_conf').output_shape[1:3]
    ]

    ssd_input_encoder = SSDInputEncoder(img_height=img_height,
                                        img_width=img_width,
                                        n_classes=n_classes,
                                        predictor_sizes=predictor_sizes,
                                        scales=scales,
                                        aspect_ratios_per_layer=aspect_ratios,
                                        two_boxes_for_ar1=two_boxes_for_ar1,
                                        steps=steps,
                                        offsets=offsets,
                                        clip_boxes=clip_boxes,
                                        variances=variances,
                                        matching_type='multi',
                                        pos_iou_threshold=0.5,
                                        neg_iou_limit=0.5,
                                        normalize_coords=normalize_coords)

    train_generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        transformations=[ssd_data_augmentation],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    val_generator = val_dataset.generate(
        batch_size=batch_size,
        shuffle=False,
        transformations=[convert_to_3_channels, resize],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    # Get the number of samples in the training and validations datasets.
    train_dataset_size = train_dataset.get_dataset_size()
    val_dataset_size = val_dataset.get_dataset_size()

    print(
        f"[INFO]...Number of images in the training dataset: {train_dataset_size}"
    )
    print(
        f"[INFO]...Number of images in the validation dataset: {val_dataset_size}"
    )
    print(
        f"[INFO]...Weights will be saved at {config['training']['weight_save_path']}"
    )
    history = model.fit_generator(
        generator=train_generator,
        steps_per_epoch=config['training']['steps_per_epoch'],
        epochs=config['training']['epochs'],
        callbacks=callbacks_list,
        validation_data=val_generator,
        validation_steps=ceil(val_dataset_size / batch_size))
    end_train = timer()
    print(
        f"[INFO]...Total time taken by Training Job is {(end_train - start_train)/60:.2f} min(s)"
    )
Example No. 16
def predict_all_to_json(out_file,
                        model,
                        img_height,
                        img_width,
                        classes_to_cats,
                        data_generator,
                        batch_size,
                        data_generator_mode='resize',
                        model_mode='training',
                        confidence_thresh=0.01,
                        iou_threshold=0.45,
                        top_k=200,
                        pred_coords='centroids',
                        normalize_coords=True):
    '''
    Runs detection predictions over the whole dataset given a model and saves them in a JSON file
    in the MS COCO detection results format.

    Arguments:
        out_file (str): The file name (full path) under which to save the results JSON file.
        model (Keras model): A Keras SSD model object.
        img_height (int): The input image height for the model.
        img_width (int): The input image width for the model.
        classes_to_cats (dict): A dictionary that maps the consecutive class IDs predicted by the model
            to the non-consecutive original MS COCO category IDs.
        data_generator (DataGenerator): A `DataGenerator` object with the evaluation dataset.
        batch_size (int): The batch size for the evaluation.
        data_generator_mode (str, optional): Either of 'resize' or 'pad'. If 'resize', the input images will
            be resized (i.e. warped) to `(img_height, img_width)`. This mode does not preserve the aspect ratios of the images.
            If 'pad', the input images will be first padded so that they have the aspect ratio defined by `img_height`
            and `img_width` and then resized to `(img_height, img_width)`. This mode preserves the aspect ratios of the images.
        model_mode (str, optional): The mode in which the model was created, i.e. 'training', 'inference' or 'inference_fast'.
            This is needed in order to know whether the model output is already decoded or still needs to be decoded. Refer to
            the model documentation for the meaning of the individual modes.
        confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific
            positive class in order to be considered for the non-maximum suppression stage for the respective class.
            A lower value will result in a larger part of the selection process being done by the non-maximum suppression
            stage, while a larger value will result in a larger part of the selection process happening in the confidence
            thresholding stage.
        iou_threshold (float, optional): A float in [0,1]. All boxes with a Jaccard similarity of greater than `iou_threshold`
            with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers
            to the box score.
        top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the
            non-maximum suppression stage. Defaults to 200, following the paper.
        pred_coords (str, optional): The box coordinate format that the model outputs. Can be either 'centroids'
            for the format `(cx, cy, w, h)` (box center coordinates, width, and height), 'minmax' for the format
            `(xmin, xmax, ymin, ymax)`, or 'corners' for the format `(xmin, ymin, xmax, ymax)`.
        normalize_coords (bool, optional): Set to `True` if the model outputs relative coordinates (i.e. coordinates in [0,1])
            and you wish to transform these relative coordinates back to absolute coordinates. If the model outputs
            relative coordinates, but you do not want to convert them back to absolute coordinates, set this to `False`.
            Do not set this to `True` if the model already outputs absolute coordinates, as that would result in incorrect
            coordinates. Requires `img_height` and `img_width` if set to `True`.

    Returns:
        None.
    '''

    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height,width=img_width)
    if data_generator_mode == 'resize':
        transformations = [convert_to_3_channels,
                           resize]
    elif data_generator_mode == 'pad':
        random_pad = RandomPadFixedAR(patch_aspect_ratio=img_width/img_height, clip_boxes=False)
        transformations = [convert_to_3_channels,
                           random_pad,
                           resize]
    else:
        raise ValueError("Unexpected argument value: `data_generator_mode` can be either of 'resize' or 'pad', but received '{}'.".format(data_generator_mode))

    # Set the generator parameters.
    generator = data_generator.generate(batch_size=batch_size,
                                        shuffle=False,
                                        transformations=transformations,
                                        label_encoder=None,
                                        returns={'processed_images',
                                                 'image_ids',
                                                 'inverse_transform'},
                                        keep_images_without_gt=True)
    # Put the results in this list.
    results = []
    # Compute the number of batches to iterate over the entire dataset.
    n_images = data_generator.get_dataset_size()
    print("Number of images in the evaluation dataset: {}".format(n_images))
    n_batches = int(ceil(n_images / batch_size))
    # Loop over all batches.
    tr = trange(n_batches, file=sys.stdout)
    tr.set_description('Producing results file')
    for i in tr:
        # Generate batch.
        batch_X, batch_image_ids, batch_inverse_transforms = next(generator)
        # Predict.
        y_pred = model.predict(batch_X)
        # If the model was created in 'training' mode, the raw predictions need to
        # be decoded and filtered, otherwise that's already taken care of.
        if model_mode == 'training':
            # Decode.
            y_pred = decode_detections(y_pred,
                                       confidence_thresh=confidence_thresh,
                                       iou_threshold=iou_threshold,
                                       top_k=top_k,
                                       input_coords=pred_coords,
                                       normalize_coords=normalize_coords,
                                       img_height=img_height,
                                       img_width=img_width)
        else:
            # Filter out the all-zeros dummy elements of `y_pred`.
            y_pred_filtered = []
            for j in range(len(y_pred)):
                y_pred_filtered.append(y_pred[j][y_pred[j,:,0] != 0])
            y_pred = y_pred_filtered
        # Convert the predicted box coordinates for the original images.
        y_pred = apply_inverse_transforms(y_pred, batch_inverse_transforms)

        # Convert each predicted box into the results format.
        for k, batch_item in enumerate(y_pred):
            for box in batch_item:
                class_id = box[0]
                # Transform the consecutive class IDs back to the original COCO category IDs.
                cat_id = classes_to_cats[class_id]
                # Round the box coordinates to reduce the JSON file size.
                xmin = float(round(box[2], 1))
                ymin = float(round(box[3], 1))
                xmax = float(round(box[4], 1))
                ymax = float(round(box[5], 1))
                width = xmax - xmin
                height = ymax - ymin
                bbox = [xmin, ymin, width, height]
                result = {}
                result['image_id'] = batch_image_ids[k]
                result['category_id'] = cat_id
                result['score'] = float(round(box[1], 3))
                result['bbox'] = bbox
                results.append(result)

    with open(out_file, 'w') as f:
        json.dump(results, f)

    print("Prediction results saved in '{}'".format(out_file))
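A minimal usage sketch for the function above, assuming a trained SSD300 `model` is already in memory and that `classes_to_cats` has been built from the COCO annotations (all paths below are hypothetical):

val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)
val_dataset.parse_json(images_dirs=['./MSCOCO/val2017'],
                       annotations_filenames=['./MSCOCO/annotations/image_info_val2017.json'],
                       ground_truth_available=False)

predict_all_to_json(out_file='detections_val2017_ssd300_results.json',
                    model=model,                      # trained SSD model, assumed to exist
                    img_height=300,
                    img_width=300,
                    classes_to_cats=classes_to_cats,  # assumed mapping, see the docstring above
                    data_generator=val_dataset,
                    batch_size=20,
                    data_generator_mode='resize',
                    model_mode='training',
                    confidence_thresh=0.01,
                    iou_threshold=0.45,
                    top_k=200,
                    pred_coords='centroids',
                    normalize_coords=True)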
Example No. 17
def get_dataset(
    args: argparse.Namespace, model: Model
) -> Tuple[Iterable[List[np.ndarray]], Iterable[List[np.ndarray]], int]:
    train_dataset = DataGenerator(load_images_into_memory=False,
                                  hdf5_dataset_path=None)
    val_dataset = DataGenerator(load_images_into_memory=False,
                                hdf5_dataset_path=None)

    # Note: no leading slash on the second argument -- `os.path.join` discards
    # everything before an absolute path component.
    VOC_2007_images_dir = os.path.join(args.data_dir, 'VOC2007/JPEGImages/')
    VOC_2012_images_dir = os.path.join(args.data_dir, 'VOC2012/JPEGImages/')

    VOC_2007_annotations_dir = os.path.join(args.data_dir,
                                            'VOC2007/Annotations/')
    VOC_2012_annotations_dir = os.path.join(args.data_dir,
                                            'VOC2012/Annotations/')

    VOC_2007_trainval_image_set_filename = os.path.join(
        args.data_dir, 'VOC2007/ImageSets/Main/trainval.txt')
    VOC_2012_trainval_image_set_filename = os.path.join(
        args.data_dir, 'VOC2012/ImageSets/Main/trainval.txt')
    VOC_2007_test_image_set_filename = os.path.join(
        args.data_dir, 'VOC2007/ImageSets/Main/test.txt')

    classes = [
        'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
        'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
        'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
        'tvmonitor'
    ]

    train_dataset.parse_xml(
        images_dirs=[VOC_2007_images_dir, VOC_2012_images_dir],
        image_set_filenames=[
            VOC_2007_trainval_image_set_filename,
            VOC_2012_trainval_image_set_filename
        ],
        annotations_dirs=[VOC_2007_annotations_dir, VOC_2012_annotations_dir],
        classes=classes,
        include_classes='all',
        exclude_truncated=False,
        exclude_difficult=False,
        ret=False)

    val_dataset.parse_xml(
        images_dirs=[VOC_2007_images_dir],
        image_set_filenames=[VOC_2007_test_image_set_filename],
        annotations_dirs=[VOC_2007_annotations_dir],
        classes=classes,
        include_classes='all',
        exclude_truncated=False,
        exclude_difficult=True,
        ret=False)

    train_dataset.create_hdf5_dataset(
        file_path='dataset_pascal_voc_07+12_trainval.h5',
        resize=False,
        variable_image_size=True,
        verbose=True)

    val_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07_test.h5',
                                    resize=False,
                                    variable_image_size=True,
                                    verbose=True)

    ssd_data_augmentation = SSDDataAugmentation(img_height=args.img_height,
                                                img_width=args.img_width,
                                                background=args.mean_color)

    # For the validation generator:
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=args.img_height, width=args.img_width)

    # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.

    # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes.
    predictor_sizes = [
        model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
        model.get_layer('fc7_mbox_conf').output_shape[1:3],
        model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
        model.get_layer('conv9_2_mbox_conf').output_shape[1:3]
    ]

    ssd_input_encoder = SSDInputEncoder(
        img_height=args.img_height,
        img_width=args.img_width,
        n_classes=args.n_classes,
        predictor_sizes=predictor_sizes,
        scales=args.scales,
        aspect_ratios_per_layer=args.aspect_ratios,
        two_boxes_for_ar1=args.two_boxes_for_ar1,
        steps=args.steps,
        offsets=args.offsets,
        clip_boxes=args.clip_boxes,
        variances=args.variances,
        matching_type='multi',
        pos_iou_threshold=0.5,
        neg_iou_limit=0.5,
        normalize_coords=args.normalize_coords)

    train_generator = train_dataset.generate(
        batch_size=args.batch_size,
        shuffle=True,
        transformations=[ssd_data_augmentation],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)

    val_generator = val_dataset.generate(
        batch_size=args.batch_size,
        shuffle=False,
        transformations=[convert_to_3_channels, resize],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)
    return train_generator, val_generator, val_dataset.get_dataset_size()
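A hedged sketch of wiring this helper into a training loop; `args` is assumed to be the parsed argparse namespace carrying every field referenced above, and `model` a compiled SSD300:

train_gen, val_gen, n_val = get_dataset(args, model)
model.fit_generator(generator=train_gen,
                    steps_per_epoch=1000,            # placeholder value
                    epochs=80,                       # placeholder value
                    validation_data=val_gen,
                    validation_steps=ceil(n_val / args.batch_size))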
Example No. 18
def main(job_dir, **args):
    # Set up the path for saving logs.
    logs_dir = job_dir + 'logs/'
    data_dir = "gs://deeplearningteam11/data"

    print("Current Directory: " + os.path.dirname(__file__))
    print("Let's copy the data to: " + os.path.dirname(__file__))
    os.system("gsutil -m cp -r " + data_dir + "  " +
              os.path.dirname(__file__) + " > /dev/null 2>&1 ")

    with tf.device('/device:GPU:0'):
        # 1: Build the Keras model.
        K.clear_session()  # Clear previous models from memory.
        model = ssd_300(image_size=(img_height, img_width, img_channels),
                        n_classes=n_classes,
                        mode='training',
                        l2_regularization=0.0005,
                        scales=scales,
                        aspect_ratios_per_layer=aspect_ratios,
                        two_boxes_for_ar1=two_boxes_for_ar1,
                        steps=steps,
                        offsets=offsets,
                        clip_boxes=clip_boxes,
                        variances=variances,
                        normalize_coords=normalize_coords,
                        subtract_mean=mean_color,
                        swap_channels=swap_channels)

        # Set the path to the `.h5` file of the model to be loaded.
        model_file = file_io.FileIO('gs://deeplearningteam11/vgg19BNmodel.h5',
                                    mode='rb')

        # Store model locally on instance
        model_path = 'model.h5'
        with open(model_path, 'wb') as f:
            f.write(model_file.read())
        model_file.close()

        ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

        model = load_model(model_path,
                           custom_objects={
                               'AnchorBoxes': AnchorBoxes,
                               'L2Normalization': L2Normalization,
                               'DecodeDetections': DecodeDetections,
                               'compute_loss': ssd_loss.compute_loss
                           })

        for layer in model.layers:
            layer.trainable = True

        model.summary()

        # 1: Instantiate two `DataGenerator` objects: One for training, one for validation.
        train_dataset = DataGenerator(load_images_into_memory=True,
                                      hdf5_dataset_path=None)
        val_dataset = DataGenerator(load_images_into_memory=True,
                                    hdf5_dataset_path=None)

        # 2: Parse the image and label lists for the training and validation datasets. This can take a while.
        #  VOC 2007
        #  The directories that contain the images.
        VOC_2007_train_images_dir = 'data/data/VOC2007/train/JPEGImages/'
        VOC_2007_test_images_dir = 'data/data/VOC2007/test/JPEGImages/'

        VOC_2007_train_anns_dir = 'data/data/VOC2007/train/Annotations/'
        VOC_2007_test_anns_dir = 'data/data/VOC2007/test/Annotations/'

        # The paths to the image sets.
        VOC_2007_trainval_image_set_dir = 'data/data/VOC2007/train/ImageSets/Main/'
        VOC_2007_test_image_set_dir = 'data/data/VOC2007/test/ImageSets/Main/'

        VOC_2007_train_images_dir = os.path.dirname(
            __file__) + "/" + VOC_2007_train_images_dir
        VOC_2007_test_images_dir = os.path.dirname(
            __file__) + "/" + VOC_2007_test_images_dir

        VOC_2007_train_anns_dir = os.path.dirname(
            __file__) + "/" + VOC_2007_train_anns_dir
        VOC_2007_test_anns_dir = os.path.dirname(
            __file__) + "/" + VOC_2007_test_anns_dir

        VOC_2007_trainval_image_set_dir = os.path.dirname(
            __file__) + "/" + VOC_2007_trainval_image_set_dir
        VOC_2007_test_image_set_dir = os.path.dirname(
            __file__) + "/" + VOC_2007_test_image_set_dir

        VOC_2007_trainval_image_set_filename = VOC_2007_trainval_image_set_dir + '/trainval.txt'
        VOC_2007_test_image_set_filename = VOC_2007_test_image_set_dir + '/test.txt'

        # The XML parser needs to know which object class names to look for and in which order to map them to integers.
        classes = [
            'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
            'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
            'tvmonitor'
        ]

        print("Parsing Training Data ...")
        train_dataset.parse_xml(
            images_dirs=[VOC_2007_train_images_dir],
            image_set_filenames=[VOC_2007_trainval_image_set_filename],
            annotations_dirs=[VOC_2007_train_anns_dir],
            classes=classes,
            include_classes='all',
            exclude_truncated=False,
            exclude_difficult=False,
            ret=False,
            verbose=False)
        print("Done")
        print(
            "================================================================")

        print("Parsing Test Data ...")
        val_dataset.parse_xml(
            images_dirs=[VOC_2007_test_images_dir],
            image_set_filenames=[VOC_2007_test_image_set_filename],
            annotations_dirs=[VOC_2007_test_anns_dir],
            classes=classes,
            include_classes='all',
            exclude_truncated=False,
            exclude_difficult=True,
            ret=False,
            verbose=False)
        print("Done")
        print(
            "================================================================")

        # 3: Set the batch size.
        batch_size = 32  # Change the batch size if you like, or if you run into GPU memory issues.

        #  4: Set the image transformations for pre-processing and data augmentation options.

        # For the training generator:
        ssd_data_augmentation = SSDDataAugmentation(img_height=img_height,
                                                    img_width=img_width,
                                                    background=mean_color)

        # For the validation generator:
        convert_to_3_channels = ConvertTo3Channels()
        resize = Resize(height=img_height, width=img_width)

        # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.

        # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes.
        predictor_sizes = [
            model.get_layer('conv4_4_norm_mbox_conf').output_shape[1:3],
            model.get_layer('fc7_mbox_conf').output_shape[1:3],
            model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv9_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv10_2_mbox_conf').output_shape[1:3],
            model.get_layer('conv11_2_mbox_conf').output_shape[1:3]
        ]

        ssd_input_encoder = SSDInputEncoder(
            img_height=img_height,
            img_width=img_width,
            n_classes=n_classes,
            predictor_sizes=predictor_sizes,
            scales=scales,
            aspect_ratios_per_layer=aspect_ratios,
            two_boxes_for_ar1=two_boxes_for_ar1,
            steps=steps,
            offsets=offsets,
            clip_boxes=clip_boxes,
            variances=variances,
            matching_type='multi',
            pos_iou_threshold=0.5,
            neg_iou_limit=0.5,
            normalize_coords=normalize_coords)

        # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.

        train_generator = train_dataset.generate(
            batch_size=batch_size,
            shuffle=True,
            transformations=[ssd_data_augmentation],
            label_encoder=ssd_input_encoder,
            returns={'processed_images', 'encoded_labels'},
            keep_images_without_gt=False)

        val_generator = val_dataset.generate(
            batch_size=batch_size,
            shuffle=False,
            transformations=[convert_to_3_channels, resize],
            label_encoder=ssd_input_encoder,
            returns={'processed_images', 'encoded_labels'},
            keep_images_without_gt=False)

        # Get the number of samples in the training and validation datasets.
        train_dataset_size = train_dataset.get_dataset_size()
        val_dataset_size = val_dataset.get_dataset_size()

        print("Number of images in the training dataset:\t{:>6}".format(
            train_dataset_size))
        print("Number of images in the validation dataset:\t{:>6}".format(
            val_dataset_size))

        # Define a learning rate schedule.

        def lr_schedule(epoch):
            return 1e-6
            # if epoch < 80:
            #     return 0.001
            # elif epoch < 100:
            #     return 0.0001
            # else:
            #     return 0.00001

        learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule,
                                                        verbose=1)

        terminate_on_nan = TerminateOnNaN()

        callbacks = [learning_rate_scheduler, terminate_on_nan]

        # If you're resuming a previous training, set `initial_epoch` and `final_epoch` accordingly.
        initial_epoch = 120
        final_epoch = 200
        steps_per_epoch = 500

        history = model.fit_generator(generator=train_generator,
                                      steps_per_epoch=steps_per_epoch,
                                      epochs=final_epoch,
                                      callbacks=callbacks,
                                      validation_data=val_generator,
                                      validation_steps=ceil(val_dataset_size /
                                                            batch_size),
                                      initial_epoch=initial_epoch)

        model_name = "vgg19BNmodel_cont.h5"
        model.save(model_name)
        with file_io.FileIO(model_name, mode='rb') as input_f:
            with file_io.FileIO("gs://deeplearningteam11/" + model_name,
                                mode='wb+') as output_f:  # binary mode to match the 'rb' read above
                output_f.write(input_f.read())
Example No. 19
    def __init__(self,
                 resize_height,
                 resize_width,
                 random_brightness=(-48, 48, 0.5),
                 random_contrast=(0.5, 1.8, 0.5),
                 random_saturation=(0.5, 1.8, 0.5),
                 random_hue=(18, 0.5),
                 random_flip=0.5,
                 min_scale=0.3,
                 max_scale=2.0,
                 min_aspect_ratio=0.5,
                 max_aspect_ratio=2.0,
                 n_trials_max=3,
                 clip_boxes=True,
                 overlap_criterion='area',
                 bounds_box_filter=(0.3, 1.0),
                 bounds_validator=(0.5, 1.0),
                 n_boxes_min=1,
                 background=(0, 0, 0),
                 labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):

        self.n_trials_max = n_trials_max
        self.clip_boxes = clip_boxes
        self.overlap_criterion = overlap_criterion
        self.bounds_box_filter = bounds_box_filter
        self.bounds_validator = bounds_validator
        self.n_boxes_min = n_boxes_min
        self.background = background
        self.labels_format = labels_format

        # Determines which boxes are kept in an image after the transformations have been applied.
        self.box_filter_patch = BoxFilter(check_overlap=True,
                                          check_min_area=False,
                                          check_degenerate=False,
                                          overlap_criterion=self.overlap_criterion,
                                          overlap_bounds=self.bounds_box_filter,
                                          labels_format=self.labels_format)

        self.box_filter_resize = BoxFilter(check_overlap=False,
                                           check_min_area=True,
                                           check_degenerate=True,
                                           min_area=16,
                                           labels_format=self.labels_format)

        # Determines whether the result of the transformations is a valid training image.
        self.image_validator = ImageValidator(overlap_criterion=self.overlap_criterion,
                                              bounds=self.bounds_validator,
                                              n_boxes_min=self.n_boxes_min,
                                              labels_format=self.labels_format)

        # Utility transformations
        self.convert_to_3_channels = ConvertTo3Channels()  # Make sure all images end up having 3 channels.
        self.convert_RGB_to_HSV = ConvertColor(current='RGB', to='HSV')
        self.convert_HSV_to_RGB = ConvertColor(current='HSV', to='RGB')
        self.convert_to_float32 = ConvertDataType(to='float32')
        self.convert_to_uint8 = ConvertDataType(to='uint8')
        self.resize = Resize(height=resize_height,
                             width=resize_width,
                             box_filter=self.box_filter_resize,
                             labels_format=self.labels_format)

        # Photometric transformations
        self.random_brightness = RandomBrightness(lower=random_brightness[0], upper=random_brightness[1],
                                                  prob=random_brightness[2])
        self.random_contrast = RandomContrast(lower=random_contrast[0], upper=random_contrast[1],
                                              prob=random_contrast[2])
        self.random_saturation = RandomSaturation(lower=random_saturation[0], upper=random_saturation[1],
                                                  prob=random_saturation[2])
        self.random_hue = RandomHue(max_delta=random_hue[0], prob=random_hue[1])

        # Geometric transformations
        self.random_flip = RandomFlip(dim='horizontal', prob=random_flip, labels_format=self.labels_format)
        self.patch_coord_generator = PatchCoordinateGenerator(must_match='w_ar',
                                                              min_scale=min_scale,
                                                              max_scale=max_scale,
                                                              scale_uniformly=False,
                                                              min_aspect_ratio=min_aspect_ratio,
                                                              max_aspect_ratio=max_aspect_ratio)
        self.random_patch = RandomPatch(patch_coord_generator=self.patch_coord_generator,
                                        box_filter=self.box_filter_patch,
                                        image_validator=self.image_validator,
                                        n_trials_max=self.n_trials_max,
                                        clip_boxes=self.clip_boxes,
                                        prob=1.0,
                                        can_fail=False,
                                        labels_format=self.labels_format)

        # Define the processing chain
        self.transformations = [self.convert_to_3_channels,
                                self.convert_to_float32,
                                self.random_brightness,
                                self.random_contrast,
                                self.convert_to_uint8,
                                self.convert_RGB_to_HSV,
                                self.convert_to_float32,
                                self.random_saturation,
                                self.random_hue,
                                self.convert_to_uint8,
                                self.convert_HSV_to_RGB,
                                self.random_patch,
                                self.random_flip,
                                self.resize]
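This `__init__` arrives without its enclosing `class` statement in the source. Its photometric defaults match the `DataAugmentationConstantInputSize` call in Example No. 20, but the resize and random-patch parameters suggest a custom variant. Assuming the usual companion `__call__` that folds an image and its labels through `self.transformations` in order, applying the chain would look roughly like this (class name hypothetical):

# Hypothetical class name and inputs; `image` is an (H, W, C) ndarray and `labels`
# an (n_boxes, 5) array in the labels_format declared above.
augmenter = CustomSSDAugmentation(resize_height=300, resize_width=300)
aug_image, aug_labels = augmenter(image, labels)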
Example No. 20
    def set_generator(self,
                      train_images_dir,
                      train_annotation_path,
                      batch_size,
                      val_images_dir=None,
                      val_annotation_path=None):
        train_dataset = DataGenerator(load_images_into_memory=True,
                                      hdf5_dataset_path=None)
        train_dataset.parse_json(images_dirs=[train_images_dir],
                                 annotations_filenames=[train_annotation_path],
                                 ground_truth_available=True,
                                 include_classes='all',
                                 ret=False,
                                 verbose=True)
        train_dataset_size = train_dataset.get_dataset_size()
        if self.model_name == 'ssd_7':
            # Define the image processing chain.
            ssd_data_augmentation = DataAugmentationConstantInputSize(
                random_brightness=(-48, 48, 0.5),
                random_contrast=(0.5, 1.8, 0.5),
                random_saturation=(0.5, 1.8, 0.5),
                random_hue=(18, 0.5),
                random_flip=0.5,
                random_translate=((0.03, 0.5), (0.03, 0.5), 0.5),
                random_scale=(0.5, 2.0, 0.5),
                n_trials_max=3,
                clip_boxes=True,
                overlap_criterion='area',
                bounds_box_filter=(0.3, 1.0),
                bounds_validator=(0.5, 1.0),
                n_boxes_min=1,
                background=(0, 0, 0))

            # Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.

            # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes.
            predictor_sizes = [
                self.model.get_layer('classes4').output_shape[1:3],
                self.model.get_layer('classes5').output_shape[1:3],
                self.model.get_layer('classes6').output_shape[1:3],
                self.model.get_layer('classes7').output_shape[1:3]
            ]

        elif self.model_name == 'ssd_300':
            # For the training generator:
            ssd_data_augmentation = SSDDataAugmentation(
                img_height=self.image_size[0],
                img_width=self.image_size[1],
                background=self.mean_color)

            # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.

            # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes.
            predictor_sizes = [
                self.model.get_layer(
                    'conv4_3_norm_mbox_conf').output_shape[1:3],
                self.model.get_layer('fc7_mbox_conf').output_shape[1:3],
                self.model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
                self.model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
                self.model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
                self.model.get_layer('conv9_2_mbox_conf').output_shape[1:3]
            ]
        elif self.model_name == 'ssd_512':
            # For the training generator:
            ssd_data_augmentation = SSDDataAugmentation(
                img_height=self.image_size[0],
                img_width=self.image_size[1],
                background=self.mean_color)
            # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.

            # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes.
            predictor_sizes = [
                self.model.get_layer(
                    'conv4_3_norm_mbox_conf').output_shape[1:3],
                self.model.get_layer('fc7_mbox_conf').output_shape[1:3],
                self.model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
                self.model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
                self.model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
                self.model.get_layer('conv9_2_mbox_conf').output_shape[1:3],
                self.model.get_layer('conv10_2_mbox_conf').output_shape[1:3]
            ]

        ssd_input_encoder = SSDInputEncoder(
            img_height=self.image_size[0],
            img_width=self.image_size[1],
            n_classes=self.n_classes,
            predictor_sizes=predictor_sizes,
            scales=self.scales,
            aspect_ratios_per_layer=self.aspect_ratios_per_layer,
            two_boxes_for_ar1=self.two_boxes_for_ar1,
            steps=self.steps,
            offsets=self.offsets,
            clip_boxes=self.clip_boxes,
            variances=self.variances,
            matching_type='multi',
            pos_iou_threshold=0.5,
            neg_iou_limit=0.5,
            normalize_coords=self.normalize_coords)

        self.generator = train_dataset.generate(
            batch_size=batch_size,
            shuffle=True,
            transformations=[ssd_data_augmentation],
            label_encoder=ssd_input_encoder,
            returns={'processed_images', 'encoded_labels'},
            keep_images_without_gt=False)
        self.steps_per_epoch = ceil(train_dataset_size / batch_size)

        if val_images_dir is not None and val_annotation_path is not None:
            val_dataset = DataGenerator(load_images_into_memory=True,
                                        hdf5_dataset_path=None)
            val_dataset.parse_json(images_dirs=[val_images_dir],
                                   annotations_filenames=[val_annotation_path],
                                   ground_truth_available=True,
                                   include_classes='all',
                                   ret=False,
                                   verbose=True)
            val_dataset_size = val_dataset.get_dataset_size()

            if self.model_name == 'ssd_300' or self.model_name == 'ssd_512':
                # For the validation generator:
                convert_to_3_channels = ConvertTo3Channels()
                resize = Resize(height=self.image_size[0],
                                width=self.image_size[1])
                transformations = [convert_to_3_channels, resize]
            else:
                transformations = []

            self.validation_data = val_dataset.generate(
                batch_size=batch_size,
                shuffle=False,
                transformations=transformations,
                label_encoder=ssd_input_encoder,
                returns={'processed_images', 'encoded_labels'},
                keep_images_without_gt=False)
            self.validation_steps = ceil(val_dataset_size / batch_size)

        else:
            self.validation_data = None
            self.validation_steps = None
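A hedged call sketch; `trainer` stands for an instance of whatever class this method belongs to (the class definition is not shown), and the dataset paths are hypothetical:

trainer.set_generator(train_images_dir='data/train/images/',
                      train_annotation_path='data/train/annotations.json',
                      batch_size=16,
                      val_images_dir='data/val/images/',
                      val_annotation_path='data/val/annotations.json')
# Afterwards trainer.generator and trainer.steps_per_epoch (and, if validation data was
# supplied, trainer.validation_data / trainer.validation_steps) can be fed to fit_generator().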
Example No. 21
def test_config(config):
    '''
    Test the given configuration; the configuration should already have been
    used for training (see ssd_train.py), otherwise this will raise an error.

    Arguments:
        config: the configuration of the model to use; should already be
            loaded.

    '''
    local_dir = config.ROOT_FOLDER
    data_dir = config.DATA_DIR
    img_shape = config.IMG_SHAPE
    img_height = img_shape[0]  # Height of the model input images
    img_width = img_shape[1]  # Width of the model input images
    img_channels = img_shape[2]  # Number of color channels of the model input images
    n_classes = 20  # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO
    normalize_coords = True

    K.clear_session()  # Clear previous models from memory.
    print("[INFO] loading model...")
    model_path = os.path.join(local_dir, 'models', config.MODEL_NAME)

    # We need to create an SSDLoss object in order to pass that to the model loader.
    ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)
    model = load_model(model_path,
                       custom_objects={
                           'AnchorBoxes': AnchorBoxes,
                           'L2Normalization': L2Normalization,
                           'DecodeDetections': DecodeDetections,
                           'compute_loss': ssd_loss.compute_loss
                       })
    classes = config.CLASSES
    dataset = DataGenerator(load_images_into_memory=False,
                            hdf5_dataset_path=None)
    dataset_images_dir = os.path.join(data_dir, 'Images')
    dataset_annotations_dir = os.path.join(data_dir, 'Annotations/')
    dataset_test_image_set_filename = os.path.join(data_dir, 'ImageSets',
                                                   'test.txt')

    dataset.parse_xml(images_dirs=[dataset_images_dir],
                      image_set_filenames=[dataset_test_image_set_filename],
                      annotations_dirs=[dataset_annotations_dir],
                      classes=classes,
                      include_classes='all',
                      exclude_truncated=False,
                      exclude_difficult=True,
                      ret=False)
    dataset.create_hdf5_dataset(file_path=config.MODEL_NAME,
                                resize=False,
                                variable_image_size=True,
                                verbose=True)

    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)
    dataset_size = dataset.get_dataset_size()

    print("Number of images in the dataset:\t{:>6}".format(dataset_size))

    predict_generator = dataset.generate(
        batch_size=config.PREDICT_BATCH_SIZE,
        shuffle=True,
        transformations=[convert_to_3_channels, resize],
        label_encoder=None,
        returns={
            'processed_images', 'filenames', 'inverse_transform',
            'original_images', 'original_labels'
        },
        keep_images_without_gt=False)

    count = 0
    while count < dataset_size:
        batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next(
            predict_generator)
        i = 0
        print("Image:", batch_filenames[i])
        print()
        print("Ground truth boxes:\n")
        print(np.array(batch_original_labels[i]))

        y_pred = model.predict(batch_images)
        y_pred_decoded = decode_detections(y_pred,
                                           confidence_thresh=0.5,
                                           iou_threshold=0.4,
                                           top_k=200,
                                           normalize_coords=normalize_coords,
                                           img_height=img_height,
                                           img_width=img_width)
        y_pred_decoded_inv = apply_inverse_transforms(
            y_pred_decoded, batch_inverse_transforms)

        np.set_printoptions(precision=2, suppress=True, linewidth=90)
        print("Predicted boxes:\n")
        print('   class   conf xmin   ymin   xmax   ymax')
        print(y_pred_decoded_inv[i])
        # cv2.imshow('original image',batch_original_images[i])
        # cv2.waitKey(800)
        # cv2.destroyAllWindows()
        colors = plt.cm.hsv(np.linspace(0, 1, n_classes + 1)).tolist()
        plt.figure(figsize=(15, 8))
        plt.imshow(batch_original_images[i])

        current_axis = plt.gca()
        len_orig = 0
        for box in batch_original_labels[i]:
            len_orig += 1
            xmin = box[1]
            ymin = box[2]
            xmax = box[3]
            ymax = box[4]
            label = '{}'.format(classes[int(box[0])])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin),
                              xmax - xmin,
                              ymax - ymin,
                              color='green',
                              fill=False,
                              linewidth=2))
            current_axis.text(xmin,
                              ymin,
                              label,
                              size='x-large',
                              color='white',
                              bbox={
                                  'facecolor': 'green',
                                  'alpha': 1.0
                              })

        len_found = 0
        for box in y_pred_decoded_inv[i]:
            len_found += 1
            xmin = box[2]
            ymin = box[3]
            xmax = box[4]
            ymax = box[5]
            color = colors[int(box[0])]
            label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
            current_axis.add_patch(
                plt.Rectangle((xmin, ymin),
                              xmax - xmin,
                              ymax - ymin,
                              color=color,
                              fill=False,
                              linewidth=2))
            current_axis.text(xmin,
                              ymin,
                              label,
                              size='x-large',
                              color='white',
                              bbox={
                                  'facecolor': color,
                                  'alpha': 1.0
                              })

        print('Number of original boxes : {}'.format(len_orig))
        print('Number of found boxes : {}'.format(len_found))
        plt.show()
        count += 1
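A hedged sketch of the attributes the `config` object must carry, inferred purely from the accesses in the body above (all values are placeholders):

class DemoConfig:
    ROOT_FOLDER = '.'                       # must contain a 'models' subdirectory
    DATA_DIR = './data'                     # must contain 'Images', 'Annotations' and 'ImageSets'
    IMG_SHAPE = (300, 300, 3)
    MODEL_NAME = 'ssd300_trained.h5'
    CLASSES = ['background', 'aeroplane', 'bicycle']  # truncated; use the full training class list
    PREDICT_BATCH_SIZE = 1

test_config(DemoConfig())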
Example No. 22
#     ymin = int(round(box[-3] * orig_images[0].shape[0] / image_height))
#     xmax = int(round(box[-2] * orig_images[0].shape[1] / image_width))
#     ymax = int(round(box[-1] * orig_images[0].shape[0] / image_height))
#     class_id = int(box[0])
#     cv2.putText(image, classes[class_id], (xmin, ymin + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
#     cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 0, 255), 1)
# cv2.namedWindow('image', cv2.WINDOW_NORMAL)
# cv2.imshow('image', image)
# cv2.waitKey(0)

images_dir = '/home/adam/.keras/datasets/VOCdevkit'
# Ground truth
val_hdf5_path = osp.join(images_dir, '07_test.h5')
val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=val_hdf5_path)
convert_to_3_channels = ConvertTo3Channels()
resize = Resize(height=image_height, width=image_width)
predict_generator = val_dataset.generate(batch_size=1,
                                         shuffle=True,
                                         transformations=[convert_to_3_channels, resize],
                                         label_encoder=None,
                                         returns={'processed_images',
                                                  'filenames',
                                                  'inverse_transform',
                                                  'original_images',
                                                  'original_labels'
                                                  },
                                         keep_images_without_gt=False)
# 2: Generate samples
while True:
    batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next(
        predict_generator)
Example No. 23
model_path = 'inference_low.h5'
fcos_loss = FCOSLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)
K.clear_session()  # Clear previous models from memory.
model = load_model(
    model_path,
    custom_objects={
        'DecodeDetectionsFast': DecodeDetectionsFast,
        'get_rpn_bbox': get_rpn_bbox,
        'keras_boardcoast': keras_boardcoast,
        'keras_exp_stride': keras_exp_stride,
        # 'compute_loss':fcos_loss.compute_loss
    })

convert_to_3_channels = ConvertTo3Channels()
resize = Resize(height=cfgs.IMG_SHORT_SIDE_LEN, width=cfgs.IMG_SHORT_SIDE_LEN)

if cfgs.CREATE_IMAGE_H5:
    val_dataset = DataGenerator(load_images_into_memory=False,
                                hdf5_dataset_path=None)
else:
    val_dataset = DataGenerator(load_images_into_memory=False,
                                hdf5_dataset_path=cfgs.TEST_HDF_DATASET)
val_dataset.parse_csv(
    images_dir=cfgs.IMAGE_DIR,
    labels_filename=cfgs.TEST_LABEL_FILENAME,
    input_format=['image_name', 'xmin', 'xmax', 'ymin', 'ymax', 'class_id'],
    include_classes='all')
if cfgs.CREATE_IMAGE_H5:
    val_dataset.create_hdf5_dataset(file_path=cfgs.TEST_HDF_DATASET,
                                    resize=(cfgs.IMG_SHORT_SIDE_LEN,
                                            cfgs.IMG_SHORT_SIDE_LEN))  # snippet truncated in the source; a square resize is assumed here
Example No. 24
	def load_VOC_IMG_generators(self,model):
		print('Making VOC image generators')
		datadir = self.datas['DATA_PATH']
		train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)
		val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)
		test_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)
		images_dir                   = os.path.join(datadir,'Images')
		annotations_dir              = os.path.join(datadir,'Annotations')
		train_image_set_filename  = os.path.join(datadir,'ImageSets','train.txt')
		val_image_set_filename      = os.path.join(datadir,'ImageSets','val.txt')
		test_image_set_filename      = os.path.join(datadir,'ImageSets','test.txt')
		generator_options = self.datas['GENERATOR']

		train_dataset.parse_xml(images_dirs=[images_dir],
		                        image_set_filenames=[train_image_set_filename],
		                        annotations_dirs=[annotations_dir],
		                        classes=self.datas['CLASSES'],
		                        include_classes='all',
		                        exclude_truncated=False,
		                        exclude_difficult=False,
		                        ret=False)
		val_dataset.parse_xml(images_dirs=[images_dir],
		                        image_set_filenames=[val_image_set_filename],
		                        annotations_dirs=[annotations_dir],
		                        classes=self.datas['CLASSES'],
		                        include_classes='all',
		                        exclude_truncated=False,
		                        exclude_difficult=False,
		                        ret=False)
		test_dataset.parse_xml(images_dirs=[images_dir],
		                        image_set_filenames=[test_image_set_filename],
		                        annotations_dirs=[annotations_dir],
		                        classes=self.datas['CLASSES'],
		                        include_classes='all',
		                        exclude_truncated=False,
		                        exclude_difficult=False,
		                        ret=False)

		convert_to_3_channels = ConvertTo3Channels()
		target_size = generator_options['TARGET_SIZE']
		resize = Resize(height=target_size[0], width=target_size[1])

		predictor_sizes = [model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
	                       model.get_layer('fc7_mbox_conf').output_shape[1:3],
	                       model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
	                       model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
	                       model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
	                       model.get_layer('conv9_2_mbox_conf').output_shape[1:3]]
		scales_pascal = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets
		scales = scales_pascal
		aspect_ratios = [[1.0, 2.0, 0.5],
		                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
		                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
		                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
		                 [1.0, 2.0, 0.5],
		                 [1.0, 2.0, 0.5]] # The anchor box aspect ratios used in the original SSD300; the order matters
		steps = [8, 16, 32, 64, 100, 300] # The space between two adjacent anchor box center points for each predictor layer.
		two_boxes_for_ar1 = True
		mean_color=[123,117,104] #TODO : add this as a parameter
		offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
		clip_boxes=False
		variances=[0.1, 0.1, 0.2, 0.2]
		normalize_coords=True

		ssd_input_encoder = SSDInputEncoder(img_height = target_size[0],
											img_width = target_size[1],
											n_classes = 20, #TODO : handle subsampling
											predictor_sizes=predictor_sizes,
											scales=scales,
											aspect_ratios_per_layer=aspect_ratios,
											two_boxes_for_ar1=two_boxes_for_ar1,
											steps=steps,
											offsets=offsets,
											clip_boxes=clip_boxes,
											variances=variances,
											matching_type='multi',
											pos_iou_threshold=0.5,
											neg_iou_limit=0.5,
											normalize_coords=normalize_coords
											)
		train_generator = train_dataset.generate(batch_size=generator_options['BATCH_SIZE'],
												shuffle=True,
												transformations=[convert_to_3_channels,
																resize],
												label_encoder=ssd_input_encoder,
												returns={'processed_images',
														 'encoded_labels'},
												keep_images_without_gt=False)

		val_generator = val_dataset.generate(batch_size=generator_options['BATCH_SIZE'],
												shuffle=True,
												transformations=[convert_to_3_channels,
																resize],
												label_encoder=ssd_input_encoder,
												returns={'processed_images',
														 'encoded_labels'},
												keep_images_without_gt=False)

		test_generator = test_dataset.generate(batch_size=generator_options['BATCH_SIZE'],
												shuffle=True,
												transformations=[convert_to_3_channels,
																resize],
												label_encoder=ssd_input_encoder,
												returns={'processed_images',
														 'encoded_labels'},
												keep_images_without_gt=False)
		return [train_generator, train_dataset.get_dataset_size()], \
		       [val_generator, val_dataset.get_dataset_size()], \
		       [test_generator, test_dataset.get_dataset_size()]
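A hedged sketch of consuming the return value; the enclosing class is not shown, so `trainer` is hypothetical:

(train_gen, n_train), (val_gen, n_val), (test_gen, n_test) = trainer.load_VOC_IMG_generators(model)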