Example #1
classes = [
    'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
    'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
    'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
]

train_dataset = BatchGenerator(
    images_path='./Datasets/VOCdevkit/VOC2012/JPEGImages/',
    include_classes='all',
    box_output_format=['class_id', 'xmin', 'xmax', 'ymin', 'ymax'])

train_dataset.parse_xml(
    annotations_path='./Datasets/VOCdevkit/VOC2012/Annotations/',
    image_set_path='./Datasets/VOCdevkit/VOC2012/ImageSets/Main/',
    image_set='train.txt',
    classes=classes,
    exclude_truncated=False,
    exclude_difficult=False,
    ret=False)

train_generator = train_dataset.generate(
    batch_size=batch_size,
    train=True,
    ssd_box_encoder=ssd_box_encoder,
    equalize=False,
    brightness=(0.5, 2, 0.5),
    flip=0.5,
    translate=((0, 30), (0, 30), 0.5),
    scale=(0.75, 1.2, 0.5),
    random_crop=(300, 300, 1, 3))  # (height, width, min. objects per crop, max. trials); the last value is an assumption

Example #2
val_dataset.parse_xml(images_paths=[road_test_images_path],
                      annotations_paths=[road_test_annotations_path],
                      image_set_paths=[road_test_image_set_path],
                      classes=classes,
                      include_classes='all',
                      exclude_truncated=False,
                      exclude_difficult=False,
                      ret=False)

# 3: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.

# The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes.
predictor_sizes = [
    model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3],
    model.get_layer('fc7_mbox_conf').output_shape[1:3],
    model.get_layer('conv6_2_mbox_conf').output_shape[1:3],
    model.get_layer('conv7_2_mbox_conf').output_shape[1:3],
    model.get_layer('conv8_2_mbox_conf').output_shape[1:3],
    model.get_layer('conv9_2_mbox_conf').output_shape[1:3]
]
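
# The snippet ends before the encoder itself is instantiated. A minimal
# sketch of that step, assuming the SSDBoxEncoder class from the older
# ssd_keras module layout; every hyperparameter value below is an
# illustrative assumption, not taken from the snippet.
from ssd_box_encode_decode_utils import SSDBoxEncoder

ssd_box_encoder = SSDBoxEncoder(img_height=300,   # assumed SSD300 input size
                                img_width=300,
                                n_classes=len(classes),
                                predictor_sizes=predictor_sizes,
                                min_scale=0.1,     # assumed anchor scale range
                                max_scale=0.9,
                                aspect_ratios_global=[0.5, 1.0, 2.0],  # assumed
                                two_boxes_for_ar1=True,
                                pos_iou_threshold=0.5,
                                neg_iou_threshold=0.3,
                                coords='centroids',
                                normalize_coords=True)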
Example #3
VOC_2007_test_images_dir = '../../datasets/VOCdevkit/VOC2007_Test/JPEGImages/'
VOC_2007_test_annotations_dir = '../../datasets/VOCdevkit/VOC2007_Test/Annotations/'
VOC_2007_test_image_set_filename = '../../datasets/VOCdevkit/VOC2007_Test/ImageSets/Main/test.txt'

# The XML parser needs to know what object class names to look for and in which order to map them to integers.
classes = [
    'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
    'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
    'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
]
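
# The parser maps each name to its position in this list, so 'background'
# is class ID 0, 'aeroplane' is 1, and so on. A quick illustrative check:
class_ids = {name: i for i, name in enumerate(classes)}
print(class_ids['car'])  # -> 7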

dataset.parse_xml(images_dirs=[VOC_2007_test_images_dir],
                  image_set_filenames=[VOC_2007_test_image_set_filename],
                  annotations_dirs=[VOC_2007_test_annotations_dir],
                  classes=classes,
                  include_classes='all',
                  exclude_truncated=False,
                  exclude_difficult=True,
                  ret=False)

generator = dataset.generate(batch_size=1,
                             shuffle=True,
                             train=False,
                             returns={
                                 'processed_images', 'filenames',
                                 'inverse_transform', 'original_images',
                                 'original_labels'
                             },
                             max_crop_and_resize=False,
                             random_pad_and_resize=False,
                             resize=(img_height, img_width))
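
# A usage sketch for this generator: pull one batch and run inference.
# The unpacking order below assumes the generator yields the requested
# items in the library's documented order, and `model` is assumed to be
# a trained SSD model built for (img_height, img_width) inputs.
(batch_images, batch_filenames, batch_inverse_transforms,
 batch_original_images, batch_original_labels) = next(generator)
y_pred = model.predict(batch_images)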
Example #4
PKLOT_images_dir = 'C:/Users/Damini/Documents/ECE477_ObjectDetection_Data_Storage/PKLot/PKLot'

# The directory that contains the annotations.
PKLOT_annotations_dir = 'C:/Users/Damini/Documents/ECE477_ObjectDetection_Data_Storage/PKLot2VOC_full_dataset/'

# Training dataset
PKLOT_train_image_set_filename = 'C:/Users/Damini/Documents/ECE477_ObjectDetection_Data_Storage/PKLot2VOC_full_dataset/ImageSets/Main/train.txt'
PKLOT_val_image_set_filename = 'C:/Users/Damini/Documents/ECE477_ObjectDetection_Data_Storage/PKLot2VOC_full_dataset/ImageSets/Main/valid.txt'

# The XML parser needs to know what object class names to look for and in which order to map them to integers.
classes = ['occupied', 'vaccant']  # 'vaccant' (sic) must match the <name> tags in the annotation XMLs exactly

train_dataset.parse_xml(images_dirs=[PKLOT_images_dir],
                        image_set_filenames=[PKLOT_train_image_set_filename],
                        annotations_dirs=[PKLOT_annotations_dir],
                        classes=classes,
                        include_classes='all',
                        exclude_truncated=False,
                        exclude_difficult=False,
                        ret=False)

val_dataset.parse_xml(images_dirs=[PKLOT_images_dir],
                      image_set_filenames=[PKLOT_val_image_set_filename],
                      annotations_dirs=[PKLOT_annotations_dir],
                      classes=classes,
                      include_classes='all',
                      exclude_truncated=False,
                      exclude_difficult=True,
                      ret=False)
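
# A small sanity check before training, assuming the newer DataGenerator
# API (to which the `images_dirs`/`annotations_dirs` keywords belong)
# provides get_dataset_size():
print("Training samples:  ", train_dataset.get_dataset_size())
print("Validation samples:", val_dataset.get_dataset_size())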

# The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes.
predictor_sizes = [
Example #5

# train_dataset.parse_xml(images_paths=[VOC_2007_images_path,VOC_2007_test_images_path],
#                         annotations_paths=[VOC_2007_annotations_path,VOC_2007_test_annotations_path],
#                         image_set_paths=[VOC_2007_train_image_set_path,VOC_2007_test_image_set_path],
#                         classes=classes,
#                         include_classes='all',
#                         exclude_truncated=False,
#                         exclude_difficult=False,
#                         ret=False)

train_dataset.parse_xml(
    images_paths=[apple_images, apple_images],
    annotations_paths=[apple_annotations, apple_annotations],
    image_set_paths=[apple_train_imageset, apple_val_imageset],
    classes=classes,
    include_classes='all',
    exclude_truncated=False,
    exclude_difficult=False,
    ret=False)
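
# A hedged sketch of deriving steps_per_epoch from the parsed dataset,
# assuming the older BatchGenerator API provides get_n_samples():
from math import ceil
steps_per_epoch = ceil(train_dataset.get_n_samples() / batch_size)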

train_generator = train_dataset.generate(
    batch_size=batch_size,
    train=True,
    ssd_box_encoder=ssd_box_encoder,
    equalize=False,
    brightness=(0.5, 2, 0.5),
    flip=0.5,
    translate=False,
    scale=False,
    max_crop_and_resize=(