def convert_to_h5(images_dir, annotation_csv, h5_file_name,
                  input_format=('image_name', 'xmin', 'xmax', 'ymin', 'ymax', 'class_id')):
    # Parse the CSV annotations and serialize the dataset into an HDF5 file.
    dataset = DataGenerator(load_images_into_memory=False)

    dataset.parse_csv(images_dir, annotation_csv, input_format, verbose=True)

    dataset.create_hdf5_dataset(file_path=h5_file_name + ".h5", verbose=True)
    return
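# A hedged usage sketch of the helper above; the image directory, CSV path,
# and output name below are hypothetical placeholders.
convert_to_h5(images_dir='./data/images/',
              annotation_csv='./data/labels_train.csv',
              h5_file_name='dataset_train')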
Example #2
    def load_dataset_from_data(self):
        """
        Load the dataset from the raw images and CSV files, and write it to HDF5 files at the same time.
        """
        train_dataset = DataGenerator(load_images_into_memory=False,
                                      hdf5_dataset_path=None)
        val_dataset = DataGenerator(load_images_into_memory=False,
                                    hdf5_dataset_path=None)

        images_dir = self.dataset_dir  # image directory

        # Ground truth: image name, bounding box (4 coordinates), object class
        train_labels_filename = os.path.join(self.dataset_dir,
                                             'labels_train.csv')
        val_labels_filename = os.path.join(self.dataset_dir, 'labels_val.csv')

        train_dataset.parse_csv(images_dir=images_dir,
                                labels_filename=train_labels_filename,
                                input_format=[
                                    'image_name', 'xmin', 'xmax', 'ymin',
                                    'ymax', 'class_id'
                                ],
                                include_classes='all')  # parse the CSV file

        val_dataset.parse_csv(images_dir=images_dir,
                              labels_filename=val_labels_filename,
                              input_format=[
                                  'image_name', 'xmin', 'xmax', 'ymin', 'ymax',
                                  'class_id'
                              ],
                              include_classes='all')  # parse the CSV file

        # HDF5 files
        train_dataset_hdf5 = os.path.join(self.data_dir,
                                          'dataset_udacity_traffic_train.h5')
        val_dataset_hdf5 = os.path.join(self.data_dir,
                                        'dataset_udacity_traffic_val.h5')

        train_dataset.create_hdf5_dataset(file_path=train_dataset_hdf5,
                                          resize=False,
                                          variable_image_size=True,
                                          verbose=True)

        val_dataset.create_hdf5_dataset(file_path=val_dataset_hdf5,
                                        resize=False,
                                        variable_image_size=True,
                                        verbose=True)

        # Get the number of samples in the training and validation datasets.
        train_dataset_size = train_dataset.get_dataset_size()
        val_dataset_size = val_dataset.get_dataset_size()

        print("Number of images in the training dataset:\t{:>6}".format(
            train_dataset_size))
        print("Number of images in the validation dataset:\t{:>6}".format(
            val_dataset_size))

        return train_dataset, train_dataset_size, val_dataset, val_dataset_size
Example #3
    def prepare_ds(self, dataset_folder, annotation_file=''):
        # Prepares a dataset from an image folder and an annotations CSV file.
        ds = DataGenerator()
        ds.parse_csv(images_dir=dataset_folder,
                     labels_filename=annotation_file,
                     input_format=self.config.input_format,
                     include_classes='all',
                     random_sample=False)
        return ds
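    # Hedged usage sketch (not part of the original snippet): how prepare_ds
    # might be called elsewhere in the class; the paths are placeholders.
    #   train_ds = self.prepare_ds('./data/train_images/', annotation_file='./data/train_labels.csv')
    #   print("Parsed {} training images".format(train_ds.get_dataset_size()))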
Example #4
ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

model.compile(optimizer=sgd, loss=ssd_loss.compute_loss)

train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)
val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)

train_dir = './data/train2014'
val_dir = './data/train2014'

train_annotations_dir = './data/train_anno.csv'
val_annotations_dir = './data/val_anno.csv'

train_dataset.parse_csv(images_dir=train_dir,
                        labels_filename=train_annotations_dir,
                        input_format=['image_name', 'xmin', 'xmax', 'ymin', 'ymax', 'class_id'],
                        include_classes='all',
                        ret=False)

val_dataset.parse_csv(images_dir=val_dir,
                      labels_filename=val_annotations_dir,
                      input_format=['image_name', 'xmin', 'xmax', 'ymin', 'ymax', 'class_id'],
                      include_classes='all',
                      ret=False)

# 3: Set the batch size.

batch_size = 32 # Change the batch size if you like, or if you run into GPU memory issues.

# 4: Set the image transformations for pre-processing and data augmentation options.
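# The snippet is truncated here. A hedged sketch of what step 4 usually looks
# like in these examples: chain simple pre-processing ops (from the
# data_generator transform modules, see the imports in Example #9) and hand
# them to the training generator. `label_encoder=None` is a placeholder; a
# real training run would pass the SSD label encoder instead.
convert_to_3_channels = ConvertTo3Channels()
resize = Resize(height=300, width=300)

train_generator = train_dataset.generate(batch_size=batch_size,
                                         shuffle=True,
                                         transformations=[convert_to_3_channels, resize],
                                         label_encoder=None,
                                         returns={'processed_images', 'processed_labels'},
                                         keep_images_without_gt=False)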
Example #5
images_folder = args.images_folder
labels = args.labels
model_name = args.model_name
nb_test_images = args.number

normalize_coords = True
img_height = 300  # Height of the model input images
img_width = 300  # Width of the model input images
n_classes = 1

# Set the generator for the predictions.
val_dataset = DataGenerator()

val_dataset.parse_csv(images_dir=images_folder,
                      labels_filename=labels,
                      include_classes='all')

predict_generator = val_dataset.generate(
    batch_size=1,
    shuffle=False,
    transformations=[],
    label_encoder=None,
    returns={'processed_images', 'original_labels'},
    keep_images_without_gt=False)

val_dataset_size = val_dataset.get_dataset_size()

# number of test set images to use during evaluation

# number of bound not cut
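# A hedged sketch (not in the original snippet) of how the generator above is
# typically consumed during evaluation, mirroring Example #16 below; `model`
# is assumed to be an already loaded detection model.
for _ in range(min(nb_test_images, val_dataset_size)):
    batch_images, batch_original_labels = next(predict_generator)
    y_pred = model.predict(batch_images)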
Example #6
# drbox_loss = DRBoxLoss(neg_pos_ratio=3, alpha=1.0)
# model_name = 'trained_models/1model.h5'
# model = load_model(model_name, custom_objects={'L2Normalization': L2Normalization, 'AnchorBoxes': AnchorBoxes,
#                                                'compute_loss': drbox_loss.compute_loss})

# 3. Set up the data generators for the training

# Instantiate two `DataGenerator` objects: One for training, one for validation.

train_dataset = DataGenerator(load_images_into_memory=False, show_images=False)
val_dataset = DataGenerator(load_images_into_memory=False)

# Parse the image and label lists for the training and validation datasets.

train_dataset.parse_csv(images_dir=images_dir,
                        labels_filename=train_labels_filename,
                        include_classes='all')

val_dataset.parse_csv(images_dir=images_dir,
                      labels_filename=val_labels_filename,
                      include_classes='all')

# Set the batch size.

batch_size = 64

# Set the image transformations for pre-processing and data augmentation options.

# For the training generator:
data_augmentation = DataAugmentation(img_height, img_width, proba_no_aug=proba_no_aug)
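# Sketch (not part of the original snippet): the augmentation chain above is
# meant to be passed to the training generator's `transformations`, assuming
# `DataAugmentation` is callable like the other transform ops; the validation
# generator usually gets no augmentation. `label_encoder=None` is a placeholder.
train_generator = train_dataset.generate(batch_size=batch_size,
                                         shuffle=True,
                                         transformations=[data_augmentation],
                                         label_encoder=None,
                                         returns={'processed_images', 'processed_labels'},
                                         keep_images_without_gt=False)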
Example #7
# The directories that contain the images.
trainimages_dir      = '../CommonFiles/SingleType111Loops/trainimages/'
testimages_dir       = '../CommonFiles/SingleType111Loops/testimages/'

# The files that contain the annotations.
trainlabels  = '../CommonFiles/SingleType111Loops/trainlabels.csv'
testlabels   = '../CommonFiles/SingleType111Loops/testlabels.csv'

input_format = ['image_name', 'class_id', 'xmin', 'ymin', 'xmax', 'ymax']

classes = ['111']

train_dataset.parse_csv(images_dir=trainimages_dir,
                        labels_filename=trainlabels,
                        input_format=input_format,
                        include_classes='all',
                        random_sample=False,
                        ret=False,
                        verbose=True)

val_dataset.parse_csv(images_dir=testimages_dir,
                      labels_filename=testlabels,
                      input_format=input_format,
                      include_classes='all',
                      random_sample=False,
                      ret=False,
                      verbose=True)

# Optional: Convert the dataset into an HDF5 dataset. This will require more disk space, but will
# speed up the training. Doing this is not relevant in case you activated the `load_images_into_memory`
# option in the constructor, because in that case the images are in memory already anyway. If you don't
# want to create HDF5 datasets, comment out the subsequent two function calls.
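# A hedged sketch of those two calls, following the pattern used in the other
# examples on this page; the .h5 file names are placeholders.
train_dataset.create_hdf5_dataset(file_path='dataset_train.h5',
                                  resize=False,
                                  variable_image_size=True,
                                  verbose=True)

val_dataset.create_hdf5_dataset(file_path='dataset_val.h5',
                                resize=False,
                                variable_image_size=True,
                                verbose=True)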
Example #8
train_dataset = DataGenerator(load_images_into_memory=False,
                              hdf5_dataset_path=None)
val_dataset = DataGenerator(load_images_into_memory=False,
                            hdf5_dataset_path=None)

# 2: Load the data

# TODO: Set the dataset paths here.

# Images
images_dir = './driving_datasets/'

# Ground truth
train_labels_filename = './driving_datasets/labels_train.csv'
val_labels_filename = './driving_datasets/labels_val.csv'

train_dataset.parse_csv(
    images_dir=images_dir,
    labels_filename=train_labels_filename,
    input_format=['image_name', 'xmin', 'xmax', 'ymin', 'ymax',
                  'class_id'],  # the order of the first six columns in the CSV file
    include_classes='all')

val_dataset.parse_csv(
    images_dir=images_dir,
    labels_filename=val_labels_filename,
    input_format=['image_name', 'xmin', 'xmax', 'ymin', 'ymax', 'class_id'],
    include_classes='all')

# Get the number of samples in the training and validation datasets.
train_dataset_size = train_dataset.get_dataset_size()
val_dataset_size = val_dataset.get_dataset_size()

print("训练集的图像数量:\t{:>6}".format(train_dataset_size))
print("validation集的图像数量\t{:>6}".format(val_dataset_size))
Example #9
from bounding_box_utils.bounding_box_utils import iou

from data_generator.object_detection_2d_data_generator import DataGenerator
from data_generator.object_detection_2d_patch_sampling_ops import *
from data_generator.object_detection_2d_geometric_ops import *
from data_generator.object_detection_2d_photometric_ops import *
from data_generator.object_detection_2d_image_boxes_validation_utils import *
from data_generator.data_augmentation_chain_original_ssd import *

dataset = DataGenerator(labels_output_format=('class_id', 'xmin', 'ymin',
                                              'xmax', 'ymax'))

images_dir = 'E:/Hamed/Projects/Python/License Plate Detection/License-Plate-Detection/Final Plates/background.csv'
annotations_dir = 'E:/Hamed/Projects/Python/License Plate Detection/License-Plate-Detection/Final Plates/summary.csv'
image_set_filename = 'E:/Hamed/Projects/Python/License Plate Detection/License-Plate-Detection/Final Plates/imageset.txt'

characterList = np.array(
    'A B C D E F G H I J K L M N O P Q R S T U V W X Y Z'.split(' '))
numbersList = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int)
func = np.vectorize(str)
classes = np.concatenate(
    [np.array(['background']),
     func(numbersList), characterList])

dataset.parse_csv(
    images_dir=images_dir,
    labels_filename=image_set_filename,
    input_format=['image_name', 'xmin', 'xmax', 'ymin', 'ymax', 'class_id'],
    include_classes=classes[0],
    ret=False)
Example #10
# The directories that contain the annotations.
labels_file_train = '../datasets/local_groundtruth_train.csv'
labels_file_test = '../datasets/local_groundtruth_test.csv'

# The XML parser needs to know what object class names to look for and in which order to map them to integers.
classes = [
    'Adidas', 'Adidas-text', 'Airness', 'BFGoodrich', 'Base', 'Bik',
    'Bouigues', 'Bridgestone', 'Bridgestone-text', 'Carglass', 'Citroen',
    'Citroen-text', 'CocaCola', 'Cofidis', 'Dexia', 'ELeclerc', 'Ferrari',
    'Gucci', 'Kia', 'Mercedes', 'Nike', 'Peugeot', 'Puma', 'Puma-text',
    'Quick', 'Reebok', 'Roche', 'SNCF', 'Shell', 'Standard_Liege',
    'StellaArtois', 'TNT', 'Total', 'US_President', 'Umbro', 'VRT', 'Veolia'
]  # Just so we can print class names onto the image instead of IDs

train_dataset.parse_csv(
    images_dir, labels_file_train,
    ['image_name', 'xmin', 'xmax', 'ymin', 'ymax', 'class_id'])
val_dataset.parse_csv(
    images_dir, labels_file_test,
    ['image_name', 'xmin', 'xmax', 'ymin', 'ymax', 'class_id'])

# Optional: Convert the dataset into an HDF5 dataset. This will require more disk space, but will
# speed up the training. Doing this is not relevant in case you activated the `load_images_into_memory`
# option in the constructor, because in that case the images are in memory already anyway. If you don't
# want to create HDF5 datasets, comment out the subsequent two function calls.

train_dataset.create_hdf5_dataset(
    file_path='../datasets/belgas_train_dataset.h5',
    resize=False,
    variable_image_size=True,
    verbose=True)
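# The snippet cuts off before the matching call for the validation set that the
# comment above mentions; a sketch following the same pattern (the file name is
# a placeholder).
val_dataset.create_hdf5_dataset(
    file_path='../datasets/belgas_val_dataset.h5',
    resize=False,
    variable_image_size=True,
    verbose=True)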
Example #11
# 2: Optional: Load some weights

model.load_weights(model_path, by_name=True)

train_dataset = DataGenerator(load_images_into_memory=False,
                              hdf5_dataset_path=None)
val_dataset = DataGenerator(load_images_into_memory=False,
                            hdf5_dataset_path=None)

# 2: Parse the image and label lists for the training and validation datasets.

# TODO: Set the paths to your dataset here.

# Images
images_dir = './infiles'
# Ground truth
train_labels_filename = 'onsite-images-export.csv'
val_labels_filename   = 'onsite-images-valid.csv'

train_dataset.parse_csv(images_dir=images_dir,
                        labels_filename=train_labels_filename,
                        input_format=['image_name', 'xmin', 'ymin', 'xmax', 'ymax', 'class_id'], # This is the order of the first six columns in the CSV file that contains the labels for your dataset. If your labels are in XML format, maybe the XML parser will be helpful, check the documentation.
                        include_classes='all')

val_dataset.parse_csv(images_dir=images_dir,
                      labels_filename=val_labels_filename,
                      input_format=['image_name', 'xmin', 'ymin', 'xmax', 'ymax', 'class_id'],
                      include_classes='all')

Example #12
if NEW_DATA:
    # Images
    images_dir_train = "../../datasets/tires-data/train/"
    images_dir_valid = "../../datasets/tires-data/valid/"
    # Ground truth
    train_labels_filename = "../../datasets/tires-data/labels_train.csv"
    val_labels_filename = "../../datasets/tires-data/labels_valid.csv"

    train_dataset.parse_csv(
        images_dir=images_dir_train,
        labels_filename=train_labels_filename,
        input_format=[
            "image_name",
            "xmin",
            "xmax",
            "ymin",
            "ymax",
            "class_id",
        ],  # This is the order of the first six columns in the CSV file that contains the labels for your dataset. If your labels are in XML format, maybe the XML parser will be helpful, check the documentation.
        include_classes="all",
    )
    val_dataset.parse_csv(
        images_dir=images_dir_valid,
        labels_filename=val_labels_filename,
        input_format=[
            "image_name", "xmin", "xmax", "ymin", "ymax", "class_id"
        ],
        include_classes="all",
    )
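# Not part of the original snippet: when NEW_DATA is False, the generators are
# typically created straight from previously serialized HDF5 files instead, as
# in Example #17 below; the .h5 paths are placeholders.
else:
    train_dataset = DataGenerator(
        load_images_into_memory=False,
        hdf5_dataset_path="../../datasets/tires-data/dataset_train.h5")
    val_dataset = DataGenerator(
        load_images_into_memory=False,
        hdf5_dataset_path="../../datasets/tires-data/dataset_valid.h5")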
Example #13
    # 3: Compile the model so that Keras won't complain the next time you load it.
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

    ###############################################################################
    # 3: Build the DataGenerator
    ###############################################################################
    test_dataset = DataGenerator(load_images_into_memory=False,
                                 hdf5_dataset_path=None)
    test_dataset.parse_csv(images_dir=args.im_path,
                           labels_filename=args.test_label_path,
                           input_format=[
                               'image_name', 'xmin', 'ymin', 'xmax', 'ymax',
                               'class_id'
                           ],
                           include_classes='all',
                           random_sample=False,
                           ret=False,
                           verbose=True)

    val_dataset = DataGenerator(load_images_into_memory=False,
                                hdf5_dataset_path=None)
    val_dataset.parse_csv(images_dir=args.im_path,
                          labels_filename=args.val_label_path,
                          input_format=[
                              'image_name', 'xmin', 'ymin', 'xmax', 'ymax',
                              'class_id'
                          ],
                          include_classes='all',
                          random_sample=False,
                          ret=False,
                          verbose=True)
Example #14
def main(args=args):
    """
    main function that parses the arguments and trains
    :param args: arguments related
    :return: None
    """
    # pylint: disable=line-too-long

    # Images
    images_dir = os.path.abspath(os.path.join(
        os.getcwd(), '')) + "/data/" + args.dataset + "/images/"

    # # Ground truth
    train_labels_filename = os.path.abspath(os.path.join(
        os.getcwd(), '')) + "/data/" + args.dataset + "/train.csv"
    val_labels_filename = os.path.abspath(os.path.join(
        os.getcwd(), '')) + "/data/" + args.dataset + "/val.csv"
    test_labels_filename = os.path.abspath(os.path.join(
        os.getcwd(), '')) + "/data/" + args.dataset + "/test.csv"

    train_dataset = DataGenerator(load_images_into_memory=False,
                                  hdf5_dataset_path=None)
    val_dataset = DataGenerator(load_images_into_memory=False,
                                hdf5_dataset_path=None)
    test_dataset = DataGenerator(load_images_into_memory=False,
                                 hdf5_dataset_path=None)

    #
    train_dataset.parse_csv(
        images_dir=images_dir,
        labels_filename=train_labels_filename,
        input_format=[
            'image_name', 'xmin', 'xmax', 'ymin', 'ymax', 'class_id'
        ],
        # This is the order of the first six columns in the CSV file that contains the labels for your dataset. If your labels are in XML format, maybe the XML parser will be helpful, check the documentation.
        include_classes='all')

    val_dataset.parse_csv(images_dir=images_dir,
                          labels_filename=val_labels_filename,
                          input_format=[
                              'image_name', 'xmin', 'xmax', 'ymin', 'ymax',
                              'class_id'
                          ],
                          include_classes='all')

    test_dataset.parse_csv(images_dir=images_dir,
                           labels_filename=test_labels_filename,
                           input_format=[
                               'image_name', 'xmin', 'xmax', 'ymin', 'ymax',
                               'class_id'
                           ],
                           include_classes='all')

    # Optional: Convert the dataset into an HDF5 dataset. This will require more disk space, but will
    # speed up the training. Doing this is not relevant in case you activated the `load_images_into_memory`
    # option in the constructor, because in that case the images are in memory already anyway. If you don't
    # want to create HDF5 datasets, comment out the subsequent two function calls.

    train_dataset.create_hdf5_dataset(
        file_path=os.path.abspath(os.path.join(os.getcwd(), '')) + "/data/" +
        args.dataset + "/polyp_train.h5",
        resize=False,
        variable_image_size=True,
        verbose=True,
        images_dir=images_dir)

    val_dataset.create_hdf5_dataset(
        file_path=os.path.abspath(os.path.join(os.getcwd(), '')) + "/data/" +
        args.dataset + "/polyp_val.h5",
        resize=False,
        variable_image_size=True,
        verbose=True,
        images_dir=images_dir)

    test_dataset.create_hdf5_dataset(
        file_path=os.path.abspath(os.path.join(os.getcwd(), '')) + "/data/" +
        args.dataset + "/polyp_test.h5",
        resize=False,
        variable_image_size=True,
        verbose=True,
        images_dir=images_dir)
Example #15
# 2: Parse the image and label lists for the training and validation datasets.

# TODO: Set the paths to your dataset here.

# Images
images_dir = '../../datasets/udacity_driving_datasets/'

# Ground truth
train_labels_filename = '../../datasets/udacity_driving_datasets/labels_train.csv'
val_labels_filename = '../../datasets/udacity_driving_datasets/labels_val.csv'

train_dataset.parse_csv(
    images_dir=images_dir,
    labels_filename=train_labels_filename,
    input_format=[
        'image_name', 'xmin', 'xmax', 'ymin', 'ymax', 'class_id'
    ],  # This is the order of the first six columns in the CSV file that contains the labels for your dataset. If your labels are in XML format, maybe the XML parser will be helpful, check the documentation.
    include_classes='all')

val_dataset.parse_csv(
    images_dir=images_dir,
    labels_filename=val_labels_filename,
    input_format=['image_name', 'xmin', 'xmax', 'ymin', 'ymax', 'class_id'],
    include_classes='all')

# Optional: Convert the dataset into an HDF5 dataset. This will require more disk space, but will
# speed up the training. Doing this is not relevant in case you activated the `load_images_into_memory`
# option in the constructor, because in that case the images are in memory already anyway. If you don't
# want to create HDF5 datasets, comment out the subsequent two function calls.
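# The example stops at the comment above; a sketch of the two calls it refers
# to, reusing the HDF5 file names that appear in Example #2.
train_dataset.create_hdf5_dataset(file_path='dataset_udacity_traffic_train.h5',
                                  resize=False,
                                  variable_image_size=True,
                                  verbose=True)

val_dataset.create_hdf5_dataset(file_path='dataset_udacity_traffic_val.h5',
                                resize=False,
                                variable_image_size=True,
                                verbose=True)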
Example #16
model_path = 'ssd7_v3_epoch-38_loss-0.9415_val_loss-0.6735.h5'
# Create the SSDLoss object
ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
K.clear_session()  # Clear previously loaded models from memory.
model = load_model(model_path, custom_objects={'AnchorBoxes': AnchorBoxes,
                                               'compute_loss': ssd_loss.compute_loss})

eval_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)

# Images
images_dir = './data/'
out_dir = './predict_error'
# Ground truth
eval_labels_filename = './data/labels_eval.csv'
eval_dataset.parse_csv(images_dir=images_dir,
                       labels_filename=eval_labels_filename,
                       input_format=['image_name', 'xmin', 'xmax', 'ymin', 'ymax', 'class_id'],
                       include_classes='all')
# Get the number of samples in the evaluation dataset.
eval_dataset_size = eval_dataset.get_dataset_size()
print("Number of images in the evaluation dataset:\t{:>6}".format(eval_dataset_size))
predict_generator = eval_dataset.generate(batch_size=eval_dataset_size,
                                          shuffle=True,
                                          transformations=[],
                                          label_encoder=None,
                                          returns={'processed_images',
                                                   'processed_labels',
                                                   'filenames'},
                                          keep_images_without_gt=False)
batch_images, batch_labels, batch_filenames = next(predict_generator)
# 3: Make predictions
y_pred = model.predict(batch_images)
Example #17
        'keras_exp_stride': keras_exp_stride,
        # 'compute_loss':fcos_loss.compute_loss
    })

convert_to_3_channels = ConvertTo3Channels()
resize = Resize(height=cfgs.IMG_SHORT_SIDE_LEN, width=cfgs.IMG_SHORT_SIDE_LEN)

if cfgs.CREATE_IMAGE_H5:
    val_dataset = DataGenerator(load_images_into_memory=False,
                                hdf5_dataset_path=None)
else:
    val_dataset = DataGenerator(load_images_into_memory=False,
                                hdf5_dataset_path=cfgs.TEST_HDF_DATASET)
val_dataset.parse_csv(
    images_dir=cfgs.IMAGE_DIR,
    labels_filename=cfgs.TEST_LABEL_FILENAME,
    input_format=['image_name', 'xmin', 'xmax', 'ymin', 'ymax', 'class_id'],
    include_classes='all')
if cfgs.CREATE_IMAGE_H5:
    val_dataset.create_hdf5_dataset(file_path=cfgs.TEST_HDF_DATASET,
                                    resize=(cfgs.IMG_SHORT_SIDE_LEN,
                                            cfgs.IMG_SHORT_SIDE_LEN),
                                    variable_image_size=True,
                                    verbose=True)

val_dataset_size = val_dataset.get_dataset_size()
predict_generator = val_dataset.generate(
    batch_size=1,
    shuffle=False,
    transformations=[convert_to_3_channels, resize],
    label_encoder=None,
Example #18
def data_generator_func(config: Dict):
    """Data Generator for training data and validation data
    
    Parameters
    ----------
    config : Dict
        Config yaml/json containing all parameters
    
    Returns
    -------
        train_dataset, val_dataset
    """
    # Init DataGenerator
    start_data = timer()
    train_dataset = DataGenerator(load_images_into_memory=config['training']
                                  ['train_load_images_into_memory'],
                                  hdf5_dataset_path=None)
    val_dataset = DataGenerator(load_images_into_memory=config['training']
                                ['validation_load_images_into_memory'],
                                hdf5_dataset_path=None)
    if config['training']['train_load_images_into_memory'] is not False:
        print("[INFO]... You have chosen to load data into memory")
    else:
        print(
            "[WARNING]... You have chosen not to load data into memory. It will still work but will be much slower"
        )

    train_img_dir = config['training']['train_img_dir']
    val_img_dir = config['training']['val_img_dir']

    train_annotation_dir = config['training']['train_annotation_dir']
    val_annotation_dir = config['training']['val_annotation_dir']

    train_image_set_filename = config['training']['train_image_set_filename']
    val_image_set_filename = config['training']['val_image_set_filename']

    classes = config['training']['classes']

    if config['training']['annotation_type'] == 'xml':
        train_dataset.parse_xml(images_dirs=[train_img_dir],
                                image_set_filenames=[train_image_set_filename],
                                annotations_dirs=[train_annotation_dir],
                                classes=classes,
                                include_classes='all',
                                exclude_truncated=False,
                                exclude_difficult=False,
                                ret=False)

        val_dataset.parse_xml(images_dirs=[val_img_dir],
                              image_set_filenames=[val_image_set_filename],
                              annotations_dirs=[val_annotation_dir],
                              classes=classes,
                              include_classes='all',
                              exclude_truncated=False,
                              exclude_difficult=True,
                              ret=False)

    if config['training']['annotation_type'] == 'csv':
        train_dataset.parse_csv(images_dir=train_img_dir,
                                labels_filename=train_annotation_dir,
                                input_format=[
                                    'image_name', 'xmin', 'xmax', 'ymin',
                                    'ymax', 'class_id'
                                ],
                                include_classes='all')

        val_dataset.parse_csv(images_dir=val_img_dir,
                              labels_filename=val_annotation_dir,
                              input_format=[
                                  'image_name', 'xmin', 'xmax', 'ymin', 'ymax',
                                  'class_id'
                              ],
                              include_classes='all')
    end_data = timer()
    print(
        f"[INFO]...Time taken by Data loading/transformation Job is {(end_data - start_data)/60:.2f} min(s)"
    )
    return train_dataset, val_dataset
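# Hedged usage sketch: a minimal config dict whose keys mirror the ones the
# function reads above; every path and class name is a placeholder.
if __name__ == '__main__':
    config = {
        'training': {
            'train_load_images_into_memory': False,
            'validation_load_images_into_memory': False,
            'annotation_type': 'csv',
            'train_img_dir': './data/train_images/',
            'val_img_dir': './data/val_images/',
            'train_annotation_dir': './data/labels_train.csv',
            'val_annotation_dir': './data/labels_val.csv',
            'train_image_set_filename': None,
            'val_image_set_filename': None,
            'classes': ['background', 'object'],
        }
    }
    train_dataset, val_dataset = data_generator_func(config)
    print(train_dataset.get_dataset_size(), val_dataset.get_dataset_size())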