# TODO: Set the paths to the datasets here.

# The directories that contain the training images and the JSON ground truth.
train_image_set_filename = [
    "/home/uidn4455/Desktop/Devdatta_shared/dataset/COCO-text-2015/Synt-localization_dataset/Train/image",
]
gt_path_json = "/home/uidn4455/Desktop/Devdatta_shared/dataset/COCO-text-2015/Synt-localization_dataset/Train/gt_json"

# The directories for the validation images and ground truth.
val_image_set_filename = "/home/uidn4455/Desktop/Devdatta_shared/dataset/COCO-text-2015/Synt-localization_dataset/Vala/image"
val_gt_path_json = "/home/uidn4455/Desktop/Devdatta_shared/dataset/COCO-text-2015/Synt-localization_dataset/Vala/gt_json"

# Parse the training annotations.
# NOTE(review): `images_dirs` / `annotations_filenames` are plural parameters
# and receive lists everywhere else in this file, so the bare path strings are
# wrapped in one-element lists here — confirm against parse_json's signature.
train_dataset.parse_json(
    images_dirs=train_image_set_filename,
    classes_file='/home/uidn4455/Desktop/Devdatta_shared/OCR/text_box_++_my_imp/class_name.json',
    annotations_filenames=[gt_path_json],
    verbose=True,
    ground_truth_available=True)

# Parse the validation annotations the same way.
val_dataset.parse_json(
    images_dirs=[val_image_set_filename],
    classes_file='/home/uidn4455/Desktop/Devdatta_shared/OCR/text_box_++_my_imp/class_name.json',
    annotations_filenames=[val_gt_path_json],
    # verbose=True,
    ground_truth_available=True)

# 3: Set the batch size.
batch_size = 16  # Change the batch size if you like, or if you run into GPU memory issues.
# SSD model builders and the custom Keras layers/loss they depend on.
from models.keras_ssd7 import build_model
from keras_loss_function.keras_ssd_loss import SSDLoss
from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes
from keras_layers.keras_layer_DecodeDetections import DecodeDetections
from keras_layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast
import matplotlib.patches as patches

# Training split: parse the annotations and cache all images in memory.
train_dataset = DataGenerator(load_images_into_memory=True,
                              hdf5_dataset_path=None)
train_dataset.parse_json(
    images_dirs=images_dir,
    annotations_filenames=["/home/jsearcy/Desktop/new_malaria/train/output.json"],
    ground_truth_available=True,
    include_classes='all')

# Development (held-out) split, built the same way as the training split.
develop_dataset = DataGenerator(load_images_into_memory=True,
                                hdf5_dataset_path=None)
develop_dataset.parse_json(
    images_dirs=images_dir,
    annotations_filenames=["/home/jsearcy/Desktop/new_malaria/develop/output.json"],
    ground_truth_available=True,
    include_classes='all')
def set_generator(self,
                  train_images_dir,
                  train_annotation_path,
                  batch_size,
                  val_images_dir=None,
                  val_annotation_path=None):
    """Build the training (and optionally validation) batch generators.

    Parses the JSON annotations, selects the data-augmentation chain and
    predictor-layer sizes appropriate for ``self.model_name``, builds an
    ``SSDInputEncoder`` from the model's anchor-box configuration, and
    stores the resulting generators on the instance.

    Sets:
        self.generator, self.steps_per_epoch, and — when both
        ``val_images_dir`` and ``val_annotation_path`` are given —
        self.validation_data and self.validation_steps (otherwise None).

    Args:
        train_images_dir (str): Directory containing the training images.
        train_annotation_path (str): Path to the training annotation JSON.
        batch_size (int): Batch size for both generators.
        val_images_dir (str, optional): Directory with validation images.
        val_annotation_path (str, optional): Validation annotation JSON.

    Raises:
        ValueError: If ``self.model_name`` is not 'ssd_7', 'ssd_300' or
            'ssd_512'. (Previously an unknown name fell through and later
            raised a confusing NameError on ``ssd_data_augmentation``.)
    """
    train_dataset = DataGenerator(load_images_into_memory=True,
                                  hdf5_dataset_path=None)
    train_dataset.parse_json(images_dirs=[train_images_dir],
                             annotations_filenames=[train_annotation_path],
                             ground_truth_available=True,
                             include_classes='all',
                             ret=False,
                             verbose=True)
    train_dataset_size = train_dataset.get_dataset_size()

    if self.model_name == 'ssd_7':
        # SSD7 trains on constant-size inputs, so use the augmentation
        # chain that preserves the input size.
        ssd_data_augmentation = DataAugmentationConstantInputSize(
            random_brightness=(-48, 48, 0.5),
            random_contrast=(0.5, 1.8, 0.5),
            random_saturation=(0.5, 1.8, 0.5),
            random_hue=(18, 0.5),
            random_flip=0.5,
            random_translate=((0.03, 0.5), (0.03, 0.5), 0.5),
            random_scale=(0.5, 2.0, 0.5),
            n_trials_max=3,
            clip_boxes=True,
            overlap_criterion='area',
            bounds_box_filter=(0.3, 1.0),
            bounds_validator=(0.5, 1.0),
            n_boxes_min=1,
            background=(0, 0, 0))
        # Spatial dimensions of the predictor layers; the input encoder
        # needs them to lay out the anchor boxes.
        predictor_sizes = [
            self.model.get_layer('classes4').output_shape[1:3],
            self.model.get_layer('classes5').output_shape[1:3],
            self.model.get_layer('classes6').output_shape[1:3],
            self.model.get_layer('classes7').output_shape[1:3]
        ]
    elif self.model_name in ('ssd_300', 'ssd_512'):
        # The original SSD augmentation pipeline (resizes to the model's
        # fixed input size and fills with the dataset mean color).
        ssd_data_augmentation = SSDDataAugmentation(
            img_height=self.image_size[0],
            img_width=self.image_size[1],
            background=self.mean_color)
        # The two variants share the first six predictor layers; SSD512
        # adds one more on top.
        conf_layer_names = [
            'conv4_3_norm_mbox_conf', 'fc7_mbox_conf', 'conv6_2_mbox_conf',
            'conv7_2_mbox_conf', 'conv8_2_mbox_conf', 'conv9_2_mbox_conf'
        ]
        if self.model_name == 'ssd_512':
            conf_layer_names.append('conv10_2_mbox_conf')
        predictor_sizes = [
            self.model.get_layer(name).output_shape[1:3]
            for name in conf_layer_names
        ]
    else:
        raise ValueError(
            "Unknown model_name '{}': expected 'ssd_7', 'ssd_300' or "
            "'ssd_512'.".format(self.model_name))

    # Encoder that converts ground-truth boxes into the per-anchor target
    # tensor the SSD loss expects.
    ssd_input_encoder = SSDInputEncoder(
        img_height=self.image_size[0],
        img_width=self.image_size[1],
        n_classes=self.n_classes,
        predictor_sizes=predictor_sizes,
        scales=self.scales,
        aspect_ratios_per_layer=self.aspect_ratios_per_layer,
        two_boxes_for_ar1=self.two_boxes_for_ar1,
        steps=self.steps,
        offsets=self.offsets,
        clip_boxes=self.clip_boxes,
        variances=self.variances,
        matching_type='multi',
        pos_iou_threshold=0.5,
        neg_iou_limit=0.5,
        normalize_coords=self.normalize_coords)

    self.generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        transformations=[ssd_data_augmentation],
        label_encoder=ssd_input_encoder,
        returns={'processed_images', 'encoded_labels'},
        keep_images_without_gt=False)
    self.steps_per_epoch = ceil(train_dataset_size / batch_size)

    if val_images_dir is not None and val_annotation_path is not None:
        val_dataset = DataGenerator(load_images_into_memory=True,
                                    hdf5_dataset_path=None)
        val_dataset.parse_json(images_dirs=[val_images_dir],
                               annotations_filenames=[val_annotation_path],
                               ground_truth_available=True,
                               include_classes='all',
                               ret=False,
                               verbose=True)
        val_dataset_size = val_dataset.get_dataset_size()

        if self.model_name == 'ssd_300' or self.model_name == 'ssd_512':
            # Validation uses no augmentation — only channel conversion
            # and a resize to the model's fixed input size.
            convert_to_3_channels = ConvertTo3Channels()
            resize = Resize(height=self.image_size[0],
                            width=self.image_size[1])
            transformations = [convert_to_3_channels, resize]
        else:
            transformations = []

        self.validation_data = val_dataset.generate(
            batch_size=batch_size,
            shuffle=False,
            transformations=transformations,
            label_encoder=ssd_input_encoder,
            returns={'processed_images', 'encoded_labels'},
            keep_images_without_gt=False)
        self.validation_steps = ceil(val_dataset_size / batch_size)
    else:
        self.validation_data = None
        self.validation_steps = None
# Write the re-pathed annotation files for each split.
write_new_json(
    train,
    '/home/jsearcy/UO Data Science Dropbox/Public Datasets/BBBC041o/train.json',
    folder='train')
write_new_json(
    test,
    '/home/jsearcy/UO Data Science Dropbox/Public Datasets/BBBC041o/test.json',
    folder='test')

# NOTE(review): the original script had a gibberish statement
# (`oa.aosefoeji`) at this point, which raises NameError and aborts the
# script before the dataset-building code below runs. It looked like a
# leftover debugging halt sentinel and has been removed — confirm the
# code below is actually meant to execute.

# Training split: parse annotations and keep images in memory.
train_dataset = DataGenerator(load_images_into_memory=True,
                              hdf5_dataset_path=None)
train_dataset.parse_json(images_dirs=images_dir,
                         annotations_filenames=["output_train.json"],
                         ground_truth_available=True,
                         include_classes=use_list)

# Validation split: images stay on disk and are packed into an HDF5 file
# below for faster repeated access.
val_dataset = DataGenerator(load_images_into_memory=False,
                            hdf5_dataset_path=None)
val_dataset.parse_json(images_dirs=images_dir,
                       annotations_filenames=["output_test.json"],
                       ground_truth_available=True,
                       include_classes=use_list)
val_dataset.create_hdf5_dataset(file_path='val_dataset.h5',
                                resize=(1200, 1600),
                                variable_image_size=True,
                                verbose=True)