def generate(args):
    """Data generation and augmentation

    Builds the training and validation image generators, resolves the
    normalization statistics (mean/std) used by the shared preprocessing
    function, and writes `model/stats.txt` and `model/labels.csv`.

    # Arguments
        args: parsed command line arguments, read by attribute. Fields
            used: train_dir, validation_dir, input_size, batch_size,
            shear_range, zoom_range, rotation_range, width_shift_range,
            height_shift_range, horizontal_flip, vertical_flip, mean, std.
            `mean` and `std` are comma-separated strings of numbers, or
            None to have the statistics computed from the training images.

    # Returns
        train_generator: train set generator
        validation_generator: validation set generator
        num_training: Integer, number of images in the train split.
        num_validation: Integer, number of images in the validation split.
        num_classes: Integer, number of classes found in the train split.
    """
    target_size = (args.input_size, args.input_size)

    # One normalizer instance is shared by both generators, so statistics
    # registered on it below apply to training and validation alike.
    normalizer = Normalizer()

    # Augmentation is applied to the training data only. No `rescale` is
    # passed; per-image preprocessing is delegated to `normalizer`.
    train_aug = tf.keras.preprocessing.image.ImageDataGenerator(
        shear_range=args.shear_range,
        zoom_range=args.zoom_range,
        rotation_range=args.rotation_range,
        width_shift_range=args.width_shift_range,
        height_shift_range=args.height_shift_range,
        horizontal_flip=args.horizontal_flip,
        vertical_flip=args.vertical_flip,
        preprocessing_function=normalizer)

    validation_aug = tf.keras.preprocessing.image.ImageDataGenerator(
        preprocessing_function=normalizer)

    train_generator = train_aug.flow_from_directory(
        args.train_dir,
        target_size=target_size,
        batch_size=args.batch_size,
        class_mode='categorical',
        shuffle=True)

    if args.mean is None or args.std is None:
        # No explicit statistics given: derive them from the training files.
        mean, std = normalizer.get_stats(
            args.train_dir, train_generator.filenames, target_size)
    else:
        mean = [float(m.strip()) for m in args.mean.split(',')]
        std = [float(s.strip()) for s in args.std.split(',')]

    # Register the statistics on the normalizer in both cases so the
    # preprocessing function shared by the two generators is configured.
    # NOTE(review): assumes `set_stats` is safe to call with the values
    # `get_stats` just returned -- confirm against Normalizer.
    normalizer.set_stats(mean, std)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs('model', exist_ok=True)

    # Only the mean is persisted here; std is not written to the file.
    with open('model/stats.txt', 'w') as stats:
        stats.write("Dataset mean [r, g, b] = {}\n".format(mean))

    # Invert class_indices so each CSV row is written as (value, key) of
    # the generator's mapping.
    label_map = {v: k for k, v in train_generator.class_indices.items()}

    with open('model/labels.csv', 'w') as csv_file:
        csv_writer = csv.writer(csv_file, lineterminator='\n')
        csv_writer.writerows(label_map.items())

    validation_generator = validation_aug.flow_from_directory(
        args.validation_dir,
        target_size=target_size,
        batch_size=args.batch_size,
        class_mode='categorical')

    return (train_generator,
            validation_generator,
            train_generator.samples,
            validation_generator.samples,
            len(label_map))