def build_hdf5(dataset, dataset_mean_path, label_encoder_path):
    # list of R, G, B means
    R, G, B = [], [], []

    # initialize image preprocessor
    aap = AspectAwarePreprocessor(256, 256)

    # loop over the dataset tuples
    for d_type, paths, labels, output_path in dataset:
        # construct HDF5 dataset writer
        writer = HDF5DatasetWriter((len(labels), 256, 256, 3), output_path)

        # construct progress bar
        widgets = [
            f'Building {d_type}: ', progressbar.Percentage(), ' ',
            progressbar.Bar(), ' ', progressbar.ETA()
        ]
        pbar = progressbar.ProgressBar(maxval=len(labels),
                                       widgets=widgets).start()

        for i, (path, label) in enumerate(zip(paths, labels)):
            image = cv2.imread(path)
            image = aap.preprocess(image)

            if d_type == 'train':
                b, g, r = cv2.mean(image)[:3]
                R.append(r)
                G.append(g)
                B.append(b)

            writer.add([image], [label])
            pbar.update(i)

        writer.close()
        pbar.finish()

    if not os.path.exists(config.OUTPUT_BASE):
        os.makedirs(config.OUTPUT_BASE)

    # serialize means of R, G, B
    print('[INFO] serializing means...')
    D = {'R': np.mean(R), 'G': np.mean(G), 'B': np.mean(B)}
    f = open(dataset_mean_path, 'w')
    f.write(json.dumps(D))
    f.close()

    # serialize label encoder (`le` is assumed to be defined at module scope)
    print('[INFO] serializing label encoder...')
    f = open(label_encoder_path, 'wb')
    f.write(pickle.dumps(le))
    f.close()
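# --- Hedged sketch (not taken from any of the snippets in this section): every
# builder here assumes an HDF5DatasetWriter class that is never shown. The
# version below is a minimal sketch modeled on the common dims/outputPath/
# dataKey/bufSize interface; the individual projects swap the argument order,
# rename bufSize to buf_size/buffer_size, or add extra options, so adapt it to
# the local class rather than treating this as the canonical implementation.
import os

import h5py


class HDF5DatasetWriter:
    def __init__(self, dims, outputPath, dataKey="images", bufSize=1000):
        # refuse to overwrite an existing database
        if os.path.exists(outputPath):
            raise ValueError("The supplied outputPath already exists: " + outputPath)

        # open the HDF5 file and create the image/feature and label datasets
        self.db = h5py.File(outputPath, "w")
        self.data = self.db.create_dataset(dataKey, dims, dtype="float")
        self.labels = self.db.create_dataset("labels", (dims[0],), dtype="int")

        # in-memory buffer, flushed to disk every bufSize rows
        self.bufSize = bufSize
        self.buffer = {"data": [], "labels": []}
        self.idx = 0

    def add(self, rows, labels):
        # buffer the rows and labels, flushing once the buffer is full
        self.buffer["data"].extend(rows)
        self.buffer["labels"].extend(labels)
        if len(self.buffer["data"]) >= self.bufSize:
            self.flush()

    def flush(self):
        # write the buffered rows to disk, then reset the buffer
        i = self.idx + len(self.buffer["data"])
        self.data[self.idx:i] = self.buffer["data"]
        self.labels[self.idx:i] = self.buffer["labels"]
        self.idx = i
        self.buffer = {"data": [], "labels": []}

    def storeClassLabels(self, classLabels):
        # store the raw class label strings in a separate dataset
        dt = h5py.special_dtype(vlen=str)
        labelSet = self.db.create_dataset("label_names",
                                          (len(classLabels),), dtype=dt)
        labelSet[:] = classLabels

    def close(self):
        # flush any remaining rows, then close the file
        if len(self.buffer["data"]) > 0:
            self.flush()
        self.db.close()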
# construct a list pairing the training, validation, and testing
# image paths along with their corresponding labels and output HDF5
# files
datasets = [("train", trainPaths, trainLabels, config.TRAIN_HDF5),
            ("val", valPaths, valLabels, config.VAL_HDF5),
            ("test", testPaths, testLabels, config.TEST_HDF5)]

# initialize the lists of RGB channel averages
(R, G, B) = ([], [], [])

# loop over the dataset tuples
for (dType, paths, labels, outputPath) in datasets:
    # create HDF5 writer
    print("[INFO] building {}...".format(outputPath))
    writer = HDF5DatasetWriter((len(paths), 64, 64, 3), outputPath)

    # initialize the progress bar
    widgets = [
        "Building Dataset: ", progressbar.Percentage(), " ",
        progressbar.Bar(), " ", progressbar.ETA()
    ]
    pbar = progressbar.ProgressBar(maxval=len(paths),
                                   widgets=widgets).start()

    # loop over the image paths
    for (i, (path, label)) in enumerate(zip(paths, labels)):
        # load the image from disk
        image = cv2.imread(path)
ap.add_argument('-b', '--buffer-size', type=int, default=1000,
                help='size of buffer')
args = vars(ap.parse_args())

# grab the list of image paths and shuffle them
image_paths = list(paths.list_images(args['dataset']))
random.shuffle(image_paths)
buf_size = args['buffer_size']

# get the labels by extracting them from the image paths
labels = [image_path.split(os.path.sep)[-2] for image_path in image_paths]
le = LabelEncoder()
labels = le.fit_transform(labels)
label_names = le.classes_

# construct HDF5 dataset writer
dataset = HDF5DatasetWriter(args['output'],
                            (len(image_paths), 224, 224, 3),
                            buf_size=buf_size)
dataset.storeClassLabels(label_names)

# construct progress bar
widgets = ['Convert images ', progressbar.Percentage(), ' ',
           progressbar.Bar(), ' ', progressbar.ETA()]
pbar = progressbar.ProgressBar(maxval=len(image_paths),
                               widgets=widgets).start()

for i in range(0, len(image_paths), buf_size):
    batch_paths = image_paths[i:i + buf_size]
    batch_labels = labels[i:i + buf_size]
    batch_images = []

    for image_path in batch_paths:
        image = load_img(image_path, target_size=(224, 224))
        image = img_to_array(image)
# construct a list pairing the training, validation, and testing # image paths along with their corresponding labels and output HDF5 files datasets = [ ("train", trainPaths, trainLabels, args['train_data']), ("test", testPaths, testLabels, args['test_data'])] # original size of generated license plate images IMAGE_WIDTH = 151 IMAGE_HEIGHT = 32 # loop over the images tuples for (dType, paths, labels, outputPath) in datasets: # create HDF5 writer print("[INFO] building {}...".format(outputPath)) writer = HDF5DatasetWriter((len(paths), IMAGE_HEIGHT, IMAGE_WIDTH), outputPath) # initialize the progress bar widgets = ["Building Dataset: ", progressbar.Percentage(), " ", progressbar.Bar(), " ", progressbar.ETA()] pbar = progressbar.ProgressBar(maxval=len(paths), widgets=widgets) pbar.start() # loop over the image paths for (i, (path, label)) in enumerate(zip(paths, labels)): # load the image and process it # image = cv2.imread(path, cv2.IMREAD_COLOR) # don't use imread because bug with utf-8 paths stream = open(path, "rb") bytes = bytearray(stream.read()) numpyarray = np.asarray(bytes, dtype=np.uint8) image = cv2.imdecode(numpyarray, cv2.IMREAD_GRAYSCALE)
        target = image[y + config.PAD:y + config.PAD + config.LABEL_SIZE,
                       x + config.PAD:x + config.PAD + config.LABEL_SIZE]

        cv2.imwrite(os.path.sep.join([config.IMAGES, f'{total}.png']), crop)
        cv2.imwrite(os.path.sep.join([config.LABELS, f'{total}.png']), target)

# load images and labels
print('[INFO] loading images and labels...')
image_paths = list(paths.list_images(config.IMAGES))
label_paths = list(paths.list_images(config.LABELS))

# define HDF5 dataset writers
inputs_hdf5 = HDF5DatasetWriter(
    config.INPUTS_DB,
    (len(image_paths), config.INPUT_DIM, config.INPUT_DIM, 3))
outputs_hdf5 = HDF5DatasetWriter(
    config.OUTPUTS_DB,
    (len(label_paths), config.LABEL_SIZE, config.LABEL_SIZE, 3))

for image_path, label_path in zip(image_paths, label_paths):
    image = cv2.imread(image_path)
    label = cv2.imread(label_path)
    inputs_hdf5.add([image], [-1])
    outputs_hdf5.add([label], [-1])

# close HDF5 datasets
inputs_hdf5.close()
outputs_hdf5.close()
random_state=42)
(trainPaths, valPaths, trainLabels, valLabels) = split

# construct a list pairing the training, validation, and testing
# image paths along with their corresponding labels and output HDF5
# files
dataset = [("train", trainPaths, trainLabels, config.TRAIN_HDF5),
           ('test', testPaths, testLabels, config.TEST_HDF5),
           ("val", valPaths, valLabels, config.VAL_HDF5)]

aap = AspectAwarePreprocessor(256, 256)
(R, G, B) = ([], [], [])

for (dtype, paths, labels, outputPath) in dataset:
    print('[INFO] building {}...'.format(outputPath))
    writer = HDF5DatasetWriter((len(paths), 256, 256, 3),
                               outputPath=outputPath)

    widgets = [
        'Building Dataset: ', progressbar.Percentage(), " ",
        progressbar.Bar(), " ", progressbar.ETA()
    ]
    pgbar = progressbar.ProgressBar(max_value=len(paths),
                                    widgets=widgets).start()

    for (i, (path, label)) in enumerate(zip(paths, labels)):
        image = cv2.imread(path)
        image = aap.preprocess(image)

        if dtype == 'train':
            (b, g, r) = cv2.mean(image)[:3]
# ... via array slicing during training time
print("[INFO] loading images...")
imagePaths = list(paths.list_images(args['dataset']))
random.shuffle(imagePaths)

# extract the class labels (angles) from the image paths, then encode the labels
labels = [p.split(os.path.sep)[-2] for p in imagePaths]
le = LabelEncoder()
labels = le.fit_transform(labels)

# load the VGG16 network
print("[INFO] loading network...")
model = VGG16(weights='imagenet', include_top=False)

# initialize the HDF5 dataset writer, then store the class label names in the dataset
dataset = HDF5DatasetWriter((len(imagePaths), 512 * 7 * 7), args['output'],
                            dataKey="features", buffSize=args['buffer_size'])
dataset.storeClassLabels(le.classes_)

# initialize the progress bar
widgets = ["Extracting features: ", progressbar.Percentage(), " ",
           progressbar.Bar(), " ", progressbar.ETA()]
pbar = progressbar.ProgressBar(maxval=len(imagePaths),
                               widgets=widgets).start()

# loop over the images in batches
for i in np.arange(0, len(imagePaths), bs):
    # extract the batch of images and labels, then initialize the
    # list of actual images and labels
    batchPaths = imagePaths[i:i + bs]
    batchLabels = labels[i:i + bs]
    batchImages = []

    # loop over the images and labels in the current batch
        trainImages.append(image)
        trainLabels.append(label)

    # check if this is a validation image
    elif usage == "PrivateTest":
        valImages.append(image)
        valLabels.append(label)

    # otherwise, this must be a testing image
    else:
        testImages.append(image)
        testLabels.append(label)

# construct a list pairing the training, validation, and testing
# images along with their corresponding labels and output HDF5
# files
datasets = [
    (trainImages, trainLabels, config.TRAIN_HDF5),
    (valImages, valLabels, config.VAL_HDF5),
    (testImages, testLabels, config.TEST_HDF5)]

# loop over the dataset tuples
for (images, labels, outputPath) in datasets:
    # create HDF5 writer
    print("[INFO] building {}...".format(outputPath))
    writer = HDF5DatasetWriter((len(images), 48, 48), outputPath)

    # loop over the images and add them to the dataset
    for (image, label) in zip(images, labels):
        writer.add([image], [label])

    # close the HDF5 writer
    writer.close()

# close the input file
f.close()
def main():
    """Serialize the dataset
    """
    # grab the paths to the images
    train_paths = list(paths.list_images(config.IMAGES_PATH))
    train_labels = [
        p.split(os.path.sep)[-1].split(".")[0] for p in train_paths
    ]
    label_encoder = LabelEncoder()
    train_labels = label_encoder.fit_transform(train_labels)

    # perform stratified sampling from the training set to build the
    # testing split from the training data
    split = train_test_split(train_paths, train_labels,
                             test_size=config.NUM_TEST_IMAGES,
                             stratify=train_labels,
                             random_state=42)
    (train_paths, test_paths, train_labels, test_labels) = split

    # perform another stratified sampling, this time to build the validation data
    split = train_test_split(train_paths, train_labels,
                             test_size=config.NUM_VAL_IMAGES,
                             stratify=train_labels,
                             random_state=42)
    (train_paths, val_paths, train_labels, val_labels) = split

    # construct a list pairing the training, validation, and testing image paths along
    # with their corresponding labels and output HDF5 files
    datasets = [
        ("train", train_paths, train_labels, config.TRAIN_HDF5),
        ("val", val_paths, val_labels, config.VAL_HDF5),
        ("test", test_paths, test_labels, config.TEST_HDF5),
    ]

    # initialize the image preprocessor and the lists of RGB channel averages
    aap = AspectAwarePreprocessor(256, 256)
    (R, G, B) = ([], [], [])

    # loop over the dataset tuples
    for (dataset_type, path_list, labels, output_path) in datasets:
        # create HDF5 writer
        print("[INFO] building {}...".format(output_path))
        writer = HDF5DatasetWriter((len(path_list), 256, 256, 3), output_path)

        # initialize the progress bar
        widgets = [
            "Building Dataset: ", progressbar.Percentage(), " ",
            progressbar.Bar(), " ", progressbar.ETA()
        ]
        pbar = progressbar.ProgressBar(maxval=len(path_list),
                                       widgets=widgets).start()

        # loop over the image paths
        for (i, (path, label)) in enumerate(zip(path_list, labels)):
            # load the image and process it
            image = cv2.imread(path)
            image = aap.preprocess(image)

            # if we are building the training dataset, then compute the mean of
            # each channel in the image, then update the respective lists
            if dataset_type == "train":
                (b, g, r) = cv2.mean(image)[:3]
                R.append(r)
                G.append(g)
                B.append(b)

            # add the image and label to the HDF5 dataset
            writer.add([image], [label])
            pbar.update(i)

        # close the HDF5 writer
        pbar.finish()
        writer.close()

    # construct a dictionary of averages, then serialize the means to a JSON file
    print("[INFO] serializing means...")
    rgb_dict = {"R": np.mean(R), "G": np.mean(G), "B": np.mean(B)}
    f = open(config.DATASET_MEAN, "w")
    f.write(json.dumps(rgb_dict))
    f.close()
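# --- Hedged sketch: several of the builders in this section call
# aap.preprocess() on an AspectAwarePreprocessor that is never defined in the
# snippets themselves. The class below is one plausible implementation (resize
# along the smaller dimension, center-crop the other, then force the exact
# target size); treat it as an assumption, not the canonical version used by
# each project. It assumes imutils is available for aspect-preserving resizes.
import cv2
import imutils


class AspectAwarePreprocessor:
    def __init__(self, width, height, inter=cv2.INTER_AREA):
        self.width = width
        self.height = height
        self.inter = inter

    def preprocess(self, image):
        (h, w) = image.shape[:2]
        (dW, dH) = (0, 0)

        # resize along the smaller dimension, remembering how much of the
        # larger dimension will need to be cropped away
        if w < h:
            image = imutils.resize(image, width=self.width, inter=self.inter)
            dH = int((image.shape[0] - self.height) / 2.0)
        else:
            image = imutils.resize(image, height=self.height, inter=self.inter)
            dW = int((image.shape[1] - self.width) / 2.0)

        # center-crop, then resize once more to guarantee the exact output size
        (h, w) = image.shape[:2]
        image = image[dH:h - dH, dW:w - dW]
        return cv2.resize(image, (self.width, self.height),
                          interpolation=self.inter)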
def extract_backgrounds(archive_name, output_path, max_items=np.inf):
    print("[INFO] reading content of {}...".format(archive_name))
    tar = tarfile.open(name=archive_name)
    files = tar.getnames()

    # create shuffled index list
    randomized_indexes = np.arange(len(files))
    np.random.shuffle(randomized_indexes)

    # pick max number of items
    if max_items == np.inf or max_items > len(files):
        max_items = len(files)
    randomized_indexes = randomized_indexes[0:max_items]

    print("[INFO] building {}...".format(output_path))
    writer = HDF5DatasetWriter(
        (len(randomized_indexes), IMAGE_HEIGHT, IMAGE_WIDTH), output_path)

    widgets = [
        "Building Dataset: ", progressbar.Percentage(), " ",
        progressbar.Bar(), " ", progressbar.ETA()
    ]
    pbar = progressbar.ProgressBar(maxval=len(randomized_indexes),
                                   widgets=widgets).start()

    index = 0
    for i, file in enumerate(files):
        if i not in randomized_indexes:
            continue

        f = tar.extractfile(file)
        if f is None:
            continue  # skip directories

        try:
            image = im_from_file(f)
        finally:
            f.close()

        if image is None:
            continue  # skip non-image files

        # make the image square by cutting the larger dimension down to the
        # smaller one
        if image.shape[0] > image.shape[1]:
            image = image[:image.shape[1], :]
        else:
            image = image[:, :image.shape[0]]

        # resize the square crop to the target size
        if image.shape[0] != 256:
            image = cv2.resize(image, (256, 256))

        # name derived from the running index
        name = "{:08}".format(index)

        # check image size
        if not image.shape == (IMAGE_HEIGHT, IMAGE_WIDTH):
            print("image with wrong size: %s" % name)
            continue

        # add the image and name to the HDF5 db
        writer.add([image], [name])
        pbar.update(index)
        index += 1

    # close the HDF5 writer
    pbar.finish()
    writer.close()
    print("[INFO] {} images saved to {}...".format(index, output_path))
    f.close()

    # put image and label to hdf5 file
    # if j < 5000:
    #     trainImages.append(results)
    #     trainLabels.append(char_ids_padded)
    # elif j < 6000:
    #     testImages.append(results)
    #     testLabels.append(char_ids_padded)
    # elif j < 7000:
    #     valImages.append(results)
    #     valLabels.append(char_ids_padded)
    trainImages.append(results)
    trainLabels.append(char_ids_padded)

# datasets = [
#     (trainImages, trainLabels, 'hdf5/train.hdf5'),
#     (valImages, valLabels, 'hdf5/val.hdf5'),
#     (testImages, testLabels, 'hdf5/test.hdf5')]
datasets = [(trainImages, trainLabels, 'hdf5/test3.hdf5')]

for trainImages, trainLabels, path in datasets:
    print("[INFO] building {}...".format(path))
    print("len: ", len(trainLabels))
    writer = HDF5DatasetWriter((len(trainLabels), 300, 32), path)

    for (image, label) in zip(trainImages, trainLabels):
        writer.add([image], [label])

    writer.close()

# f.close()
# grab the list of images that we'll be describing, then randomly
# shuffle them to allow for easy training and testing splits via
# array slicing during training time
print("[INFO] loading images...")
imagePaths = list(paths.list_images(config.TEST_IMAGES_PATH))
# random.shuffle(imagePaths)  # pre-shuffled, nothing wrong with shuffling again
ids = [os.path.splitext(os.path.basename(path))[0] for path in imagePaths]
labels = [0 for i in ids]

# encode the labels
# le = LabelEncoder()
# labels = le.fit_transform(labels)

# initialize the HDF5 dataset writer, then store the class label names in the dataset
dataset = HDF5DatasetWriter((len(imagePaths), config.INPUT_SIZE,
                             config.INPUT_SIZE, 3), config.TEST_HDF5)
dataset.storeClassLabels(classNames)

# initialize the progress bar
widgets = ["Saving Images: ", progressbar.Percentage(), " ",
           progressbar.Bar(), " ", progressbar.ETA()]
pbar = progressbar.ProgressBar(maxval=len(imagePaths),
                               widgets=widgets).start()

# loop over the images one at a time
for i in np.arange(0, len(imagePaths)):
    # grab the current path, label, and id
    imagePath = imagePaths[i]
    label = labels[i]
    _id = ids[i]
imagePaths = list(paths.list_images(args["dataset"])) random.shuffle(imagePaths) #value in config is value after rotate, so width is height, height is width width = config.WIDTH #160 height = config.HEIGHT #32 k1 = width / height (trainImages, trainLabels) = ([], []) (valImages, valLabels) = ([], []) (testImages, testLabels) = ([], []) writer = HDF5DatasetWriter((20000, config.HEIGHT, config.WIDTH), 'hdf5/val.hdf5', max_label_length=config.MAX_LENGTH) for j, imagePath in tqdm(enumerate(imagePaths)): imagePath2 = '' for k in imagePath: if k != '\\': imagePath2 += k #print(imagePath2) # imagePath = str(imagePath) image = cv2.imread(imagePath2, cv2.IMREAD_GRAYSCALE) k2 = image.shape[1] / image.shape[0] if k2 < k1: resized = imutils.resize(image, height=height) zeros = np.zeros((height, width - resized.shape[1]))
datasets = [('train', train_paths, train_labels, config.TRAIN_HDF5),
            ('val', val_paths, val_labels, config.VAL_HDF5),
            ('test', test_paths, test_labels, config.TEST_HDF5)]

aap = AspectAwarePreprocessor(256, 256)

for d_type, paths, labels, hdf5_path in datasets:
    widgets = [
        f'Building {d_type}:', ' ', progressbar.Percentage(), ' ',
        progressbar.Bar(), ' ', progressbar.ETA()
    ]
    pbar = progressbar.ProgressBar(maxval=len(paths),
                                   widgets=widgets).start()
    writer = HDF5DatasetWriter(hdf5_path, (len(paths), 256, 256, 3))

    for i, (path, label) in enumerate(zip(paths, labels)):
        image = cv2.imread(path)
        image = aap.preprocess(image)

        if d_type == 'train':
            b, g, r = cv2.mean(image)[:3]
            b_mean.append(b)
            g_mean.append(g)
            r_mean.append(r)

        writer.add([image], [label])
        pbar.update(i)
("train", trainPaths, trainLabels, os.path.join(args["output"], "train.hdf5")), ("val", valPaths, valLabels, os.path.join(args["output"], "val.hdf5")), ("test", testPaths, testLabels, os.path.join(args["output"], "test.hdf5")) ] # initialize the image pre-processor and the lists of RGB channel averages aap = AspectAwarePreprocessor(256, 256) (R, G, B) = ([], [], []) # loop over the dataset tuples for (dType, paths, labels, outputPath) in datasets: # create HDF5 writer print("[INFO] building {}...".format(outputPath)) writer = HDF5DatasetWriter((len(paths), 256, 256, 3), outputPath, bufSize=args["buffer_size"]) # initialize the progress bar widgets = [ "Building Dataset: ", progressbar.Percentage(), " ", progressbar.Bar(), " ", progressbar.ETA() ] pbar = progressbar.ProgressBar(maxval=len(paths), widgets=widgets).start() # loop over the image paths for (i, (path, label)) in enumerate(zip(paths, labels)): # load the image and process it image = cv2.imread(path)
# construct a list pairing the training, validation, and testing # image paths along with their corresponding labels and output HDF5 files datasets = [("train", trainPaths, trainLabels, trainIds, config.TRAIN_HDF5), ("val", valPaths, valLabels, valIds, config.VAL_HDF5), ("test", testPaths, testLabels, testIds, config.TEST_HDF5)] # initialize the image pre-processor and the lists of RGB channel averages aap = AspectAwarePreprocessor(config.INPUT_SIZE, config.INPUT_SIZE) (R, G, B) = ([], [], []) # loop over the dataset tuples for (dType, paths, labels, ids, outputPath) in datasets: # create HDF5 writer print("[INFO] building {}...".format(outputPath)) writer = HDF5DatasetWriter( (len(paths), config.INPUT_SIZE, config.INPUT_SIZE, 3), outputPath) writer.storeClassLabels(le.classes_) # initialize the progress bar widgets = [ "Building Dataset: ", progressbar.Percentage(), " ", progressbar.Bar(), " ", progressbar.ETA() ] pbar = progressbar.ProgressBar(maxval=len(paths), widgets=widgets).start() # loop over the image paths for (i, (path, label, _id)) in enumerate(zip(paths, labels, ids)): # load the image and process it image = cv2.imread(path)
"{}.png".format(total)]) # write the images to disk cv2.imwrite(cropPath, crop) cv2.imwrite(targetPath, target) # increment the crop total total += 1 # grab the paths to the images print("[INFO] building HDF5 datasets...") inputPaths = sorted(list(paths.list_images(config.IMAGES))) outputPaths = sorted(list(paths.list_images(config.LABELS))) # initialize the HDF5 datasets inputWriter = HDF5DatasetWriter((len(inputPaths), config.INPUT_DIM, config.INPUT_DIM, 3), config.INPUTS_DB) outputWriter = HDF5DatasetWriter((len(outputPaths), config.LABEL_SIZE, config.LABEL_SIZE, 3), config.OUTPUTS_DB) # loop over the images for (inputPath, outputPath) in zip(inputPaths, outputPaths): # load the two images and add them to their respective datasets inputImage = cv2.imread(inputPath) outputImage = cv2.imread(outputPath) inputWriter.add([inputImage], [-1]) outputWriter.add([outputImage], [-1]) # close the HDF5 datasets inputWriter.close() outputWriter.close()
def main():
    """Extract features from Cats vs. Dogs dataset using ResNet50
    """
    # construct the argument parser and parse the arguments
    args = argparse.ArgumentParser()
    args.add_argument("-d", "--dataset", required=True,
                      help="path to input dataset")
    args.add_argument("-o", "--output", required=True,
                      help="path to output HDF5 file")
    args.add_argument("-b", "--batch-size", type=int, default=16,
                      help="batch size of images to be passed through network")
    args.add_argument("-s", "--buffer-size", type=int, default=1000,
                      help="size of feature extraction buffer")
    args = vars(args.parse_args())

    # store the batch size in a convenience variable
    batch_size = args["batch_size"]

    # grab the list of images that we'll be describing, then randomly shuffle them
    # to allow for easy training and testing splits via array slicing during training time
    print("[INFO] loading images...")
    image_paths = list(paths.list_images(args["dataset"]))
    random.shuffle(image_paths)

    # extract the class labels from the image paths then encode the labels
    labels = [p.split(os.path.sep)[-1].split(".")[0] for p in image_paths]
    label_encoder = LabelEncoder()
    labels = label_encoder.fit_transform(labels)

    # load the ResNet50 network
    print("[INFO] loading network...")
    model = ResNet50(weights="imagenet", include_top=False)

    # initialize the HDF5 dataset writer, then store the class label names in the dataset
    dataset = HDF5DatasetWriter((len(image_paths), 100352), args["output"],
                                data_key="features",
                                buffer_size=args["buffer_size"])
    dataset.store_class_labels(label_encoder.classes_)

    # initialize the progress bar
    widgets = [
        "Extracting Features: ", progressbar.Percentage(), " ",
        progressbar.Bar(), " ", progressbar.ETA()
    ]
    pbar = progressbar.ProgressBar(maxval=len(image_paths),
                                   widgets=widgets).start()

    # loop over the images in batches
    for i in np.arange(0, len(image_paths), batch_size):
        # extract the batch of images and labels, then initialize the
        # list of actual images that will be passed through the network
        # for feature extraction
        batch_paths = image_paths[i:i + batch_size]
        batch_labels = labels[i:i + batch_size]
        batch_images = []

        # loop over the images and labels in the current batch
        for (_, image_path) in enumerate(batch_paths):
            # load the input image using the Keras helper utility
            # while ensuring the image is resized to 224x224 pixels
            image = load_img(image_path, target_size=(224, 224))
            image = img_to_array(image)

            # preprocess the image by (1) expanding the dimensions and
            # (2) subtracting the mean RGB pixel intensity from the ImageNet dataset
            image = np.expand_dims(image, axis=0)
            image = imagenet_utils.preprocess_input(image)

            # add the image to the batch
            batch_images.append(image)

        # pass the images through the network and use the outputs as our actual features
        batch_images = np.vstack(batch_images)
        features = model.predict(batch_images, batch_size=batch_size)

        # reshape the features so that each image is represented by
        # a flattened feature vector of the final convolutional block outputs
        features = features.reshape((features.shape[0], 100352))

        # add the features and labels to our HDF5 dataset
        dataset.add(features, batch_labels)
        pbar.update(i)

    # close the dataset
    dataset.close()
    pbar.finish()
def main():
    """Serialize the dataset
    """
    # grab the paths to the training images, then extract the training class labels and encode them
    train_paths = list(paths.list_images(config.TRAIN_IMAGES))
    train_labels = [p.split(os.path.sep)[-3] for p in train_paths]
    label_encoder = LabelEncoder()
    train_labels = label_encoder.fit_transform(train_labels)

    # perform stratified sampling from the training set to construct a testing set
    split = train_test_split(train_paths, train_labels,
                             test_size=config.NUM_TEST_IMAGES,
                             stratify=train_labels,
                             random_state=42)
    (train_paths, test_paths, train_labels, test_labels) = split

    # load the validation filename => class mappings from file and then use these
    # mappings to build the validation paths and label lists
    mapping = open(config.VAL_MAPPINGS).read().strip().split("\n")
    mapping = [r.split("\t")[:2] for r in mapping]
    val_paths = [os.path.sep.join([config.VAL_IMAGES, m[0]]) for m in mapping]
    val_labels = label_encoder.transform([m[1] for m in mapping])

    # construct a list pairing the training, validation, and testing image paths
    # along with their corresponding labels and output HDF5 files
    datasets = [
        ("train", train_paths, train_labels, config.TRAIN_HDF5),
        ("val", val_paths, val_labels, config.VAL_HDF5),
        ("test", test_paths, test_labels, config.TEST_HDF5),
    ]

    # initialize the lists of RGB channel averages
    (R, G, B) = ([], [], [])

    # loop over the dataset tuples
    for (dataset_type, image_paths, labels, output_path) in datasets:
        # create HDF5 writer
        print("[INFO] building {}...".format(output_path))
        writer = HDF5DatasetWriter((len(image_paths), 64, 64, 3), output_path)

        # initialize the progress bar
        widgets = [
            "Building Dataset: ", progressbar.Percentage(), " ",
            progressbar.Bar(), " ", progressbar.ETA()
        ]
        pbar = progressbar.ProgressBar(maxval=len(image_paths),
                                       widgets=widgets).start()

        # loop over the image paths
        for (i, (path, label)) in enumerate(zip(image_paths, labels)):
            # load the image from disk
            image = cv2.imread(path)

            # if we are building the training dataset, then compute the mean of each
            # channel in the image, then update the respective lists
            if dataset_type == "train":
                (b, g, r) = cv2.mean(image)[:3]
                R.append(r)
                G.append(g)
                B.append(b)

            # add the image and label to the HDF5 dataset
            writer.add([image], [label])
            pbar.update(i)

        # close the HDF5 writer
        pbar.finish()
        writer.close()

    # construct a dictionary of averages, then serialize the means to a JSON file
    print("[INFO] serializing means...")
    rgb_dict = {"R": np.mean(R), "G": np.mean(G), "B": np.mean(B)}
    f = open(config.DATASET_MEAN, "w")
    f.write(json.dumps(rgb_dict))
    f.close()
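# --- Hedged sketch: a quick sanity check after any of the builders in this
# section has run. It assumes the default "images"/"labels" dataset keys (the
# feature-extraction scripts write a "features" key instead) and reuses the
# config.TRAIN_HDF5 path from the snippets above; adjust the path and keys to
# whichever file was just built.
import h5py

with h5py.File(config.TRAIN_HDF5, "r") as db:
    # list the datasets stored in the file and their shapes
    print(list(db.keys()))
    print("images:", db["images"].shape, "labels:", db["labels"].shape)

    # writers that call storeClassLabels() also create a "label_names" dataset
    if "label_names" in db:
        print("classes:", [name for name in db["label_names"][:]])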
image_paths = list(paths.list_images(args['dataset']))
random.shuffle(image_paths)

labels = [
    image_path.split(os.path.sep)[-1].split('.')[0]
    for image_path in image_paths
]
le = LabelEncoder()
labels = le.fit_transform(labels)

# load model
print('[INFO] loading model...')
model = ResNet50(weights='imagenet', include_top=False, pooling='avg')

# initialize HDF5 dataset writer with the output HDF5 path
dataset = HDF5DatasetWriter((len(labels), 2048), args['output'],
                            data_key='features',
                            buf_size=args['buffer_size'])
dataset.storeClassLabels(le.classes_)

# construct progress bar
widgets = [
    'Extracting features: ', progressbar.Percentage(), ' ',
    progressbar.Bar(), ' ', progressbar.ETA()
]
pbar = progressbar.ProgressBar(maxval=len(labels),
                               widgets=widgets).start()

# loop over batches of images
for i in range(0, len(labels), batch_size):
    batch_paths = image_paths[i:i + batch_size]
# extract labels from image paths
labels = [image_path.split(os.path.sep)[-2] for image_path in image_paths]
le = LabelEncoder()
labels = le.fit_transform(labels)

# store batch size for convenience
batch_size = args['batch_size']

# load VGG16
print('[INFO] loading VGG16...')
model = VGG16(weights='imagenet', include_top=False)

# initialize the HDF5 dataset writer
dataset = HDF5DatasetWriter(args['output'], (len(image_paths), 7 * 7 * 512),
                            data_key='features',
                            buf_size=args['buffer_size'])
dataset.storeClassLabels(le.classes_)

# construct progress bar
widgets = [
    'Extracting features ', progressbar.Percentage(), ' ',
    progressbar.Bar(), ' ', progressbar.ETA()
]
pbar = progressbar.ProgressBar(maxval=len(image_paths),
                               widgets=widgets).start()

for i in range(0, len(image_paths), batch_size):
    batch_paths = image_paths[i:i + batch_size]
random_state=42)

# construct a list pairing the training, validation and testing image paths
# along with their corresponding labels and output HDF5 files
datasets = [('train', train_paths, train_labels, config.TRAIN_HDF5),
            ('val', val_paths, val_labels, config.VAL_HDF5),
            ('test', test_paths, test_labels, config.TEST_HDF5)]

# initialize the image preprocessor and list of RGB channel averages
aap = AspectAwarePreprocessor(256, 256)
R, G, B = [], [], []

# loop over the dataset tuples
for d_type, paths, labels, output_path in datasets:
    # create HDF5 writer
    print(f'[INFO] building {output_path}...')
    writer = HDF5DatasetWriter((len(labels), 256, 256, 3), output_path)

    # initialize progress bar
    widgets = [
        'Building dataset: ', progressbar.Percentage(), ' ',
        progressbar.Bar(), ' ', progressbar.ETA()
    ]
    pbar = progressbar.ProgressBar(maxval=len(labels),
                                   widgets=widgets).start()

    # loop over the image paths
    for i, (path, label) in enumerate(zip(paths, labels)):
        image = cv2.imread(path)
        image = aap.preprocess(image)
random.shuffle(imagePaths)

# extract the class labels from the image paths then encode the
# labels
labels = [p.split(os.path.sep)[-2] for p in imagePaths]
le = LabelEncoder()
labels = le.fit_transform(labels)

# load the VGG16 network
print("[INFO] loading network...")
model = VGG16(weights="imagenet", include_top=False)

# initialize the HDF5 dataset writer, then store the class label
# names in the dataset
dataset = HDF5DatasetWriter((len(imagePaths), 512 * 7 * 7),
                            args["output"], dataKey="features",
                            bufSize=args["buffer_size"])
dataset.storeClassLabels(le.classes_)

# initialize the progress bar
widgets = [
    "Extracting Features: ", progressbar.Percentage(), " ",
    progressbar.Bar(), progressbar.ETA()
]
pbar = progressbar.ProgressBar(maxval=len(imagePaths),
                               widgets=widgets).start()

# loop over the images in batches
for i in np.arange(0, len(imagePaths), bs):
    # extract the batch of images and labels, then initialize the
# initialize DATASETS for easy access
DATASETS = [('train', train_paths, train_labels, config.TRAIN_HDF5),
            ('val', val_paths, val_labels, config.VAL_HDF5),
            ('test', test_paths, test_labels, config.TEST_HDF5)]

# initialize preprocessor
aap = AspectAwarePreprocessor(256, 256)

# construct lists of R, G, B values for the means
R, G, B = [], [], []

# loop over DATASETS
for d_type, image_paths, labels, output_path in DATASETS:
    # initialize HDF5 dataset writer
    writer = HDF5DatasetWriter(output_path, (len(labels), 256, 256, 3))

    # construct progress bar
    widgets = [
        f'Building {d_type}: ', progressbar.Percentage(), ' ',
        progressbar.Bar(), ' ', progressbar.ETA()
    ]
    pbar = progressbar.ProgressBar(maxval=len(labels),
                                   widgets=widgets).start()

    # loop over image paths
    for i, (image_path, label) in enumerate(zip(image_paths, labels)):
        image = cv2.imread(image_path)
        image = aap.preprocess(image)