Example #1
def build_hdf5(dataset, dataset_mean_path, label_encoder_path):
    # list of R, G, B means
    R, G, B = [], [], []

    # initialize image preprocessor
    aap = AspectAwarePreprocessor(256, 256)

    # loop over the dataset tuples
    for d_type, paths, labels, output_path in dataset:
        # construct HDF5 dataset writer
        writer = HDF5DatasetWriter((len(labels), 256, 256, 3), output_path)
        # construct progress bar
        widgets = [
            f'Building {d_type}: ',
            progressbar.Percentage(), ' ',
            progressbar.Bar(), ' ',
            progressbar.ETA()
        ]
        pbar = progressbar.ProgressBar(maxval=len(labels),
                                       widgets=widgets).start()

        for i, (path, label) in enumerate(zip(paths, labels)):
            image = cv2.imread(path)

            image = aap.preprocess(image)

            if d_type == 'train':
                b, g, r = cv2.mean(image)[:3]
                R.append(r)
                G.append(g)
                B.append(b)

            writer.add([image], [label])
            pbar.update(i)

        writer.close()
        pbar.finish()

    if not os.path.exists(config.OUTPUT_BASE):
        os.makedirs(config.OUTPUT_BASE)

    # serialize means of R, G, B
    print('[INFO] serializing means...')
    D = {'R': np.mean(R), 'G': np.mean(G), 'B': np.mean(B)}
    f = open(dataset_mean_path, 'w')
    f.write(json.dumps(D))
    f.close()

    # serialize label encoder
    print('[INFO] serializing label encoder...')
    f = open(label_encoder_path, 'wb')
    f.write(pickle.dumps(le))  # `le` (the fitted LabelEncoder) comes from the enclosing scope
    f.close()
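None of the snippets on this page include the HDF5DatasetWriter class itself, and the argument order and keyword spellings (bufSize, buffSize, buf_size, data_key) vary from example to example. A minimal sketch of the writer these examples assume, built on h5py; the names and defaults here are illustrative, not the canonical implementation:

import h5py

class HDF5DatasetWriter:
    def __init__(self, dims, output_path, data_key="images", buf_size=1000):
        # create the HDF5 file with one dataset for the data and one for labels
        self.db = h5py.File(output_path, "w")
        self.data = self.db.create_dataset(data_key, dims, dtype="float")
        self.labels = self.db.create_dataset("labels", (dims[0],), dtype="int")

        # in-memory buffer, flushed to disk every `buf_size` rows
        self.buf_size = buf_size
        self.buffer = {"data": [], "labels": []}
        self.idx = 0

    def add(self, rows, labels):
        self.buffer["data"].extend(rows)
        self.buffer["labels"].extend(labels)
        if len(self.buffer["data"]) >= self.buf_size:
            self.flush()

    def flush(self):
        # write the buffered rows to disk, then reset the buffer
        i = self.idx + len(self.buffer["data"])
        self.data[self.idx:i] = self.buffer["data"]
        self.labels[self.idx:i] = self.buffer["labels"]
        self.idx = i
        self.buffer = {"data": [], "labels": []}

    def storeClassLabels(self, class_labels):
        # store the raw class label strings in their own dataset
        dt = h5py.special_dtype(vlen=str)
        label_set = self.db.create_dataset("label_names",
                                           (len(class_labels),), dtype=dt)
        label_set[:] = class_labels

    def close(self):
        # flush any rows still sitting in the buffer, then close the file
        if len(self.buffer["data"]) > 0:
            self.flush()
        self.db.close()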
Example #2
# construct a list pairing the training, validation, and testing
# image paths along with their corresponding labels and output HDF5
# files
datasets = [("train", trainPaths, trainLabels, config.TRAIN_HDF5),
            ("val", valPaths, valLabels, config.VAL_HDF5),
            ("test", testPaths, testLabels, config.TEST_HDF5)]

# initialize the lists of RGB channel averages
(R, G, B) = ([], [], [])

# loop over the dataset tuples
for (dType, paths, labels, outputPath) in datasets:
    # create HDF5 writer
    print("[INFO] building {}...".format(outputPath))
    writer = HDF5DatasetWriter((len(paths), 64, 64, 3), outputPath)

    # initialize the progress bar
    widgets = [
        "Building Dataset: ",
        progressbar.Percentage(), " ",
        progressbar.Bar(), " ",
        progressbar.ETA()
    ]
    pbar = progressbar.ProgressBar(maxval=len(paths), widgets=widgets).start()

    # loop over the image paths
    for (i, (path, label)) in enumerate(zip(paths, labels)):
        # load the image from disk
        image = cv2.imread(path)
Example #3
ap.add_argument('-b', '--buffer-size', type=int, default=1000, help='size of buffer')
args = vars(ap.parse_args())

# grab the list of image paths and shuffle them
image_paths = list(paths.list_images(args['dataset']))
random.shuffle(image_paths)
buf_size = args['buffer_size']

# extract the labels from the image paths, then encode them
labels = [image_path.split(os.path.sep)[-2] for image_path in image_paths]
le = LabelEncoder()
labels = le.fit_transform(labels)
label_names = le.classes_

# construct HDF5 dataset writer
dataset = HDF5DatasetWriter(args['output'], (len(image_paths), 224, 224, 3), buf_size=buf_size)
dataset.storeClassLabels(label_names)

# construct progressbar
widgets = ['Convert images ', progressbar.Percentage(), ' ', progressbar.Bar(), ' ', progressbar.ETA()]
pbar = progressbar.ProgressBar(len(image_paths), widgets=widgets).start()

for i in range(0, len(image_paths), buf_size):
    batch_paths = image_paths[i:i+buf_size]
    batch_labels = labels[i:i+buf_size]
    batch_images = []

    for image_path in batch_paths:
        image = load_img(image_path, target_size=(224, 224))
        image = img_to_array(image)
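The excerpt cuts off inside the inner loop. Since this writer stores raw 224x224x3 images (no network is involved), a plausible continuation simply collects each array and flushes the batch; this is an assumed completion, not the original code:

        batch_images.append(image)

    # write the completed batch of raw images and labels to the HDF5 file
    dataset.add(batch_images, batch_labels)
    pbar.update(i)

pbar.finish()
dataset.close()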
Example #4
# construct a list pairing the training and testing image paths
# along with their corresponding labels and output HDF5 files
datasets = [
    ("train", trainPaths, trainLabels, args['train_data']),
    ("test", testPaths, testLabels, args['test_data'])]

# original size of generated license plate images
IMAGE_WIDTH = 151
IMAGE_HEIGHT = 32

# loop over the dataset tuples
for (dType, paths, labels, outputPath) in datasets:
    # create HDF5 writer
    print("[INFO] building {}...".format(outputPath))
    writer = HDF5DatasetWriter((len(paths), IMAGE_HEIGHT, IMAGE_WIDTH), outputPath)

    # initialize the progress bar
    widgets = ["Building Dataset: ", progressbar.Percentage(), " ", progressbar.Bar(), " ", progressbar.ETA()]
    pbar = progressbar.ProgressBar(maxval=len(paths), widgets=widgets)
    pbar.start()

    # loop over the image paths
    for (i, (path, label)) in enumerate(zip(paths, labels)):
        # load the image and process it
        # avoid cv2.imread here: it fails on non-ASCII (UTF-8) paths
        with open(path, "rb") as stream:
            data = bytearray(stream.read())
        image = cv2.imdecode(np.asarray(data, dtype=np.uint8),
                             cv2.IMREAD_GRAYSCALE)
Example #5
            target = image[y + config.PAD:y + config.PAD + config.LABEL_SIZE,
                           x + config.PAD:x + config.PAD + config.LABEL_SIZE]

            cv2.imwrite(os.path.sep.join([config.IMAGES, f'{total}.png']),
                        crop)
            cv2.imwrite(os.path.sep.join([config.LABELS, f'{total}.png']),
                        target)

# load images and labels
print('[INFO] loading images and labels...')
image_paths = list(paths.list_images(config.IMAGES))
label_paths = list(paths.list_images(config.LABELS))

# define the HDF5 dataset writers
inputs_hdf5 = HDF5DatasetWriter(
    config.INPUTS_DB,
    (len(image_paths), config.INPUT_DIM, config.INPUT_DIM, 3))
outputs_hdf5 = HDF5DatasetWriter(
    config.OUTPUTS_DB,
    (len(label_paths), config.LABEL_SIZE, config.LABEL_SIZE, 3))

for image_path, label_path in zip(image_paths, label_paths):
    image = cv2.imread(image_path)
    label = cv2.imread(label_path)

    inputs_hdf5.add([image], [-1])
    outputs_hdf5.add([label], [-1])

# close the HDF5 datasets
inputs_hdf5.close()
outputs_hdf5.close()
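Reading the paired databases back is straightforward with h5py. A minimal sketch; the 'images' key follows the writer sketch near the top of this page and may differ in the real class:

import h5py

# open the two databases and iterate over aligned input/target pairs
inputs_db = h5py.File(config.INPUTS_DB, 'r')
outputs_db = h5py.File(config.OUTPUTS_DB, 'r')

for x, y in zip(inputs_db['images'], outputs_db['images']):
    pass  # feed the (input, target) pair to the model

inputs_db.close()
outputs_db.close()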
Example #6
                         random_state=42)
(trainPaths, valPaths, trainLabels, valLabels) = split

# construct a list pairing the training, validation, and testing
# image paths along with their corresponding labels and output HDF5
# files
dataset = [('train', trainPaths, trainLabels, config.TRAIN_HDF5),
           ('test', testPaths, testLabels, config.TEST_HDF5),
           ('val', valPaths, valLabels, config.VAL_HDF5)]

aap = AspectAwarePreprocessor(256, 256)
(R, G, B) = ([], [], [])

for (dtype, paths, labels, outputPath) in dataset:
    print('[INFO] building {}...'.format(outputPath))
    writer = HDF5DatasetWriter((len(paths), 256, 256, 3),
                               outputPath=outputPath)
    widgets = [
        'Building Dataset: ',
        progressbar.Percentage(), " ",
        progressbar.Bar(), " ",
        progressbar.ETA()
    ]
    pgbar = progressbar.ProgressBar(max_value=len(paths),
                                    widgets=widgets).start()

    for (i, (path, label)) in enumerate(zip(paths, labels)):
        image = cv2.imread(path)
        image = aap.preprocess(image)

        if dtype == 'train':
            (b, g, r) = cv2.mean(image)[:3]
Example #7
# grab the list of images that we'll be describing, then randomly
# shuffle them to allow for easy training and testing splits via
# array slicing during training time
print("[INFO] loading images...")
imagePaths = list(paths.list_images(args['dataset']))
random.shuffle(imagePaths)

# extract the class labels (angles) from the image paths, then encode them
labels = [p.split(os.path.sep)[-2] for p in imagePaths]
le = LabelEncoder()
labels = le.fit_transform(labels)

# load the VGG16 network
print("[INFO] loading network...")
model = VGG16(weights='imagenet', include_top=False)

# initialize the HDF5 dataset writer, then store the class label names in the dataset
dataset = HDF5DatasetWriter((len(imagePaths), 512 * 7 * 7), args['output'], dataKey="features",
                            buffSize=args['buffer_size'])
dataset.storeClassLabels(le.classes_)

# initialize the progress bar
widgets = ["Extracting features: ", progressbar.Percentage(), " ", progressbar.Bar(), " ", progressbar.ETA()]
pbar = progressbar.ProgressBar(maxval=len(imagePaths), widgets=widgets).start()

# loop over the images in batches
for i in np.arange(0, len(imagePaths), bs):

    # Extract the batch of images and labels, then initialize the list of actual images and labels
    batchPaths = imagePaths[i: i + bs]
    batchLabels = labels[i: i+bs]
    batchImages = []

    # loop over the images and labels in the current batch
Example #8
        trainImages.append(image)
        trainLabels.append(label)
    # check if this is a validation image
    elif usage == "PrivateTest":
        valImages.append(image)
        valLabels.append(label)
    # otherwise, this must be a testing image
    else:
        testImages.append(image)
        testLabels.append(label)

# construct a list pairing the training, validation, and testing
# images along with their corresponding labels and output HDF5
# files
datasets = [
    (trainImages, trainLabels, config.TRAIN_HDF5),
    (valImages, valLabels, config.VAL_HDF5),
    (testImages, testLabels, config.TEST_HDF5)]

# loop over the dataset tuples
for (images, labels, outputPath) in datasets:
    # create HDF5 writer
    print("[INFO] building {}...".format(outputPath))
    writer = HDF5DatasetWriter((len(images), 48, 48), outputPath)
    # loop over the images and add them to the dataset
    for (image, label) in zip(images, labels):
        writer.add([image], [label])

    # close the HDF5 writer
    writer.close()
# close the input file
f.close()
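For context, the `usage` strings ('PrivateTest', etc.) match the FER2013 CSV format, so the parsing that presumably precedes the excerpt looks like the sketch below; config.INPUT_PATH and the exact column handling are assumptions:

# open the FER2013-style CSV and skip its header row
f = open(config.INPUT_PATH)
next(f)

for row in f:
    # each row is: emotion,pixels,Usage
    (label, pixels, usage) = row.strip().split(",")
    label = int(label)

    # reshape the flattened pixel string into a 48x48 grayscale image
    image = np.array(pixels.split(" "), dtype="uint8").reshape((48, 48))
    # ...the if/elif chain shown in the excerpt above then routes
    # (image, label) into the train/val/test lists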
Example #9
def main():
    """Serialize the dataset
    """
    # grab the paths to the images
    train_paths = list(paths.list_images(config.IMAGES_PATH))
    train_labels = [
        p.split(os.path.sep)[-1].split(".")[0] for p in train_paths
    ]
    label_encoder = LabelEncoder()
    train_labels = label_encoder.fit_transform(train_labels)

    # perform stratified sampling from the training set to build the
    # testing split from the training data
    split = train_test_split(train_paths,
                             train_labels,
                             test_size=config.NUM_TEST_IMAGES,
                             stratify=train_labels,
                             random_state=42)
    (train_paths, test_paths, train_labels, test_labels) = split

    # perform another stratified sampling, this time to build the validation data
    split = train_test_split(train_paths,
                             train_labels,
                             test_size=config.NUM_VAL_IMAGES,
                             stratify=train_labels,
                             random_state=42)
    (train_paths, val_paths, train_labels, val_labels) = split

    # construct a list pairing the training, validation, and testing image paths along
    # with their corresponding labels and output HDF5 files
    datasets = [
        ("train", train_paths, train_labels, config.TRAIN_HDF5),
        ("val", val_paths, val_labels, config.VAL_HDF5),
        ("test", test_paths, test_labels, config.TEST_HDF5),
    ]

    # initialize the image preprocessor and the lists of RGB channel averages
    aap = AspectAwarePreprocessor(256, 256)
    (R, G, B) = ([], [], [])
    # loop over the dataset tuples
    for (dataset_type, path_list, labels, output_path) in datasets:
        # create HDF5 writer
        print("[INFO] building {}...".format(output_path))
        writer = HDF5DatasetWriter((len(path_list), 256, 256, 3), output_path)
        # initialize the progress bar
        widgets = [
            "Building Dataset: ",
            progressbar.Percentage(), " ",
            progressbar.Bar(), " ",
            progressbar.ETA()
        ]
        pbar = progressbar.ProgressBar(maxval=len(path_list),
                                       widgets=widgets).start()
        # loop over the image paths
        for (i, (path, label)) in enumerate(zip(path_list, labels)):
            # load the image and process it
            image = cv2.imread(path)
            image = aap.preprocess(image)
            # if we are building the training dataset, then compute the mean of
            # each channel in the image, then update the respective lists
            if dataset_type == "train":
                (b, g, r) = cv2.mean(image)[:3]
                R.append(r)
                G.append(g)
                B.append(b)
            # add the image and label to the HDF5 dataset
            writer.add([image], [label])
            pbar.update(i)
        # close the HDF5 writer
        pbar.finish()
        writer.close()
    # construct a dictionary of averages, then serialize the means to a JSON file
    print("[INFO] serializing means...")
    rgb_dict = {"R": np.mean(R), "G": np.mean(G), "B": np.mean(B)}
    f = open(config.DATASET_MEAN, "w")
    f.write(json.dumps(rgb_dict))
    f.close()
Example #10
def extract_backgrounds(archive_name, output_path, max_items=np.inf):
    print("[INFO] reading content of {}...".format(archive_name))
    tar = tarfile.open(name=archive_name)
    files = tar.getnames()

    # create shuffled index list
    randomized_indexes = np.arange(len(files))
    np.random.shuffle(randomized_indexes)

    # pick max number of items
    if max_items == np.inf or max_items > len(files):
        max_items = len(files)

    randomized_indexes = randomized_indexes[0:max_items]

    print("[INFO] building {}...".format(output_path))
    writer = HDF5DatasetWriter(
        (len(randomized_indexes), IMAGE_HEIGHT, IMAGE_WIDTH), output_path)

    widgets = [
        "Building Dataset: ",
        progressbar.Percentage(), " ",
        progressbar.Bar(), " ",
        progressbar.ETA()
    ]
    pbar = progressbar.ProgressBar(maxval=len(randomized_indexes),
                                   widgets=widgets).start()
    index = 0

    for i, file in enumerate(files):

        if i not in randomized_indexes:
            continue

        f = tar.extractfile(file)
        if f is None:
            continue  # skip directories
        try:
            image = im_from_file(f)
        finally:
            f.close()
        if image is None:
            continue  # skip non image files

        # make the image square by cropping the larger dimension down to the smaller one
        if image.shape[0] > image.shape[1]:
            image = image[:image.shape[1], :]
        else:
            image = image[:, :image.shape[0]]

        # the crop is square, so resizing to 256x256 preserves the aspect ratio
        if image.shape[0] != 256:
            image = cv2.resize(image, (256, 256))

        # name from index
        name = "{:08}".format(index)

        # check image size
        if image.shape != (IMAGE_HEIGHT, IMAGE_WIDTH):
            print("image with wrong size: %s" % name)
            continue

        # add the image and name to the HDF5 db
        writer.add([image], [name])
        pbar.update(index)
        index += 1

    # close the HDF5 writer
    pbar.finish()
    writer.close()
    print("[INFO] {} images saved to {}...".format(len(randomized_indexes),
                                                   output_path))
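A typical invocation; the archive and output file names here are illustrative, not taken from the original script:

# build an HDF5 database from up to 10,000 random images in the archive
extract_backgrounds("bgs.tar", "hdf5/backgrounds.hdf5", max_items=10000)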
Example #11
    f.close()

    # put each image and its label into the HDF5 file
    # if j<5000:
    # 	trainImages.append(results)
    # 	trainLabels.append(char_ids_padded)
    # elif j<6000:
    # 	testImages.append(results)
    # 	testLabels.append(char_ids_padded)
    # elif j<7000:
    # 	valImages.append(results)
    # 	valLabels.append(char_ids_padded)
    trainImages.append(results)
    trainLabels.append(char_ids_padded)

# datasets = [
# 	(trainImages, trainLabels, 'hdf5/train.hdf5'),
# 	(valImages, valLabels, 'hdf5/val.hdf5'),
# 	(testImages, testLabels, 'hdf5/test.hdf5')]
datasets = [(trainImages, trainLabels, 'hdf5/test3.hdf5')]

for trainImages, trainLabels, path in datasets:
    print("[INFO] building {}...".format(path))
    print("len: ", len(trainLabels))
    writer = HDF5DatasetWriter((len(trainLabels), 300, 32), path)

    for (image, label) in zip(trainImages, trainLabels):
        writer.add([image], [label])
    writer.close()
#f.close()
Example #12
# grab the list of images that we'll be describing then randomly
# shuffle them to allow for easy training and testing splits via
# array slicing during training time
print("[INFO] loading images...")
imagePaths = list(paths.list_images(config.TEST_IMAGES_PATH))
# random.shuffle(imagePaths) # pre-shuffled, nothing wrong with shuffling again
ids = [os.path.splitext(os.path.basename(path))[0] for path in imagePaths]
labels = [0 for i in ids]

# encode the labels
# le = LabelEncoder()
# labels = le.fit_transform(labels)

# initialize the HDF5 dataset writer, then store the class label names in the dataset
dataset = HDF5DatasetWriter((len(imagePaths), config.INPUT_SIZE, config.INPUT_SIZE, 3),
	config.TEST_HDF5)
dataset.storeClassLabels(classNames)

# initialize the progress bar
widgets = ["Saving Images: ", progressbar.Percentage(), " ",
	progressbar.Bar(), " ", progressbar.ETA()]
pbar = progressbar.ProgressBar(maxval=len(imagePaths),
	widgets=widgets).start()

# loop over the images one at a time
for i in np.arange(0, len(imagePaths)):
	# grab the current image path, label, and ID
	imagePath = imagePaths[i]
	label = labels[i]
	_id = ids[i]
Example #13
imagePaths = list(paths.list_images(args["dataset"]))

random.shuffle(imagePaths)

# the config values are post-rotation, so width and height are swapped
width = config.WIDTH  #160
height = config.HEIGHT  #32
k1 = width / height

(trainImages, trainLabels) = ([], [])
(valImages, valLabels) = ([], [])
(testImages, testLabels) = ([], [])

writer = HDF5DatasetWriter((20000, config.HEIGHT, config.WIDTH),
                           'hdf5/val.hdf5',
                           max_label_length=config.MAX_LENGTH)
for j, imagePath in tqdm(enumerate(imagePaths)):
    # strip Windows-style backslash separators from the path
    imagePath2 = imagePath.replace('\\', '')

    image = cv2.imread(imagePath2, cv2.IMREAD_GRAYSCALE)

    k2 = image.shape[1] / image.shape[0]
    if k2 < k1:
        resized = imutils.resize(image, height=height)
        zeros = np.zeros((height, width - resized.shape[1]))
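The excerpt stops mid-branch. A plausible completion of the letterboxing logic (pad narrow images on the right, pad wide ones on the bottom after fixing the width; the exact policy is an assumption):

        # pad the resized image on the right with zeros up to the target width
        image = np.hstack([resized, zeros]).astype('uint8')
    else:
        # wider than the target ratio: fix the width, then pad the height
        resized = imutils.resize(image, width=width)
        zeros = np.zeros((height - resized.shape[0], width))
        image = np.vstack([resized, zeros]).astype('uint8')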
Example #14
datasets = [('train', train_paths, train_labels, config.TRAIN_HDF5),
            ('val', val_paths, val_labels, config.VAL_HDF5),
            ('test', test_paths, test_labels, config.TEST_HDF5)]

aap = AspectAwarePreprocessor(256, 256)

for d_type, paths, labels, hdf5_path in datasets:
    widgets = [
        f'Building {d_type}:', ' ',
        progressbar.Percentage(), ' ',
        progressbar.Bar(), ' ',
        progressbar.ETA()
    ]
    pbar = progressbar.ProgressBar(maxval=len(paths), widgets=widgets).start()
    writer = HDF5DatasetWriter(hdf5_path, (len(paths), 256, 256, 3))

    for i, (path, label) in enumerate(zip(paths, labels)):
        image = cv2.imread(path)
        image = aap.preprocess(image)

        if d_type == 'train':
            b, g, r = cv2.mean(image)[:3]

            b_mean.append(b)
            g_mean.append(g)
            r_mean.append(r)

        writer.add([image], [label])

        pbar.update(i)
Example #15
    ("train", trainPaths, trainLabels,
     os.path.join(args["output"], "train.hdf5")),
    ("val", valPaths, valLabels, os.path.join(args["output"], "val.hdf5")),
    ("test", testPaths, testLabels, os.path.join(args["output"], "test.hdf5"))
]

# initialize the image pre-processor and the lists of RGB channel averages
aap = AspectAwarePreprocessor(256, 256)
(R, G, B) = ([], [], [])

# loop over the dataset tuples
for (dType, paths, labels, outputPath) in datasets:
    # create HDF5 writer
    print("[INFO] building {}...".format(outputPath))
    writer = HDF5DatasetWriter((len(paths), 256, 256, 3),
                               outputPath,
                               bufSize=args["buffer_size"])

    # initialize the progress bar
    widgets = [
        "Building Dataset: ",
        progressbar.Percentage(), " ",
        progressbar.Bar(), " ",
        progressbar.ETA()
    ]
    pbar = progressbar.ProgressBar(maxval=len(paths), widgets=widgets).start()

    # loop over the image paths
    for (i, (path, label)) in enumerate(zip(paths, labels)):
        # load the image and process it
        image = cv2.imread(path)
Example #16
# construct a list pairing the training, validation, and testing
# image paths along with their corresponding labels and output HDF5 files
datasets = [("train", trainPaths, trainLabels, trainIds, config.TRAIN_HDF5),
            ("val", valPaths, valLabels, valIds, config.VAL_HDF5),
            ("test", testPaths, testLabels, testIds, config.TEST_HDF5)]

# initialize the image pre-processor and the lists of RGB channel averages
aap = AspectAwarePreprocessor(config.INPUT_SIZE, config.INPUT_SIZE)
(R, G, B) = ([], [], [])

# loop over the dataset tuples
for (dType, paths, labels, ids, outputPath) in datasets:
    # create HDF5 writer
    print("[INFO] building {}...".format(outputPath))
    writer = HDF5DatasetWriter(
        (len(paths), config.INPUT_SIZE, config.INPUT_SIZE, 3), outputPath)
    writer.storeClassLabels(le.classes_)

    # initialize the progress bar
    widgets = [
        "Building Dataset: ",
        progressbar.Percentage(), " ",
        progressbar.Bar(), " ",
        progressbar.ETA()
    ]
    pbar = progressbar.ProgressBar(maxval=len(paths), widgets=widgets).start()

    # loop over the image paths
    for (i, (path, label, _id)) in enumerate(zip(paths, labels, ids)):
        # load the image and process it
        image = cv2.imread(path)
Example #17
				"{}.png".format(total)])

			# write the images to disk
			cv2.imwrite(cropPath, crop)
			cv2.imwrite(targetPath, target)

			# increment the crop total
			total += 1

# grab the paths to the images
print("[INFO] building HDF5 datasets...")
inputPaths = sorted(list(paths.list_images(config.IMAGES)))
outputPaths = sorted(list(paths.list_images(config.LABELS)))

# initialize the HDF5 datasets
inputWriter = HDF5DatasetWriter((len(inputPaths), config.INPUT_DIM,
	config.INPUT_DIM, 3), config.INPUTS_DB)
outputWriter = HDF5DatasetWriter((len(outputPaths),
	config.LABEL_SIZE, config.LABEL_SIZE, 3), config.OUTPUTS_DB)

# loop over the images
for (inputPath, outputPath) in zip(inputPaths, outputPaths):
	# load the two images and add them to their respective datasets
	inputImage = cv2.imread(inputPath)
	outputImage = cv2.imread(outputPath)
	inputWriter.add([inputImage], [-1])
	outputWriter.add([outputImage], [-1])

# close the HDF5 datasets
inputWriter.close()
outputWriter.close()
Example #18
def main():
    """Extract features from Cats vs. Dogs dataset using ResNet50
    """
    # construct the argument parse and parse the arguments
    args = argparse.ArgumentParser()
    args.add_argument("-d",
                      "--dataset",
                      required=True,
                      help="path to input dataset")
    args.add_argument("-o",
                      "--output",
                      required=True,
                      help="path to output HDF5 file")
    args.add_argument("-b",
                      "--batch-size",
                      type=int,
                      default=16,
                      help="batch size of images to be passed through network")
    args.add_argument("-s",
                      "--buffer-size",
                      type=int,
                      default=1000,
                      help="size of feature extraction buffer")
    args = vars(args.parse_args())

    # store the batch size in a convenience variable
    batch_size = args["batch_size"]

    # grab the list of images that we'll be describing then randomly shuffle them to allow
    # for easy training and testing splits via array slicing during training time
    print("[INFO] loading images...")
    image_paths = list(paths.list_images(args["dataset"]))
    random.shuffle(image_paths)

    # extract the class labels from the image paths then encode the labels
    labels = [p.split(os.path.sep)[-1].split(".")[0] for p in image_paths]
    label_encoder = LabelEncoder()
    labels = label_encoder.fit_transform(labels)

    # load the ResNet50 network
    print("[INFO] loading network...")
    model = ResNet50(weights="imagenet", include_top=False)

    # initialize the HDF5 dataset writer, then store the class label names in the dataset
    dataset = HDF5DatasetWriter((len(image_paths), 100352),
                                args["output"],
                                data_key="features",
                                buffer_size=args["buffer_size"])
    dataset.store_class_labels(label_encoder.classes_)

    # initialize the progress bar
    widgets = [
        "Extracting Features: ",
        progressbar.Percentage(), " ",
        progressbar.Bar(), " ",
        progressbar.ETA()
    ]
    pbar = progressbar.ProgressBar(maxval=len(image_paths),
                                   widgets=widgets).start()

    # loop over the images in batches
    for i in np.arange(0, len(image_paths), batch_size):
        # extract the batch of images and labels, then initialize the
        # list of actual images that will be passed through the network
        # for feature extraction
        batch_paths = image_paths[i:i + batch_size]
        batch_labels = labels[i:i + batch_size]
        batch_images = []
        # loop over the images and labels in the current batch
        for image_path in batch_paths:
            # load the input image using the Keras helper utility
            # while ensuring the image is resized to 224x224 pixels
            image = load_img(image_path, target_size=(224, 224))
            image = img_to_array(image)
            # preprocess the image by (1) expanding the dimensions and
            # (2) subtracting the mean RGB pixel intensity from the ImageNet dataset
            image = np.expand_dims(image, axis=0)
            image = imagenet_utils.preprocess_input(image)
            # add the image to the batch
            batch_images.append(image)
        # once the inner loop completes, pass the whole batch through the
        # network and use the outputs as our actual features
        batch_images = np.vstack(batch_images)
        features = model.predict(batch_images, batch_size=batch_size)

        # reshape the features so that each image is represented by a
        # flattened feature vector of the final pooling outputs
        features = features.reshape((features.shape[0], 100352))

        # add the features and labels to our HDF5 dataset
        dataset.add(features, batch_labels)
        pbar.update(i)

    # close the dataset
    dataset.close()
    pbar.finish()
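Once the features are serialized, a classifier can be trained directly on the HDF5 arrays. A minimal sketch: a 75/25 split over the pre-shuffled rows, with key names following the writer calls above and the output path assumed:

import h5py
from sklearn.linear_model import LogisticRegression

# open the feature database; rows were shuffled before writing,
# so a simple slice gives a train/test split
db = h5py.File("features.hdf5", "r")
i = int(db["labels"].shape[0] * 0.75)

model = LogisticRegression(max_iter=1000)
model.fit(db["features"][:i], db["labels"][:i])
print("[INFO] accuracy: {:.4f}".format(
    model.score(db["features"][i:], db["labels"][i:])))
db.close()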
Example #19
def main():
    """Serialize the dataset
    """
    # grab the paths to the training images, then extract the training class labels and encode them
    train_paths = list(paths.list_images(config.TRAIN_IMAGES))
    train_labels = [p.split(os.path.sep)[-3] for p in train_paths]
    label_encoder = LabelEncoder()
    train_labels = label_encoder.fit_transform(train_labels)

    # perform stratified sampling from the training set to construct a testing set
    split = train_test_split(train_paths,
                             train_labels,
                             test_size=config.NUM_TEST_IMAGES,
                             stratify=train_labels,
                             random_state=42)
    (train_paths, test_paths, train_labels, test_labels) = split

    # load the validation filename => class mappings from file, then use
    # them to build the validation paths and label lists
    mapping = open(config.VAL_MAPPINGS).read().strip().split("\n")
    mapping = [r.split("\t")[:2] for r in mapping]
    val_paths = [os.path.sep.join([config.VAL_IMAGES, m[0]]) for m in mapping]
    val_labels = label_encoder.transform([m[1] for m in mapping])

    # construct a list pairing the training, validation, and testing image paths
    # along with their corresponding labels and output HDF5 files
    datasets = [
        ("train", train_paths, train_labels, config.TRAIN_HDF5),
        ("val", val_paths, val_labels, config.VAL_HDF5),
        ("test", test_paths, test_labels, config.TEST_HDF5),
    ]

    # initialize the lists of RGB channel averages
    (R, G, B) = ([], [], [])

    # loop over the dataset tuples
    for (dataset_type, image_paths, labels, output_path) in datasets:
        # create HDF5 writer
        print("[INFO] building {}...".format(output_path))
        writer = HDF5DatasetWriter((len(image_paths), 64, 64, 3), output_path)
        # initialize the progress bar
        widgets = [
            "Building Dataset: ",
            progressbar.Percentage(), " ",
            progressbar.Bar(), " ",
            progressbar.ETA()
        ]
        pbar = progressbar.ProgressBar(maxval=len(image_paths),
                                       widgets=widgets).start()

        # loop over the image paths
        for (i, (path, label)) in enumerate(zip(image_paths, labels)):
            # load the image from disk
            image = cv2.imread(path)

            # if we are building the training dataset, then compute the mean of each
            # channel in the image, then update the respective lists
            if dataset_type == "train":
                (b, g, r) = cv2.mean(image)[:3]
                R.append(r)
                G.append(g)
                B.append(b)

            # add the image and label to the HDF5 dataset
            writer.add([image], [label])
            pbar.update(i)

        # close the HDF5 writer
        pbar.finish()
        writer.close()

    # construct a dictionary of averages, then serialize the means to a JSON file
    print("[INFO] serializing means...")
    rgb_dict = {"R": np.mean(R), "G": np.mean(G), "B": np.mean(B)}
    f = open(config.DATASET_MEAN, "w")
    f.write(json.dumps(rgb_dict))
    f.close()
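The serialized means are typically applied at training time via channel-wise subtraction. A minimal sketch of such a preprocessor, assuming the JSON file produced above:

import json
import cv2

# load the serialized per-channel means
means = json.loads(open(config.DATASET_MEAN).read())

def subtract_mean(image):
    # split into channels, subtract the dataset means, re-merge
    (b, g, r) = cv2.split(image.astype("float32"))
    r -= means["R"]
    g -= means["G"]
    b -= means["B"]
    return cv2.merge([b, g, r])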
Example #20
image_paths = list(paths.list_images(args['dataset']))
random.shuffle(image_paths)
labels = [
    image_path.split(os.path.sep)[-1].split('.')[0]
    for image_path in image_paths
]
le = LabelEncoder()
labels = le.fit_transform(labels)

# load model
print('[INFO] loading model...')
model = ResNet50(weights='imagenet', include_top=False, pooling='avg')

# initialize HDF5 dataset writer
dataset = HDF5DatasetWriter((len(labels), 2048),
                            args['output'],  # path to the output HDF5 file (assumed CLI argument)
                            data_key='features',
                            buf_size=args['buffer_size'])
dataset.storeClassLabels(le.classes_)

# construct progress bar
widgets = [
    'Extracting features: ',
    progressbar.Percentage(), ' ',
    progressbar.Bar(), ' ',
    progressbar.ETA()
]
pbar = progressbar.ProgressBar(maxval=len(labels), widgets=widgets).start()

# loop over batches of images
for i in range(0, len(labels), batch_size):
    batch_paths = image_paths[i:i + batch_size]
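The excerpt ends just after slicing the batch paths. Based on the matching loops in Examples #18 and #23, and assuming the same Keras helpers (load_img, img_to_array, imagenet_utils), the continuation presumably looks like this; since pooling='avg' already yields a (N, 2048) output, no reshape is needed:

    batch_labels = labels[i:i + batch_size]
    batch_images = []

    for image_path in batch_paths:
        # load, resize, and apply ImageNet-style preprocessing
        image = load_img(image_path, target_size=(224, 224))
        image = img_to_array(image)
        image = np.expand_dims(image, axis=0)
        image = imagenet_utils.preprocess_input(image)
        batch_images.append(image)

    # extract the (N, 2048) average-pooled features and store them
    features = model.predict(np.vstack(batch_images), batch_size=batch_size)
    dataset.add(features, batch_labels)
    pbar.update(i)

pbar.finish()
dataset.close()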
Example #21
# extract labels from image paths
labels = [image_path.split(os.path.sep)[-2] for image_path in image_paths]
le = LabelEncoder()
labels = le.fit_transform(labels)

# store batch-size for convenience
batch_size = args['batch_size']

# load VGG16
print('[INFO] loading VGG16...')
model = VGG16(weights='imagenet', include_top=False)

# initialize the HDF5 dataset writer
dataset = HDF5DatasetWriter(args['output'], (len(image_paths), 7 * 7 * 512),
                            data_key='features',
                            buf_size=args['buffer_size'])
dataset.storeClassLabels(le.classes_)

# construct progressbar
widgets = [
    'Extracting features ',
    progressbar.Percentage(), ' ',
    progressbar.Bar(), ' ',
    progressbar.ETA()
]
pbar = progressbar.ProgressBar(maxval=len(image_paths),
                               widgets=widgets).start()

for i in range(0, len(image_paths), batch_size):
    batch_paths = image_paths[i:i + batch_size]
Example #22
    random_state=42)

# construct a list pairing the training, validation and testing image paths
# along with their corresponding labels and output HDF5 files
datasets = [('train', train_paths, train_labels, config.TRAIN_HDF5),
            ('val', val_paths, val_labels, config.VAL_HDF5),
            ('test', test_paths, test_labels, config.TEST_HDF5)]

# initialize the image preprocessor and list of RGB channel averages
aap = AspectAwarePreprocessor(256, 256)
R, G, B = [], [], []

# loop over the dataset tuples
for d_type, paths, labels, output_path in datasets:
    # create HDF5 writer
    print(f'[INFO] building {output_path}...')
    writer = HDF5DatasetWriter((len(labels), 256, 256, 3), output_path)

    # initialize progress bar
    widgets = [
        'Building dataset: ',
        progressbar.Percentage(), ' ',
        progressbar.Bar(), ' ',
        progressbar.ETA()
    ]
    pbar = progressbar.ProgressBar(maxval=len(labels), widgets=widgets).start()

    # loop over the image paths
    for i, (path, label) in enumerate(zip(paths, labels)):
        image = cv2.imread(path)
        image = aap.preprocess(image)
Example #23
random.shuffle(imagePaths)

# extract the class labels from the image paths, then encode them
labels = [p.split(os.path.sep)[-2] for p in imagePaths]
le = LabelEncoder()
labels = le.fit_transform(labels)

# load the VGG16 network
print("[INFO] loading network...")
model = VGG16(weights="imagenet", include_top=False)

# initialize the HDF5 dataset writer, then store the class label
# names in the dataset
dataset = HDF5DatasetWriter((len(imagePaths), 512 * 7 * 7),
                            args["output"],
                            dataKey="features",
                            bufSize=args["buffer_size"])
dataset.storeClassLabels(le.classes_)

# initialize the progress bar
widgets = [
    "Extracting Features: ",
    progressbar.Percentage(), " ",
    progressbar.Bar(), " ",
    progressbar.ETA()
]
pbar = progressbar.ProgressBar(maxval=len(imagePaths), widgets=widgets).start()

# loop over the images in batches
for i in np.arange(0, len(imagePaths), bs):
    # extract the batch of images and labels, then initialize the
Example #24
# initialize DATASETS for easy access
DATASETS = [('train', train_paths, train_labels, config.TRAIN_HDF5),
            ('val', val_paths, val_labels, config.VAL_HDF5),
            ('test', test_paths, test_labels, config.TEST_HDF5)]

# initialize preprocessor
aap = AspectAwarePreprocessor(256, 256)

# initialize the lists of R, G, B channel means
R, G, B = [], [], []

# loop over DATASETS
for d_type, image_paths, labels, output_path in DATASETS:
    # initialize HDF5 dataset writer
    writer = HDF5DatasetWriter(output_path, (len(labels), 256, 256, 3))

    # construct progressbar
    widgets = [
        f'Building {d_type}: ',
        progressbar.Percentage(), ' ',
        progressbar.Bar(), ' ',
        progressbar.ETA()
    ]
    pbar = progressbar.ProgressBar(maxval=len(labels), widgets=widgets).start()

    # loop over the image paths
    for i, (image_path, label) in enumerate(zip(image_paths, labels)):
        image = cv2.imread(image_path)
        image = aap.preprocess(image)
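The excerpt ends mid-loop. Judging from the near-identical snippets above (Examples #1 and #9), it presumably continues by accumulating the training means and writing each image; this is an assumed completion:

        # accumulate per-channel means on the training split only
        if d_type == 'train':
            b, g, r = cv2.mean(image)[:3]
            R.append(r)
            G.append(g)
            B.append(b)

        writer.add([image], [label])
        pbar.update(i)

    pbar.finish()
    writer.close()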