Example #1
def build_hdf5(dataset, dataset_mean_path, label_encoder_path):
    # list of R, G, B means
    R, G, B = [], [], []

    # initialize image preprocessor
    aap = AspectAwarePreprocessor(256, 256)

    # loop over the dataset tuples
    for d_type, paths, labels, output_path in dataset:
        # construct HDF5 dataset writer
        writer = HDF5DatasetWriter((len(labels), 256, 256, 3), output_path)
        # construct progress bar
        widgets = [
            f'Building {d_type}: ',
            progressbar.Percentage(), ' ',
            progressbar.Bar(), ' ',
            progressbar.ETA()
        ]
        pbar = progressbar.ProgressBar(maxval=len(labels),
                                       widgets=widgets).start()

        for i, (path, label) in enumerate(zip(paths, labels)):
            image = cv2.imread(path)

            image = aap.preprocess(image)

            if d_type == 'train':
                b, g, r = cv2.mean(image)[:3]
                R.append(r)
                G.append(g)
                B.append(b)

            writer.add([image], [label])
            pbar.update(i)

        writer.close()
        pbar.finish()

    if not os.path.exists(config.OUTPUT_BASE):
        os.makedirs(config.OUTPUT_BASE)

    # serialize means of R, G, B
    print('[INFO] serializing means...')
    D = {'R': np.mean(R), 'G': np.mean(G), 'B': np.mean(B)}
    f = open(dataset_mean_path, 'w')
    f.write(json.dumps(D))
    f.close()

    # serialize label encoder (assumes `le` is a fitted LabelEncoder defined
    # elsewhere in the calling script -- it is not created inside this function)
    print('[INFO] serializing label encoder...')
    f = open(label_encoder_path, 'wb')
    f.write(pickle.dumps(le))
    f.close()
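# --- Hedged sketch (not part of the original example): a minimal HDF5 writer
# exposing the interface used above (constructor with dims/output path, add(),
# storeClassLabels(), close()), built on h5py. The real HDF5DatasetWriter may
# buffer and store data differently; class/parameter names here are assumptions.
import os
import h5py


class SimpleHDF5Writer:
    def __init__(self, dims, outputPath, dataKey="images", bufSize=1000):
        if os.path.exists(outputPath):
            raise ValueError("output path already exists: " + outputPath)
        self.db = h5py.File(outputPath, "w")
        self.data = self.db.create_dataset(dataKey, dims, dtype="float")
        self.labels = self.db.create_dataset("labels", (dims[0],), dtype="int")
        self.bufSize = bufSize
        self.buffer = {"data": [], "labels": []}
        self.idx = 0  # next row to write

    def add(self, rows, labels):
        # buffer the rows/labels and flush to disk once the buffer is full
        self.buffer["data"].extend(rows)
        self.buffer["labels"].extend(labels)
        if len(self.buffer["data"]) >= self.bufSize:
            self.flush()

    def flush(self):
        i = self.idx + len(self.buffer["data"])
        self.data[self.idx:i] = self.buffer["data"]
        self.labels[self.idx:i] = self.buffer["labels"]
        self.idx = i
        self.buffer = {"data": [], "labels": []}

    def storeClassLabels(self, classLabels):
        # store the raw class label strings in a separate dataset
        dt = h5py.special_dtype(vlen=str)
        labelSet = self.db.create_dataset("label_names",
                                          (len(classLabels),), dtype=dt)
        labelSet[:] = classLabels

    def close(self):
        if len(self.buffer["data"]) > 0:
            self.flush()
        self.db.close()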
Example #2
def pre_process_data(dimensions,image_paths):
	# construct the image generator for data augmentation
	aug = ImageDataGenerator(rotation_range=30, width_shift_range=0.1,
		height_shift_range=0.1, shear_range=0.2, zoom_range=0.2,
		horizontal_flip=True, fill_mode="nearest")
	# initialize the image preprocessors
	aap = AspectAwarePreprocessor(dimensions,dimensions)
	iap = ImageToArrayPreprocessor()
	# load the dataset from disk then scale the raw pixel intensities to
	# the range [0, 1]
	sdl = SimpleDatasetLoader(preprocessors=[aap, iap])
	(data, labels) = sdl.load(image_paths, verbose=500)
	data = data.astype("float") / 255.0
	return data, labels
def separate_data(imagePaths, classNames):
    print("[INFO] Separating data.......................")
    iap = ImageToArrayPreprocessor()
    aap = AspectAwarePreprocessor(WIDTH, HEIGHT)
    sdl = SimpleDatasetLoader(preprocessors=[aap, iap])
    (data, labels) = sdl.load(imagePaths, verbose=500)
    data = data.astype("float") / 255.0

    # partition the data into training and testing splits using 75% of
    # the data for training and the remaining 25% for testing
    (trainX, testX, trainY, testY) = train_test_split(data, labels,
                                                      test_size=0.25, random_state=42)

    # convert the labels from integers to vectors
    trainY = LabelBinarizer().fit_transform(trainY)
    testY = LabelBinarizer().fit_transform(testY)

    if len(classNames) == 2:
        trainY = np.hstack((trainY, 1 - trainY))
        testY = np.hstack((testY, 1 - testY))

    return [trainX, testX, trainY, testY]
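# --- Illustration (not part of the original code): why the two-class special
# case above is needed. For binary problems sklearn's LabelBinarizer returns a
# single 0/1 column, so it is stacked with its complement to obtain a
# two-column one-hot matrix. A safer variant also fits the binarizer once and
# reuses it for the test split instead of calling fit_transform twice.
from sklearn.preprocessing import LabelBinarizer
import numpy as np

lb = LabelBinarizer()
y = lb.fit_transform(["cat", "dog", "dog", "cat"])  # shape (4, 1), values 0/1
y = np.hstack((y, 1 - y))                           # shape (4, 2), one-hot rows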
args = vars(ap.parse_args())

print('[INFO] moving image to label folder.....')
im = IM.MoveImageToLabel(dataPath=args['dataset'])
im.makeFolder()
im.move()

print("[INFO] loading images...")
imagePaths = [
    x for x in list(paths.list_images(args['dataset']))
    if x.split(os.path.sep)[-2] != 'jpg'
]
classNames = [pt.split(os.path.sep)[-2] for pt in imagePaths]
classNames = [str(x) for x in np.unique(classNames)]

aap = AAP.AspectAwarePreprocesser(64, 64)
iap = IAP.ImageToArrayPreprocess()

sdl = SDL.SimpleDatasetLoader(preprocessors=[aap, iap])
(data, labels) = sdl.load(imagePaths, verbose=500)
data = data.astype('float') / 255.0

(trainX, testX, trainY, testY) = train_test_split(data,
                                                  labels,
                                                  test_size=0.25,
                                                  random_state=43)

trainY = LabelBinarizer().fit_transform(trainY)
testY = LabelBinarizer().fit_transform(testY)

print("[INFO] compiling model....")
Example #5
aug = ImageDataGenerator(rotation_range=30,
                         width_shift_range=0.1,
                         height_shift_range=0.1,
                         shear_range=0.2,
                         zoom_range=0.2,
                         horizontal_flip=True,
                         fill_mode='nearest')

# load the images from disk and extract the labels
print("[INFO] loading images...")
imagePaths = list(paths.list_images(args['dataset']))
classNames = [pt.split(os.path.sep)[-2] for pt in imagePaths]
classNames = [str(x) for x in np.unique(classNames)]

# initialize the image preprocessors
aap = AAP.AspectAwarePreprocesser(224, 224)
iap = ITAP.ImageToArrayPreprocess()
# load the image data and apply the preprocessors
sdl = SDL.SimpleDatasetLoader(preprocessors=[aap, iap])
(data, labels) = sdl.load(imagePaths, verbose=500)
data = data.astype("float") / 255.0

# partition the data into training and testing splits
(trainX, testX, trainY, testY) = train_test_split(data,
                                                  labels,
                                                  test_size=0.25,
                                                  random_state=42)
# convert the labels from integers to vectors
trainY = LabelBinarizer().fit_transform(trainY)
testY = LabelBinarizer().fit_transform(testY)
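# --- Hedged sketch (not from the original examples): the ImageToArrayPreprocessor
# used throughout this page most likely just wraps Keras' img_to_array so the
# channel ordering can be chosen per call site (e.g. dataFormat="channels_first"
# further down this page). The class name below is an assumption.
from keras.preprocessing.image import img_to_array


class SimpleImageToArrayPreprocessor:
    def __init__(self, dataFormat=None):
        # None defers to the image_data_format configured in keras.json
        self.dataFormat = dataFormat

    def preprocess(self, image):
        # reorder the image dimensions according to the configured data format
        return img_to_array(image, data_format=self.dataFormat)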
Example #6
print("Number of Images in each category")
for i in range(config.NUM_CLASSES):
    print(i, trainLabels.count(i))

split = train_test_split(trainPaths,
                         trainLabels,
                         test_size=NUM_VAL_IMAGES,
                         stratify=trainLabels,
                         random_state=42)
(trainPaths, valPaths, trainLabels, valLabels) = split

datasets = [("train", trainPaths, trainLabels, TRAIN_HDF5),
            ("val", valPaths, valLabels, VAL_HDF5),
            ("test", testPaths, testLabels, TEST_HDF5)]

aap = AspectAwarePreprocessor(IMAGE_HEIGHT, IMAGE_WIDTH)
(R, G, B) = ([], [], [])

# loop over the dataset tuples
IMAGE_PATH = IMAGES_PATH
for (dType, paths, labels, outputPath) in datasets:
    # create HDF5 writer
    print("[INFO] building {}...".format(outputPath))
    writer = HDF5DatasetWriter((len(paths), IMAGE_HEIGHT, IMAGE_WIDTH, 3),
                               outputPath)

    # initialize the progress bar
    widgets = [
        "Building Dataset: ",
        progressbar.Percentage(), " ",
        progressbar.Bar(), " ",
# perform another stratified sampling, this time to build the validation data
split = train_test_split(trainPaths,
                         trainLabels,
                         trainIds,
                         test_size=round(len(imagePaths) * 0.15),
                         stratify=trainLabels)
(trainPaths, valPaths, trainLabels, valLabels, trainIds, valIds) = split

# construct a list pairing the training, validation, and testing
# image paths along with their corresponding labels and output HDF5 files
datasets = [("train", trainPaths, trainLabels, trainIds, config.TRAIN_HDF5),
            ("val", valPaths, valLabels, valIds, config.VAL_HDF5),
            ("test", testPaths, testLabels, testIds, config.TEST_HDF5)]

# initialize the image pre-processor and the lists of RGB channel averages
aap = AspectAwarePreprocessor(config.INPUT_SIZE, config.INPUT_SIZE)
(R, G, B) = ([], [], [])

# loop over the dataset tuples
for (dType, paths, labels, ids, outputPath) in datasets:
    # create HDF5 writer
    print("[INFO] building {}...".format(outputPath))
    writer = HDF5DatasetWriter(
        (len(paths), config.INPUT_SIZE, config.INPUT_SIZE, 3), outputPath)
    writer.storeClassLabels(le.classes_)

    # initialize the progress bar
    widgets = [
        "Building Dataset: ",
        progressbar.Percentage(), " ",
        progressbar.Bar(), " ",
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import os

ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required=True, help="path to input dataset")
args = vars(ap.parse_args())

print("[INFO] loading images...")
imagePaths = list(paths.list_images(args["dataset"]))
classNames = [pt.split(os.path.sep)[-2] for pt in imagePaths]
classNames = [str(x) for x in np.unique(classNames)]

aap = AspectAwarePreprocessor(64, 64)
iap = ImageToArrayPreprocessor()

sdl = SimpleDatasetLoader(preprocessors=[aap, iap])
(data, labels) = sdl.load(imagePaths, verbose=500)
data = data.astype("float") / 255.0

(trainX, testX, trainY, testY) = train_test_split(data,
                                                  labels,
                                                  test_size=0.25,
                                                  random_state=42)

trainY = LabelBinarizer().fit_transform(trainY)
testY = LabelBinarizer().fit_transform(testY)

print("[INFO] compiling model...")
Example #9
aug = ImageDataGenerator(rotation_range=30,
                         width_shift_range=0.1,
                         height_shift_range=0.1,
                         shear_range=0.2,
                         zoom_range=0.2,
                         horizontal_flip=True,
                         fill_mode="nearest")

# grab the list of images that we'll be describing, then extract
# the class label names from the image paths
print("[INFO] loading images...")
imagePaths = list(paths.list_images(args["dataset"]))
classNames = [pt.split(os.path.sep)[-2] for pt in imagePaths]
classNames = [str(x) for x in np.unique(classNames)]

# initialize the image preprocessors
aap = AspectAwarePreprocessor(224, 224)
iap = ImageToArrayPreprocessor()

# load the dataset from disk then scale the raw pixel intensities to
# the range [0, 1]
sdl = SimpleDatasetLoader(preprocessors=[aap, iap])
(data, labels) = sdl.load(imagePaths, verbose=500)
data = data.astype("float") / 255.0

# partition the data into training and testing splits using 75% of
# the data for training and the remaining 25% for testing
(trainX, testX, trainY, testY) = train_test_split(data,
                                                  labels,
                                                  test_size=0.25,
                                                  random_state=42)
Example #10
def main():
    """Fine tune VGG16
    """
    # construct the argument parse and parse the arguments
    args = argparse.ArgumentParser()
    args.add_argument("-d",
                      "--dataset",
                      required=True,
                      help="path to input dataset")
    args.add_argument("-m",
                      "--model",
                      required=True,
                      help="path to output model")
    args = vars(args.parse_args())

    # construct the image generator for data augmentation
    augmentation = ImageDataGenerator(
        rotation_range=30,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode="nearest",
    )

    # grab the list of images that we'll be describing, then extract
    # the class label names from the image paths
    print("[INFO] loading images...")
    image_paths = list(paths.list_images(args["dataset"]))
    class_names = [pt.split(os.path.sep)[-2] for pt in image_paths]
    class_names = [str(x) for x in np.unique(class_names)]

    # initialize the image preprocessors
    aspect_aware_preprocessor = AspectAwarePreprocessor(224, 224)
    image_to_array_preprocessor = ImageToArrayPreprocessor()

    # load the dataset from disk then scale the raw pixel intensities to the range [0, 1]
    simple_dataset_loader = SimpleDatasetLoader(
        preprocessors=[aspect_aware_preprocessor, image_to_array_preprocessor])
    (data, labels) = simple_dataset_loader.load(image_paths, verbose=500)
    data = data.astype("float") / 255.0

    # partition the data into training and testing splits using 75% of
    # the data for training and the remaining 25% for testing
    (train_x, test_x, train_y, test_y) = train_test_split(data,
                                                          labels,
                                                          test_size=0.25,
                                                          random_state=42)
    # convert the labels from integers to vectors
    train_y = LabelBinarizer().fit_transform(train_y)
    test_y = LabelBinarizer().transform(test_y)

    # load the VGG16 network, ensuring the head FC layer sets are left off
    base_model = VGG16(weights="imagenet",
                       include_top=False,
                       input_tensor=Input(shape=(224, 224, 3)))

    # initialize the new head of the network, a set of FC layers followed by a softmax classifier
    head_model = FCHeadNet.build(base_model, len(class_names), 256)

    # place the head FC model on top of the base model -- this will
    # become the actual model we will train
    model = Model(inputs=base_model.input, outputs=head_model)

    # loop over all layers in the base model and freeze them so they
    # will *not* be updated during the training process
    for layer in base_model.layers:
        layer.trainable = False

    # compile our model (this needs to be done after setting our layers to be non-trainable)
    print("[INFO] compiling model...")
    opt = RMSprop(lr=0.001)
    model.compile(loss="categorical_crossentropy",
                  optimizer=opt,
                  metrics=["accuracy"])

    # train the head of the network for a few epochs (all other  layers are frozen) -- this will
    # allow the new FC layers to start to become initialized with actual "learned" values
    # versus pure random
    print("[INFO] training head...")
    model.fit_generator(
        augmentation.flow(train_x, train_y, batch_size=32),
        validation_data=(test_x, test_y),
        epochs=25,
        steps_per_epoch=len(train_x) // 32,
        verbose=1,
    )

    # evaluate the network after initialization
    print("[INFO] evaluating after initialization...")
    predictions = model.predict(test_x, batch_size=32)
    print(
        classification_report(test_y.argmax(axis=1),
                              predictions.argmax(axis=1),
                              target_names=class_names))

    # now that the head FC layers have been trained/initialized, let's
    # unfreeze the final set of CONV layers and make them trainable
    for layer in base_model.layers[15:]:
        layer.trainable = True

    # for the changes to the model to take effect we need to recompile
    # the model, this time using SGD with a *very* small learning rate
    print("[INFO] re-compiling model...")
    opt = SGD(lr=0.001)
    model.compile(loss="categorical_crossentropy",
                  optimizer=opt,
                  metrics=["accuracy"])

    # train the model again, this time fine-tuning *both* the final set
    # of CONV layers along with our set of FC layers
    print("[INFO] fine-tuning model...")
    model.fit_generator(
        augmentation.flow(train_x, train_y, batch_size=32),
        validation_data=(test_x, test_y),
        epochs=100,
        steps_per_epoch=len(train_x) // 32,
        verbose=1,
    )
    # evaluate the network on the fine-tuned model
    print("[INFO] evaluating after fine-tuning...")
    predictions = model.predict(test_x, batch_size=32)
    print(
        classification_report(test_y.argmax(axis=1),
                              predictions.argmax(axis=1),
                              target_names=class_names))

    # save the model to disk
    print("[INFO] serializing model...")
    model.save(args["model"])
import numpy as np
import argparse
import random
import os
import cv2

ap = argparse.ArgumentParser()
ap.add_argument('-m', '--model', required=True, help='path to input model')
ap.add_argument('-d', '--dataset', required=True, help='path to dataset')
args = vars(ap.parse_args())

model = load_model(args['model'])
imagepaths = list(paths.list_images(args['dataset']))
classNames = sorted(os.listdir(args['dataset']))
random.shuffle(imagepaths)
aap = AspectAwarePreprocessor(224, 224)
iap = ImageToArrayPreprocessor()
imagePath = random.choice(imagepaths)
while True:

    image = cv2.imread(imagePath)
    original = image.copy()
    image = aap.preprocess(img_to_array(image))
    image = iap.preprocess(image)
    image = np.expand_dims(image, axis=0)

    label = imagePath.split(os.path.sep)[-2]
    pred = model.predict(image, batch_size=1)
    pred = classNames[pred.argmax(axis=1)[0]]
    cv2.putText(original, "label: {}".format(label, pred), (10, 20),
                cv2.FONT_HERSHEY_COMPLEX, 0.7, (250, 250, 250), 2)
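    # --- Hedged completion (the example is cut off here): show the annotated
    # frame, then pick a new random image; pressing 'q' quits. The window
    # handling below is an assumption, not part of the original snippet.
    cv2.imshow("Prediction", original)
    key = cv2.waitKey(0) & 0xFF
    if key == ord("q"):
        break
    imagePath = random.choice(imagepaths)

cv2.destroyAllWindows()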
Example #12
# coding: utf-8

from pyimagesearch.datasets import SimpleDatasetLoader
from pyimagesearch.preprocessing import ImageToArrayPreprocessor
from pyimagesearch.preprocessing import AspectAwarePreprocessor
import numpy as np
from imutils import paths
from matplotlib import pyplot as plt
from keras import layers
import keras
import keras.backend as K
import cv2
from keras.models import Model, load_model

imagePaths = list(paths.list_images("train"))
aap = AspectAwarePreprocessor(256, 256)
#iap = ImageToArrayPreprocessor()
sdl = SimpleDatasetLoader(preprocessors=[aap])
(data, labels) = sdl.load(imagePaths, verbose=500)
print("[INFO] training data loaded")
# print(data.shape)
# plt.imshow(data[0])
# plt.show()

width = 256
height = 256
channels = 3

input_layer = layers.Input(name='input', shape=(height, width, channels))

# Encoder
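# --- Hedged sketch (not part of the original snippet, which is cut off after
# the "# Encoder" comment): one possible convolutional encoder/decoder wired to
# the input_layer above. Layer sizes and the mse loss are assumptions, not the
# original author's architecture; inputs are assumed scaled to [0, 1].
x = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(input_layer)
x = layers.MaxPooling2D((2, 2), padding='same')(x)
x = layers.Conv2D(16, (3, 3), activation='relu', padding='same')(x)
encoded = layers.MaxPooling2D((2, 2), padding='same')(x)

# Decoder: mirror the encoder, upsampling back to the input resolution
x = layers.Conv2D(16, (3, 3), activation='relu', padding='same')(encoded)
x = layers.UpSampling2D((2, 2))(x)
x = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(x)
x = layers.UpSampling2D((2, 2))(x)
decoded = layers.Conv2D(channels, (3, 3), activation='sigmoid', padding='same')(x)

autoencoder = Model(input_layer, decoded)
autoencoder.compile(optimizer='adam', loss='mse')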
from pyimagesearch.preprocessing import ImageToArrayPreprocessor
from pyimagesearch.preprocessing import AspectAwarePreprocessor

from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

WIDTH = 224
HEIGHT = 224
CHANNEL = 3

iap = ImageToArrayPreprocessor()
aap = AspectAwarePreprocessor(WIDTH, HEIGHT)


def main():
    ap = argparse.ArgumentParser()
    ap.add_argument("-i", "--images", help="path to input images")
    ap.add_argument("-a", "--audios", help="path to input audios")
    ap.add_argument("-m",
                    "--model",
                    default="model.model",
                    help="path to input model")
    ap.add_argument("-c", "--camera", type=int, default=0)
    ap.add_argument("-r", "--record", help="record file while testing")
    # ap.add_argument("-o", "--output",  help="path to output images")
    args = vars(ap.parse_args())
Example #14
print("[INFO] loading images...")
imagePaths = list(paths.list_images(config.IMAGES_PATH))
data = pd.read_csv('labels.csv')
# ids = data['id']
classNames = data['breed']
classNames = [str(x) for x in np.unique(classNames)]
data_dict = data.set_index('id')['breed'].to_dict()

# le = LabelEncoder()
# labels = le.fit_transform(labels)
# labels_inv = le.inverse_transform(labels)
# classNames = [pt.split(os.path.sep)[-2] for pt in imagePaths]
# classNames = [str(x) for x in np.unique(classNames)]

# initialize the image preprocessors
aap = AspectAwarePreprocessor(config.INPUT_SIZE, config.INPUT_SIZE)
iap = ImageToArrayPreprocessor()

# load the dataset from disk then scale the raw pixel intensities to
# the range [0, 1]
sdl = SimpleDatasetLoader(preprocessors=[aap, iap])
(data, ids) = sdl.load(imagePaths, verbose=500)
labels = [data_dict[i] for i in ids]
data = data.astype("float") / 255.0

# partition the data into training and testing splits using 75% of
# the data for training and the remaining 25% for testing
(trainX, testX, trainY, testY) = train_test_split(data, labels,
	test_size=0.25, stratify=labels)

# convert the labels to vectors
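# --- Hedged completion (the snippet is cut off here): following the pattern in
# the other examples on this page, a single fitted LabelBinarizer can turn the
# breed strings into one-hot vectors:
from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer()
trainY = lb.fit_transform(trainY)
testY = lb.transform(testY)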
Example #15
# Construct image generator for data augmentation
aug = ImageDataGenerator(rotation_range=30,
                         width_shift_range=0.1,
                         height_shift_range=0.1,
                         shear_range=0.2,
                         zoom_range=0.2,
                         horizontal_flip=True)

# Grab the list of images and extract class labels for evaluating
print('[INFO] loading images...')
image_paths = list(paths.list_images(args['dataset']))
class_names = [image_path.split(os.path.sep)[-2] for image_path in image_paths]
class_names = [str(x) for x in np.unique(class_names)]

# initialize image preprocessors
aap, iap = AspectAwarePreprocessor(224, 224), ImageToArrayPreprocessor()

# load dataset and scale to range[0, 1]
sdl = SimpleDatasetLoader(preprocessors=[aap, iap])
data, labels = sdl.load(image_paths, verbose=500)
data = data.astype('float') / 255

# split data to train and test set
trainX, testX, trainY, testY = train_test_split(data,
                                                labels,
                                                test_size=0.25,
                                                random_state=42)

# convert labels to vector
le = LabelBinarizer()
trainY = le.fit_transform(trainY)
(trainPaths, testPaths, trainLabels, testLabels) = split
# split the new training set again into training and validation splits
split = train_test_split(trainPaths,
                         trainLabels,
                         test_size=config.NUM_VAL_IMAGES,
                         stratify=trainLabels,
                         random_state=42)
(trainPaths, valPaths, trainLabels, valLabels) = split

# build a list pairing the splits with their output HDF5 files
datasets = [("train", trainPaths, trainLabels, config.TRAIN_HDF5),
            ("val", valPaths, valLabels, config.VAL_HDF5),
            ("test", testPaths, testLabels, config.TEST_HDF5)]

# initialize the image preprocessor
aap = AAP.AspectAwarePreprocessor(256, 256)
(R, G, B) = ([], [], [])

# loop over the dataset tuples
for (dType, paths, labels, outputPath) in datasets:
    # HDF5 writer
    print("[INFO] building {}...".format(outputPath))
    writer = HDF.HDF5DatasetWriter((len(paths), 256, 256, 3), outputPath)

    # initialize the progress bar
    widgets = [
        "Building Dataset: ",
        progressbar.Percentage(), " ",
        progressbar.Bar(), " ",
        progressbar.ETA()
    ]
def main():
    """Run image classification
    """
    # construct the argument parse and parse the arguments
    args = argparse.ArgumentParser()
    args.add_argument("-d",
                      "--dataset",
                      required=True,
                      help="path to input dataset")
    args = vars(args.parse_args())

    # grab the list of images that we'll be describing, then extract
    # the class label names from the image paths
    print("[INFO] loading images...")
    image_paths = list(paths.list_images(args["dataset"]))
    class_names = [pt.split(os.path.sep)[-2] for pt in image_paths]
    class_names = [str(x) for x in np.unique(class_names)]

    # initialize the image preprocessors
    aspect_aware_preprocessor = AspectAwarePreprocessor(64, 64)
    image_to_array_preprocessor = ImageToArrayPreprocessor()
    # load the dataset from disk then scale the raw pixel intensities to the range [0, 1]
    simple_dataset_loader = SimpleDatasetLoader(
        preprocessors=[aspect_aware_preprocessor, image_to_array_preprocessor])

    (data, labels) = simple_dataset_loader.load(image_paths, verbose=500)
    data = data.astype("float") / 255.0

    # partition the data into training and testing splits using 75% of
    # the data for training and the remaining 25% for testing
    (train_x, test_x, train_y, test_y) = train_test_split(data,
                                                          labels,
                                                          test_size=0.25,
                                                          random_state=42)
    # convert the labels from integers to vectors
    train_y = LabelBinarizer().fit_transform(train_y)
    test_y = LabelBinarizer().fit_transform(test_y)

    # initialize the optimizer and model
    print("[INFO] compiling model...")
    opt = SGD(lr=0.05)
    model = MiniVGGNet.build(width=64,
                             height=64,
                             depth=3,
                             classes=len(class_names))
    model.compile(loss="categorical_crossentropy",
                  optimizer=opt,
                  metrics=["accuracy"])

    # train the network
    print("[INFO] training network...")
    model_fit = model.fit(train_x,
                          train_y,
                          validation_data=(test_x, test_y),
                          batch_size=32,
                          epochs=100,
                          verbose=1)

    # evaluate the network
    print("[INFO] evaluating network...")
    predictions = model.predict(test_x, batch_size=32)
    print(
        classification_report(test_y.argmax(axis=1),
                              predictions.argmax(axis=1),
                              target_names=class_names))
    # plot the training loss and accuracy
    plt.style.use("ggplot")
    plt.figure()
    plt.plot(np.arange(0, 100), model_fit.history["loss"], label="train_loss")
    plt.plot(np.arange(0, 100),
             model_fit.history["val_loss"],
             label="val_loss")
    plt.plot(np.arange(0, 100), model_fit.history["acc"], label="train_acc")
    plt.plot(np.arange(0, 100), model_fit.history["val_acc"], label="val_acc")
    plt.title("Training Loss and Accuracy")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend()
    plt.show()
Example #18
rows = open(config.TEST_MX_LIST).read().strip().split("\n")
rows = np.random.choice(rows, size=args["sample_size"])

# load our pre-trained model
print("[INFO] loading pre-trained model...")
checkpointsPath = os.path.sep.join([args["checkpoints"], args["prefix"]])
model = mx.model.FeedForward.load(checkpointsPath, args["epoch"])

# compile the model
model = mx.model.FeedForward(ctx=[mx.gpu(0)],
                             symbol=model.symbol,
                             arg_params=model.arg_params,
                             aux_params=model.aux_params)

# initialize the image pre-processors
sp = AspectAwarePreprocessor(width=224, height=224)
mp = MeanPreprocessor(config.R_MEAN, config.G_MEAN, config.B_MEAN)
iap = ImageToArrayPreprocessor(dataFormat="channels_first")

# loop over the testing images
for row in rows:
    # grab the target class label and the image path from the row
    (target, imagePath) = row.split("\t")[1:]
    target = int(target)

    # load the image from disk and pre-process it by resizing the
    # image and applying the pre-processors
    image = cv2.imread(imagePath)
    orig = image.copy()
    orig = imutils.resize(orig, width=min(500, orig.shape[1]))
    image = iap.preprocess(mp.preprocess(sp.preprocess(image)))
# construct argument parser and parse the argument
ap = argparse.ArgumentParser()
ap.add_argument('-d', '--dataset', required=True, help='path to input dataset')
ap.add_argument('-o',
                '--output',
                required=True,
                help='path to output for training monitor')
args = vars(ap.parse_args())

# grab the list of image paths
print('[INFO] loading images...')
image_paths = list(paths.list_images(args['dataset']))

# initialize the preprocessors
aap, iap = AspectAwarePreprocessor(64, 64), ImageToArrayPreprocessor()

# load the images and scale them to the range [0, 1]
sdl = SimpleDatasetLoader(preprocessors=[aap, iap])
data, labels = sdl.load(image_paths, verbose=500)
data = data.astype('float') / 255.

# partition data and label
trainX, testX, trainY, testY = train_test_split(data,
                                                labels,
                                                test_size=0.25,
                                                random_state=42)

# convert label to vector
le = LabelBinarizer()
trainY = le.fit_transform(trainY)
Example #20
def main():
    """Serialize the dataset
    """
    # grab the paths to the images
    train_paths = list(paths.list_images(config.IMAGES_PATH))
    train_labels = [
        p.split(os.path.sep)[-1].split(".")[0] for p in train_paths
    ]
    label_encoder = LabelEncoder()
    train_labels = label_encoder.fit_transform(train_labels)

    # perform stratified sampling from the training set to build the
    # testing split from the training data
    split = train_test_split(train_paths,
                             train_labels,
                             test_size=config.NUM_TEST_IMAGES,
                             stratify=train_labels,
                             random_state=42)
    (train_paths, test_paths, train_labels, test_labels) = split

    # perform another stratified sampling, this time to build the validation data
    split = train_test_split(train_paths,
                             train_labels,
                             test_size=config.NUM_VAL_IMAGES,
                             stratify=train_labels,
                             random_state=42)
    (train_paths, val_paths, train_labels, val_labels) = split

    # construct a list pairing the training, validation, and testing image paths along
    # with their corresponding labels and output HDF5 files
    datasets = [
        ("train", train_paths, train_labels, config.TRAIN_HDF5),
        ("val", val_paths, val_labels, config.VAL_HDF5),
        ("test", test_paths, test_labels, config.TEST_HDF5),
    ]

    # initialize the image preprocessor and the lists of RGB channel averages
    aap = AspectAwarePreprocessor(256, 256)
    (R, G, B) = ([], [], [])
    # loop over the dataset tuples
    for (dataset_type, path_list, labels, output_path) in datasets:
        # create HDF5 writer
        print("[INFO] building {}...".format(output_path))
        writer = HDF5DatasetWriter((len(path_list), 256, 256, 3), output_path)
        # initialize the progress bar
        widgets = [
            "Building Dataset: ",
            progressbar.Percentage(), " ",
            progressbar.Bar(), " ",
            progressbar.ETA()
        ]
        pbar = progressbar.ProgressBar(maxval=len(path_list),
                                       widgets=widgets).start()
        # loop over the image paths
        for (i, (path, label)) in enumerate(zip(path_list, labels)):
            # load the image and process it
            image = cv2.imread(path)
            image = aap.preprocess(image)
            # if we are building the training dataset, then compute the mean of
            # each channel in the image, then update the respective lists
            if dataset_type == "train":
                (b, g, r) = cv2.mean(image)[:3]
                R.append(r)
                G.append(g)
                B.append(b)
            # add the image and label to the HDF5 dataset
            writer.add([image], [label])
            pbar.update(i)
        # close the HDF5 writer
        pbar.finish()
        writer.close()
    # construct a dictionary of averages, then serialize the means to a JSON file
    print("[INFO] serializing means...")
    rgb_dict = {"R": np.mean(R), "G": np.mean(G), "B": np.mean(B)}
    f = open(config.DATASET_MEAN, "w")
    f.write(json.dumps(rgb_dict))
    f.close()
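# --- Hedged sketch (not part of the original example): at training/inference
# time the serialized channel means are typically loaded back and subtracted
# per channel, which is what the MeanPreprocessor used elsewhere on this page
# suggests. A minimal stand-in (function name is an assumption):
import json
import cv2


def load_mean_preprocessor(mean_path):
    means = json.loads(open(mean_path).read())

    def preprocess(image):
        # split into channels (OpenCV order is BGR), subtract the dataset
        # means, then merge the channels back together
        (B, G, R) = cv2.split(image.astype("float32"))
        R -= means["R"]
        G -= means["G"]
        B -= means["B"]
        return cv2.merge([B, G, R])

    return preprocess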
Example #21
split = train_test_split(trainPaths,
                         trainLabels,
                         stratify=trainLabels,
                         test_size=config.NUM_VAL_IMAGES,
                         random_state=42)
(trainPaths, valPaths, trainLabels, valLabels) = split

# construct a list pairing the training, validation, and testing
# image paths along with their corresponding labels and output HDF5
# files
dataset = [("train", trainPaths, trainLabels, config.TRAIN_HDF5),
           ('test', testPaths, testLabels, config.TEST_HDF5),
           ("val", valPaths, valLabels, config.VAL_HDF5)]

aap = AspectAwarePreprocessor(256, 256)
(R, G, B) = ([], [], [])

for (dtype, paths, labels, outputPath) in dataset:
    print('[INFO] building {}...'.format(outputPath))
    writer = HDF5DatasetWriter((len(paths), 256, 256, 3),
                               outputPath=outputPath)
    widgets = [
        'Building Dataset: ',
        progressbar.Percentage(), " ",
        progressbar.Bar(), " ",
        progressbar.ETA()
    ]
    pgbar = progressbar.ProgressBar(max_value=len(paths),
                                    widgets=widgets).start()
Example #22
import os

# construct argument parser and parse the argument
ap = argparse.ArgumentParser()
ap.add_argument('-d',
                '--dataset',
                required=True,
                help='path to the input dataset')
args = vars(ap.parse_args())

# get the list of image paths
print('[INFO] loading images...')
image_paths = list(paths.list_images(args['dataset']))

# initialize the preprocessors
aap, iap = AspectAwarePreprocessor(width=64,
                                   height=64), ImageToArrayPreprocessor()

# load the images and scale them to the range [0, 1]
sdl = SimpleDatasetLoader(preprocessors=[aap, iap])
data, labels = sdl.load(image_paths, verbose=500)
data = data.astype('float') / 255.

# partition data
trainX, testX, trainY, testY = train_test_split(data,
                                                labels,
                                                test_size=0.25,
                                                random_state=42)

# convert it to vector
le = LabelBinarizer()
trainY = le.fit_transform(trainY)
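# --- Hedged completion (the page cuts this example off here): the matching
# transform for the test labels, reusing the binarizer fitted on trainY:
testY = le.transform(testY)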