Example #1
0
def build_hdf5(dataset, dataset_mean_path, label_encoder_path):
    """Write every dataset split to HDF5, then serialize RGB means and the label encoder.

    Each image is read with ``cv2.imread``, passed through an
    ``AspectAwarePreprocessor(256, 256)`` and appended to the split's HDF5
    file together with its label.

    Args:
        dataset: iterable of ``(d_type, paths, labels, output_path)`` tuples.
            Per-channel means are accumulated only for the split whose
            ``d_type`` equals ``'train'``.
        dataset_mean_path: path of the JSON file that receives the averaged
            ``R``, ``G`` and ``B`` values.
        label_encoder_path: path of the file that receives the pickled
            label encoder.

    NOTE(review): the label encoder is read from the module-level name
    ``le`` rather than passed in -- confirm it is defined before this
    function is called.
    """
    # per-image channel means, collected for the training split only
    R, G, B = [], [], []

    # initialize image preprocessor
    aap = AspectAwarePreprocessor(256, 256)

    # loop over DATASETS
    for d_type, paths, labels, output_path in dataset:
        # construct HDF5 dataset writer
        writer = HDF5DatasetWriter((len(labels), 256, 256, 3), output_path)
        # construct progress bar
        widgets = [
            f'Building {d_type}: ',
            progressbar.Percentage(), ' ',
            progressbar.Bar(), ' ',
            progressbar.ETA()
        ]
        pbar = progressbar.ProgressBar(maxval=len(labels),
                                       widgets=widgets).start()

        for i, (path, label) in enumerate(zip(paths, labels)):
            image = cv2.imread(path)

            image = aap.preprocess(image)

            if d_type == 'train':
                # cv2.imread yields channels in B, G, R order
                b, g, r = cv2.mean(image)[:3]
                R.append(r)
                G.append(g)
                B.append(b)

            writer.add([image], [label])
            pbar.update(i)

        writer.close()
        pbar.finish()

    # exist_ok avoids the check-then-create race of os.path.exists()
    os.makedirs(config.OUTPUT_BASE, exist_ok=True)

    # serialize means of R, G, B
    print('[INFO] serialzing means...')
    D = {'R': np.mean(R), 'G': np.mean(G), 'B': np.mean(B)}
    # context manager guarantees the handle is closed even if the write fails
    with open(dataset_mean_path, 'w') as f:
        f.write(json.dumps(D))

    # serialize label encoder
    print('[INFO] serializing label encoder...')
    with open(label_encoder_path, 'wb') as f:
        f.write(pickle.dumps(le))
Example #2
0
# Build one HDF5 file per (dtype, paths, labels, outputPath) entry of
# `dataset`; the per-image channel means of the 'train' split are collected
# in R, G and B (all defined above this fragment).
for (dtype, paths, labels, outputPath) in dataset:
    print('[INFO] building {}...'.format(outputPath))
    writer = HDF5DatasetWriter((len(paths), 256, 256, 3),
                               outputPath=outputPath)
    widgets = [
        'Building Dataset: ',
        progressbar.Percentage(), " ",
        progressbar.Bar(), " ",
        progressbar.ETA()
    ]
    pgbar = progressbar.ProgressBar(max_value=len(paths),
                                    widgets=widgets).start()

    for (i, (path, label)) in enumerate(zip(paths, labels)):
        # load the image and run it through the preprocessor
        image = cv2.imread(path)
        image = aap.preprocess(image)

        if dtype == 'train':
            # cv2.imread yields channels in B, G, R order
            (b, g, r) = cv2.mean(image)[:3]
            R.append(r)
            B.append(b)
            G.append(g)
        writer.add([image], [label])
        pgbar.update(i)
    pgbar.finish()
    writer.close()

# average the collected channel means and store them as JSON
print('[INFO] serializing mean...')
D = {'R': np.mean(R), 'G': np.mean(G), 'B': np.mean(B)}
# the original never closed this handle; the context manager flushes and
# closes it deterministically
with open(config.DATASET_MEAN, 'w') as f:
    f.write(json.dumps(D))
Example #3
0
def main():
    """Serialize the dataset.

    Lists the images under ``config.IMAGES_PATH``, derives each label from
    the part of the file name before the first ``"."``, encodes the labels,
    and carves stratified test and validation splits out of the training
    data. Each split is then written to its HDF5 file
    (``config.TRAIN_HDF5``, ``config.VAL_HDF5``, ``config.TEST_HDF5``), and
    the mean R, G, B values of the training images are written as JSON to
    ``config.DATASET_MEAN``.
    """
    # grab the paths to the images
    train_paths = list(paths.list_images(config.IMAGES_PATH))
    train_labels = [
        p.split(os.path.sep)[-1].split(".")[0] for p in train_paths
    ]
    label_encoder = LabelEncoder()
    train_labels = label_encoder.fit_transform(train_labels)

    # perform stratified sampling from the training set to build the
    # testing split from the training data
    split = train_test_split(train_paths,
                             train_labels,
                             test_size=config.NUM_TEST_IMAGES,
                             stratify=train_labels,
                             random_state=42)
    (train_paths, test_paths, train_labels, test_labels) = split

    # perform another stratified sampling, this time to build the validation data
    split = train_test_split(train_paths,
                             train_labels,
                             test_size=config.NUM_VAL_IMAGES,
                             stratify=train_labels,
                             random_state=42)
    (train_paths, val_paths, train_labels, val_labels) = split

    # construct a list pairing the training, validation, and testing image paths along
    # with their corresponding labels and output HDF5 files
    datasets = [
        ("train", train_paths, train_labels, config.TRAIN_HDF5),
        ("val", val_paths, val_labels, config.VAL_HDF5),
        ("test", test_paths, test_labels, config.TEST_HDF5),
    ]

    # initialize the image preprocessor and the lists of RGB channel averages
    aap = AspectAwarePreprocessor(256, 256)
    (R, G, B) = ([], [], [])
    # loop over the dataset tuples
    for (dataset_type, path_list, labels, output_path) in datasets:
        # create HDF5 writer
        print("[INFO] building {}...".format(output_path))
        writer = HDF5DatasetWriter((len(path_list), 256, 256, 3), output_path)
        # initialize the progress bar
        widgets = [
            "Building Dataset: ",
            progressbar.Percentage(), " ",
            progressbar.Bar(), " ",
            progressbar.ETA()
        ]
        pbar = progressbar.ProgressBar(maxval=len(path_list),
                                       widgets=widgets).start()
        # loop over the image paths
        for (i, (path, label)) in enumerate(zip(path_list, labels)):
            # load the image and process it
            image = cv2.imread(path)
            image = aap.preprocess(image)
            # if we are building the training dataset, then compute the mean of
            # each channel in the image, then update the respective lists
            # (cv2.imread yields channels in B, G, R order)
            if dataset_type == "train":
                (b, g, r) = cv2.mean(image)[:3]
                R.append(r)
                G.append(g)
                B.append(b)
            # add the image and label # to the HDF5 dataset
            writer.add([image], [label])
            pbar.update(i)
        # close the HDF5 writer
        pbar.finish()
        writer.close()
    # construct a dictionary of averages, then serialize the means to a JSON file
    print("[INFO] serializing means...")
    rgb_dict = {"R": np.mean(R), "G": np.mean(G), "B": np.mean(B)}
    # context manager guarantees the handle is closed even if the write fails
    with open(config.DATASET_MEAN, "w") as f:
        f.write(json.dumps(rgb_dict))
Example #4
0
# preprocessors applied, in order, to every test image before prediction
sp = AspectAwarePreprocessor(width=224, height=224)
mp = MeanPreprocessor(config.R_MEAN, config.G_MEAN, config.B_MEAN)
iap = ImageToArrayPreprocessor(dataFormat="channels_first")

# loop over the testing images
for row in rows:
    # grab the target class label and the image path from the row
    (target, imagePath) = row.split("\t")[1:]
    target = int(target)

    # load the image from disk and pre-process it by resizing the
    # image and applying the pre-processors
    image = cv2.imread(imagePath)
    orig = image.copy()
    orig = imutils.resize(orig, width=min(500, orig.shape[1]))
    image = iap.preprocess(mp.preprocess(sp.preprocess(image)))
    image = np.expand_dims(image, axis=0)

    # classify the image and grab the indexes of the top-5 predictions
    preds = model.predict(image)[0]
    idxs = np.argsort(preds)[::-1][:5]

    # show the true class label; inverse_transform expects 1-D array-like
    # input (a bare scalar is rejected by current scikit-learn), so wrap
    # the index in a list and take the single result
    print("[INFO] actual={}".format(le.inverse_transform([target])[0]))

    # format and display the top predicted class label
    label = le.inverse_transform([idxs[0]])[0]
    label = label.replace(":", " ")
    label = "{}: {:.2f}%".format(label, preds[idxs[0]] * 100)
    cv2.putText(orig, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                (0, 255, 0), 2)
# NOTE(review): this fragment starts mid-script -- `ap` is presumably an
# argparse.ArgumentParser created above the visible region; confirm.
ap.add_argument('-m', '--model', required=True, help='path to input model')
ap.add_argument('-d', '--dataset', required=True, help='path to dataset')
args = vars(ap.parse_args())

# load the model, list the dataset images, and take the sorted entries of
# the dataset directory as the class names
model = load_model(args['model'])
imagepaths = list(paths.list_images(args['dataset']))
classNames = sorted(os.listdir(args['dataset']))
random.shuffle(imagepaths)
aap = AspectAwarePreprocessor(224, 224)
iap = ImageToArrayPreprocessor()
# NOTE(review): the image path is picked once here and is not reassigned in
# the visible part of the loop, so every iteration shows the same image --
# confirm whether a re-pick follows below the visible region.
imagePath = random.choice(imagepaths)
while True:

    # read the image, keep an untouched copy for display, and turn the
    # other one into a single-image batch for the model
    image = cv2.imread(imagePath)
    original = image.copy()
    image = aap.preprocess(img_to_array(image))
    image = iap.preprocess(image)
    image = np.expand_dims(image, axis=0)

    # the true label is the name of the image's parent directory
    label = imagePath.split(os.path.sep)[-2]
    pred = model.predict(image, batch_size=1)
    # map the highest-scoring index to its class name
    pred = classNames[pred.argmax(axis=1)[0]]
    # NOTE(review): the format string has a single placeholder, so `pred`
    # is ignored here and only the true label is drawn.
    cv2.putText(original, "label: {}".format(label, pred), (10, 20),
                cv2.FONT_HERSHEY_COMPLEX, 0.7, (250, 250, 250), 2)
    cv2.imshow('Flower 17', original)
    # block until a key is pressed: 'q' quits, 'n' annotates the prediction
    key = cv2.waitKey(0)
    if key == ord('q'):
        break
    elif key == ord('n'):
        # NOTE(review): the annotated frame is not shown again in the
        # visible code -- the loop tail may be cut off here; confirm.
        cv2.putText(original, "predicted: {}".format(pred), (10, 50),
                    cv2.FONT_HERSHEY_COMPLEX, 0.7, (255, 255, 255), 2)