# Example 1
# (stray "0" from the original paste removed from execution path)
import cv2

# Command-line interface: three required path arguments.
ap = argparse.ArgumentParser()
for short_flag, long_flag, desc in (
    ("-d", "--dataset", "path to input dataset"),
    ("-m", "--model", "path to output model"),
    ("-o", "--output", "path to the output image files"),
):
    ap.add_argument(short_flag, long_flag, required=True, help=desc)
# vars() exposes the parsed Namespace as a plain dict.
args = vars(ap.parse_args())

# Class names in the order the network's output indices map to them.
classLabels = ["cat", "dog", "panda"]

print("[INFO] sampling images...")
# Gather every image path in the dataset, then draw 10 random indices
# (with replacement) to select a sample for visualization.
imagePaths = np.array(list(paths.list_images(args["dataset"])))
sample = np.random.randint(0, len(imagePaths), size=(10,))
imagePaths = imagePaths[sample]

# Load the sampled images: resize each to 32x32 (aspect ratio ignored),
# then convert to a Keras-ordered array.
sdl = simpledatasetloader.SimpleDatasetLoader(
    preprocessors=[
        simplepreprocessor.SimplePreprocessor(32, 32),
        imagetoarraypreprocessor.ImageToArrayPreprocessor(),
    ]
)
(data, labels) = sdl.load(imagePaths)
# Scale raw pixel intensities from [0, 255] down to [0, 1].
data = data.astype("float") / 255.0

print("[INFO] loading pre-trained network...")
# Restore the serialized model from disk.
model = load_model(args["model"])

print("[INFO] predicting...")
# Per-image class probabilities; argmax picks the winning class index.
probs = model.predict(data, batch_size=32)
preds = probs.argmax(axis=1)

# Annotate each sampled image with its predicted class label.
# NOTE(review): the loop body appears truncated here — the annotated image is
# never written to args["output"] nor displayed; confirm against the original.
for (i, imagePath) in enumerate(imagePaths):
	# Re-read the original full-resolution image from disk for drawing.
	image = cv2.imread(imagePath)
	# Draw the predicted label in green at the top-left corner.
	cv2.putText(image, "Label: {}".format(classLabels[preds[i]]), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
                default=1,
                help="# of nearest neighbors for classification")
# Number of parallel jobs for the k-NN distance computation.
ap.add_argument("-j", "--jobs", type=int, default=-1,
                help="# of jobs for k-NN distance (-1 uses all available cores)")
# vars() converts the argparse Namespace into a plain dict.
args = vars(ap.parse_args())
'''Step 1: getting the dataset'''
print("[INFO] loading images...")
dataPath = args["dataset"]
# Collect the full image paths using a convenience function from imutils.
# NOTE(review): this rebinds the name `paths`, shadowing the imported
# imutils `paths` module for the rest of the script.
paths = list(paths.list_images(dataPath))
# Initialize the preprocessor, which resizes each image to a fixed 32x32
# without preserving the aspect ratio.
preprocessor = simplepreprocessor.SimplePreprocessor(32, 32)
# Load the dataset with the preprocessor applied to every image.
# Fix: class name is SimpleDatasetLoader (capital L), matching its usage
# elsewhere in this file; the lowercase form raises AttributeError.
sdl = simpledatasetloader.SimpleDatasetLoader([preprocessor])
(data, labels) = sdl.load(paths, verbose=500)
# k-NN needs flat feature vectors: reshape each 32x32x3 image to a
# 3072-dim row, e.g. (3000, 32, 32, 3) -> (3000, 3072).
data = data.reshape(data.shape[0], 32 * 32 * 3)
# Show memory consumption of the feature matrix.
# Fix: 1 MB = 1024 * 1024 bytes (was 1024 * 1000, mislabeling the unit).
print(f"[INFO] features matrix: {data.nbytes / (1024 * 1024.0)}MB")
# Step 2: split the dataset into train, test, and validation sets.
# Encode the string class names ("cat", "dog", ...) as integer ids 0..k-1.
le = LabelEncoder()
labels = le.fit_transform(labels)  # e.g. set(labels) becomes {0, 1, 2}

# Splitting: train 75%, test 25%. random_state seeds the random division.
# trainX, testX: train and test data; trainY, testY: their labels.