def recognize_whole(model, lb, charList):
    """Recognize the text spelled out by a sequence of character images.

    Every image is converted from BGR to grayscale, passed through
    ``captchahelper.preprocess`` with ``config.INPUT_SIZE`` for both size
    arguments, converted with ``img_to_array``, divided by 255.0 and
    classified on its own as a batch of one.

    Args:
        model: Object exposing ``predict``; called once per character.
        lb: Object exposing ``inverse_transform``; presumably the label
            encoder fitted at training time -- confirm against the caller.
        charList: Iterable of BGR character images, in reading order.

    Returns:
        The decoded labels concatenated in input order ('' for empty input).
    """
    decoded = []
    for char in charList:
        image = cv.cvtColor(char, cv.COLOR_BGR2GRAY)
        image = captchahelper.preprocess(image, config.INPUT_SIZE, config.INPUT_SIZE)
        image = img_to_array(image)
        # add a leading batch axis and rescale the pixel values
        data = np.expand_dims(image, axis=0) / 255.0
        pred = model.predict(data)
        decoded.append(lb.inverse_transform(pred)[0])
    # join once at the end instead of growing a string with += per character
    return ''.join(decoded)
def read_data_labels(path):
    """Load every image found under *path* together with its class label.

    Images are read as grayscale, passed through ``captchahelper.preprocess``
    with ``config.INPUT_SIZE`` for both size arguments and converted with
    ``img_to_array``. The label of an image is the second-to-last component
    of its path, i.e. the directory that directly contains the file.

    Args:
        path: Root directory handed to ``paths.list_images``.

    Returns:
        A ``(data, labels)`` tuple of two parallel lists.
    """
    samples = []
    targets = []
    for image_file in paths.list_images(path):
        gray = cv.imread(image_file, cv.IMREAD_GRAYSCALE)
        prepared = captchahelper.preprocess(gray, config.INPUT_SIZE, config.INPUT_SIZE)
        # img_to_array hands back a 3D NumPy array
        samples.append(img_to_array(prepared))

        # .../<label>/<file>: the parent directory names the class
        path_parts = image_file.split(os.path.sep)
        targets.append(path_parts[-2])
    return samples, targets
# Segment one captcha image (`image`, BGR) into its four largest blobs,
# classify each blob with `model`, then print and display the result.

# convert to grayscale and replicate the border by 20px on every side
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.copyMakeBorder(gray, 20, 20, 20, 20, cv2.BORDER_REPLICATE)

# inverted binary threshold with Otsu's automatic threshold selection
thresh = cv2.threshold(gray, 0, 255,
                       cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

# findContours returns (contours, hierarchy) on OpenCV 2.4/4.x but
# (image, contours, hierarchy) on 3.x; the contour list is always the
# second-to-last element, so index it instead of unpacking two values
cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                        cv2.CHAIN_APPROX_SIMPLE)[-2]

# keep the four largest contours and order them with sort_contours
# (skipped when nothing was found so the helper never sees an empty list)
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:4]
if cnts:
    cnts = contours.sort_contours(cnts)[0]

# 3-channel copy of the grayscale image so coloured annotations can be drawn
output = cv2.merge([gray] * 3)
predictions = []

for c in cnts:
    (x, y, w, h) = cv2.boundingRect(c)

    # crop the blob with a 5px margin; clamp the start indices at 0 because
    # a negative slice start wraps around to the far edge of the array and
    # would yield an empty or wrong crop (the end index is clipped by NumPy)
    top = max(y - 5, 0)
    left = max(x - 5, 0)
    roi = gray[top:y + h + 5, left:x + w + 5]

    # prepare the crop as a batch of one and classify it
    roi = preprocess(roi, 28, 28)
    roi = np.expand_dims(img_to_array(roi), axis=0) / 255.0
    # the printed digit is the predicted class index plus one
    pred = model.predict(roi).argmax(axis=1)[0] + 1
    predictions.append(str(pred))

    # annotate the output image with the box and the predicted digit
    cv2.rectangle(output, (x - 2, y - 2), (x + w + 4, y + h + 4),
                  (0, 255, 0), 1)
    cv2.putText(output, str(pred), (x - 5, y - 5),
                cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 255, 0), 2)

# report the whole captcha and block until a key is pressed
print("[INFO] captcha: {}".format("".join(predictions)))
cv2.imshow("Output", output)
cv2.waitKey()
# command-line options: dataset directory to read and model path to write
ap.add_argument("-d", "--dataset", required=True, help="path to input dataset")
ap.add_argument("-m", "--model", required=True, help="path to output model")
args = vars(ap.parse_args())

# initialize the data and labels
data = []
labels = []

# loop over the input images
for imagePath in paths.list_images(args["dataset"]):
    # load the image, convert it to grayscale, pre-process it with size
    # arguments 28 and 28, and store it in the data list
    image = cv2.imread(imagePath)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = preprocess(image, 28, 28)
    image = img_to_array(image)
    data.append(image)

    # extract the class label from the image path (the name of the
    # directory containing the file) and update the labels list
    label = imagePath.split(os.path.sep)[-2]
    labels.append(label)

# divide the raw pixel values by 255.0 (maps 8-bit intensities to [0, 1])
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)

# partition the data into training and testing splits; the original comment
# states 75% for training and the remaining 25% for testing
# NOTE(review): the call below is cut off in this view -- its remaining
# arguments are not visible here, so the split ratio cannot be confirmed
(trainX, testX, trainY, testY) = train_test_split(data,
# Training-script preamble: parse the command line, load the labelled
# images, split them into train/test sets and one-hot encode the training
# labels.

# command-line options: dataset directory to read and model path to write
ap = argparse.ArgumentParser()
ap.add_argument('-d', '--dataset', required=True,
                help='path to dataset directory')
ap.add_argument('-m', '--model', required=True,
                help='path to output model')
args = vars(ap.parse_args())

data = []
labels = []

# the image paths are only walked once, so iterate them directly instead of
# first copying them into a throwaway list
for imagePath in paths.list_images(args['dataset']):
    # load the image as grayscale and pre-process it with width/height 28
    image = cv2.imread(imagePath)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = preprocess(image, width=28, height=28)
    image = img_to_array(image)
    data.append(image)

    # the class label is the name of the directory containing the file
    label = imagePath.split(os.path.sep)[-2]
    labels.append(label)

# divide the raw pixel values by 255.0 (maps 8-bit intensities to [0, 1])
data = np.array(data, dtype='float') / 255.0
labels = np.array(labels)

# hold out 25% of the samples for testing; fixed seed for a repeatable split
(trainX, testX, trainY, testY) = train_test_split(
    data, labels, test_size=0.25, random_state=42)

# fit the label binarizer on the training labels and encode them
lb = LabelBinarizer()
trainY = lb.fit_transform(trainY)
def main():
    """Evaluate model performance on randomly sampled captcha images.

    Parses ``--input`` (directory of images) and ``--model`` (path of the
    network to load), samples up to ten images without replacement and, for
    each one, extracts the four largest contours, classifies every crop,
    prints the predicted captcha and shows the annotated image until a key
    is pressed.
    """
    # construct the argument parser and parse the arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", required=True,
                        help="path to input directory of images")
    parser.add_argument("-m", "--model", required=True,
                        help="path to input model")
    args = vars(parser.parse_args())

    # load the pre-trained network
    print("[INFO] loading pre-trained network...")
    model = load_model(args["model"])

    # randomly sample a few of the input images; sampling without
    # replacement cannot draw more items than exist, so cap the sample size
    # at the number of images available instead of always asking for 10
    image_paths = list(paths.list_images(args["input"]))
    sample_size = min(10, len(image_paths))
    image_paths = np.random.choice(image_paths, size=(sample_size, ),
                                   replace=False)

    # loop over the image paths
    for image_path in image_paths:
        # load the image and convert it to grayscale, then pad the image so
        # that digits touching the border of the image are retained
        image = cv2.imread(image_path)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        gray = cv2.copyMakeBorder(gray, 20, 20, 20, 20, cv2.BORDER_REPLICATE)

        # threshold the image to reveal the digits
        thresh = cv2.threshold(gray, 0, 255,
                               cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

        # find contours in the image, keeping only the four largest ones,
        # then sort them with sort_contours (skipped when nothing was found
        # so the helper never receives an empty list)
        cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)
        cnts = imutils.grab_contours(cnts)
        cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:4]
        if cnts:
            cnts = contours.sort_contours(cnts)[0]

        # initialize the output image as a "grayscale" image with 3
        # channels along with the output predictions
        output = cv2.merge([gray] * 3)
        predictions = []

        # loop over the contours
        for contour in cnts:
            # compute the bounding box for the contour then extract the
            # digit with a 5px margin; the start indices are clamped at 0
            # because a negative slice start wraps around to the far edge of
            # the array and would yield an empty or wrong crop
            (x, y, w, h) = cv2.boundingRect(contour)
            top = max(y - 5, 0)
            left = max(x - 5, 0)
            roi = gray[top:y + h + 5, left:x + w + 5]

            # pre-process the ROI and then classify it; the reported digit
            # is the predicted class index plus one
            roi = preprocess(roi, 28, 28)
            roi = np.expand_dims(img_to_array(roi), axis=0) / 255.0
            pred = model.predict(roi).argmax(axis=1)[0] + 1
            predictions.append(str(pred))

            # draw the prediction on the output image
            cv2.rectangle(output, (x - 2, y - 2), (x + w + 4, y + h + 4),
                          (0, 255, 0), 1)
            cv2.putText(output, str(pred), (x - 5, y - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 255, 0), 2)

        # show the output image
        print("[INFO] captcha: {}".format("".join(predictions)))
        cv2.imshow("Output", output)
        cv2.waitKey()
def main():
    """Train a LeNet model on the image captcha dataset.

    Parses ``--dataset`` (directory of labelled images, one sub-directory
    per class) and ``--model`` (output path), trains for 15 epochs on a
    75/25 train/test split, prints a classification report, saves the model
    and plots the loss/accuracy curves.
    """
    # construct the argument parser and parse the arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--dataset", required=True,
                        help="path to input dataset")
    parser.add_argument("-m", "--model", required=True,
                        help="path to output model")
    args = vars(parser.parse_args())

    # initialize the data and labels
    data = []
    labels = []

    # loop over the input images
    for image_path in paths.list_images(args["dataset"]):
        # load the image, pre-process it, and store it in the data list
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = preprocess(image, 28, 28)
        image = img_to_array(image)
        data.append(image)

        # extract the class label from the image path (the name of the
        # directory containing the file) and update the labels list
        label = image_path.split(os.path.sep)[-2]
        labels.append(label)

    # divide the raw pixel values by 255.0 (maps 8-bit intensities to [0, 1])
    data = np.array(data, dtype="float") / 255.0
    labels = np.array(labels)

    # partition the data into training and testing splits using 75% of
    # the data for training and the remaining 25% for testing
    (train_x, test_x, train_y, test_y) = train_test_split(
        data, labels, test_size=0.25, random_state=42)

    # convert the labels to one-hot vectors
    label_binarizer = LabelBinarizer().fit(train_y)
    train_y = label_binarizer.transform(train_y)
    test_y = label_binarizer.transform(test_y)

    # initialize the model; size the output layer from the classes actually
    # seen in the training labels rather than a hard-coded 9
    print("[INFO] compiling model...")
    num_classes = len(label_binarizer.classes_)
    model = LeNet.build(width=28, height=28, depth=1, classes=num_classes)
    opt = SGD(lr=0.01)
    model.compile(loss="categorical_crossentropy", optimizer=opt,
                  metrics=["accuracy"])

    # train the network
    print("[INFO] training network...")
    model_fit = model.fit(train_x, train_y,
                          validation_data=(test_x, test_y),
                          batch_size=32, epochs=15, verbose=1)

    # evaluate the network
    print("[INFO] evaluating network...")
    predictions = model.predict(test_x, batch_size=32)
    print(
        classification_report(test_y.argmax(axis=1),
                              predictions.argmax(axis=1),
                              target_names=label_binarizer.classes_))

    # save the model to disk
    print("[INFO] serializing network...")
    model.save(args["model"])

    # plot the training + testing loss and accuracy
    history = model_fit.history
    # take the x-axis from the recorded history so it always matches the
    # number of epochs that were actually run
    epoch_axis = np.arange(0, len(history["loss"]))
    # depending on the Keras release, accuracy is recorded under
    # "acc"/"val_acc" or "accuracy"/"val_accuracy"; use whichever is present
    acc_key = "acc" if "acc" in history else "accuracy"

    plt.style.use("ggplot")
    plt.figure()
    plt.plot(epoch_axis, history["loss"], label="train_loss")
    plt.plot(epoch_axis, history["val_loss"], label="val_loss")
    plt.plot(epoch_axis, history[acc_key], label="acc")
    plt.plot(epoch_axis, history["val_" + acc_key], label="val_acc")
    plt.title("Training Loss and Accuracy")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend()
    plt.show()