def main():
    """Convert every PDF in ``input_dir`` to per-page TIFFs, then OCR all
    images in ``input_dir`` and write the recognized text to ``output_dir``.
    """
    # Pass 1: rasterize each PDF into one TIFF file per page.
    for filename in os.listdir(input_dir):
        if filename.endswith('.pdf'):
            full_name = os.path.join(input_dir, filename)
            print(full_name)
            pages = convert_from_path(full_name, 500)  # render at 500 dpi
            base = os.path.splitext(full_name)[0]
            for page_number, page in enumerate(pages, start=1):
                page.save('{}_{}.tiff'.format(base, page_number), format='TIFF')

    # Hoisted out of the loop: the original rebuilt this list per filename.
    image_extensions = ('.png', '.jpg', '.jpeg', '.tiff')

    # Pass 2: OCR every image (including the TIFFs created above).
    for filename in os.listdir(input_dir):
        if filename.endswith(image_extensions):
            print(filename)
            file_address = os.path.join(input_dir, filename)
            img = process_image(file_address)
            # Recognize the text in the image with pytesseract.
            config = ''
            text = str(pytesseract.image_to_string(img, lang=langs, config=config))
            # Remove empty lines - s.strip() also drops whitespace-only lines.
            text = os.linesep.join(s for s in text.splitlines() if s.strip())
            outfile = os.path.join(
                output_dir, "out_" + os.path.splitext(filename)[0] + ".txt")
            # Explicit encoding so output is stable across platforms.
            with open(outfile, 'w', encoding='utf-8') as text_file:
                print(text, file=text_file)
def get_CANVAS():
    """Load the hand-drawn digit dataset from ``canvas_images/<digit>/``,
    preprocess it, and return shuffled train/dev/test splits (50/25/25).

    Returns:
        (X_train, Y_train, X_dev, Y_dev, X_test, Y_test) as numpy arrays.
    """
    folder = 'canvas_images'
    X = []
    Y = []
    for number in range(10):
        # os.path.join used consistently (original mixed '/' with join).
        digit_dir = os.path.join(folder, str(number))
        for filename in os.listdir(digit_dir):
            img = cv2.imread(os.path.join(digit_dir, filename),
                             cv2.IMREAD_GRAYSCALE)
            if img is not None:
                img = process_image(img)
                # TODO: is this reshape needed?
                X.append(img.reshape(image_size, image_size, 1))
                Y.append(number)
    # Zero-pad 2 px on each spatial border (e.g. 28x28 -> 32x32).
    X = np.pad(X, ((0, 0), (2, 2), (2, 2), (0, 0)), 'constant')
    X, Y = unison_shuffled_copies(np.array(X), np.array(Y))
    # Split sizes: half train, a quarter each for dev and test.
    train_size = X.shape[0] // 2
    dev_size = X.shape[0] // 4
    X_canvas_train = X[0:train_size, :]
    Y_canvas_train = Y[0:train_size]
    X_dev = X[train_size:train_size + dev_size, :]
    Y_dev = Y[train_size:train_size + dev_size]
    X_test = X[train_size + dev_size:, :]
    Y_test = Y[train_size + dev_size:]
    return X_canvas_train, Y_canvas_train, X_dev, Y_dev, X_test, Y_test
def main():
    """Binarize a sample PAN card image, OCR it, and extract PAN fields."""
    image_path = "/home/ananthu/projects/Document_Extraction/data/pan_16.jpg"
    binarized = process_image(image_path)
    binarized.save("pan.png")
    ocr_text = pytesseract.image_to_string(binarized)
    id_read("pan.png")
    data = pan_fill(ocr_text)
def predict(image_path, model, device, topk=5):
    """Return the top-``topk`` class probabilities and labels for one image.

    The image is preprocessed, pushed through ``model`` in eval mode on
    ``device``, and the log-output is exponentiated into probabilities.
    """
    # Invert class_to_idx so output indices map back to class labels.
    idx_map = {idx: label for label, idx in model.class_to_idx.items()}
    arr = preprocess.process_image(Image.open(image_path))
    batch = torch.from_numpy(arr).unsqueeze(0).to(device).float()
    model.eval()
    with torch.no_grad():
        log_probs = model.forward(batch)
    probabilities = torch.exp(log_probs)
    top_p, top_idx = probabilities.topk(topk)
    probs = top_p.cpu().numpy()[0].tolist()
    indices = top_idx.cpu().numpy()[0].tolist()
    classes = [idx_map[i] for i in indices]
    return probs, classes
def train_generator(batch_size):
    """Yield endless (images, targets) training batches sampled at random.

    Each batch draws ``batch_size`` rows from ``df_train`` (with
    replacement), loads and preprocesses the JPEG, and builds a 17-way
    multi-hot target vector from the space-separated tag string.
    """
    while True:
        images = []
        batch_targets = []
        for _ in range(batch_size):
            row = df_train.iloc[[random.randrange(len(df_train))]]
            name = str(row['image_name'].item())
            tag_string = row['tags'].item()
            image = cv2.imread('data/train-jpg/{}.jpg'.format(name))
            image = process_image(image)
            one_hot = np.zeros(17)
            for tag in tag_string.split(' '):
                one_hot[label_map[tag]] = 1
            images.append(image)
            batch_targets.append(one_hot)
        yield (np.array(images, np.float16) / 255.,
               np.array(batch_targets, np.uint8))
def authenticate():
    """Run the restored model on the global ``images`` batch, print the raw
    prediction, and always respond with the string "hii".
    """
    global images
    global same
    # Flatten each preprocessed image into one feature row.
    # (Loop variable renamed from the original's 'x', which visually
    # shadowed the global placeholder 'x' used in the feed dict below.)
    flat_rows = [process_image(img, same).flatten() for img in images]
    batch = np.squeeze(np.array([flat_rows]))
    print(batch.shape)
    print(labels.shape)
    batch = batch / 255
    # Restore the latest checkpoint before evaluating.
    saver.restore(
        sess,
        tf.train.latest_checkpoint(
            "/Users/kevin/Desktop/slohacks2019/models/"))
    prediction = y_conv.eval(
        feed_dict={x: batch, y_: labels[0], keep_prob: 1.0})
    print(prediction, max(prediction))
    return "hii"
def recognize():
    """Flask endpoint: decode a base64 canvas PNG from the request body and
    return the digit predicted by the session's network as JSON.
    """
    payload = request.get_json()
    # The data URL looks like "data:image/png;base64,<payload>".
    encoded = payload['data'].split(',')[1]
    filename = 'canvas_image.png'
    with open(filename, 'wb') as f:
        f.write(base64.b64decode(encoded))
    img = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
    img = process_image(img)
    # Normalize, then zero-pad 28x28 -> 32x32 to match the network input.
    img = img.reshape(1, 28, 28, 1) / 255.
    img = np.pad(img, ((0, 0), (2, 2), (2, 2), (0, 0)), 'constant')
    logits = sess.run(Y_hat, feed_dict={X: img})
    softmax = logits / logits.sum(axis=1, keepdims=True)
    digit = np.argmax(softmax, axis=1)
    return jsonify({'number': int(digit[0])}), 200
def generatePickle():
    """Preprocess every Braille image under the dataset directory and dump
    (image, label_index) pairs to ``braille-model.pickle``.
    """
    data = []
    categories = [
        'binata', 'buhay', 'dalaga', 'eksamen', 'ewan', 'gunita', 'halaman',
        'hapon', 'isip', 'kailangan', 'karaniwan', 'kislap', 'larawan',
        'mabuti', 'noon', 'opo', 'papaano', 'patuloy', 'roon', 'subalit',
        'talaga', 'ugali', 'wasto'
    ]
    # Renamed from 'dir', which shadowed the builtin.
    base_dir = 'C:\\Users\\Scadoodie\\Desktop\\Braille2C_Datasets'
    print('Generating pickle model...')
    # enumerate gives the label directly (original called categories.index
    # inside the loop - O(n^2) overall).
    for label, category in enumerate(categories):
        path = os.path.join(base_dir, category)
        print("Current directory being pre-processed: ", path)
        for img in os.listdir(path):
            imgpath = os.path.join(path, img)
            orig_img = cv.imread(imgpath, 1)  # load image in color
            try:
                image = process_image(orig_img)
                data.append([image, label])
            except Exception as e:
                # Best-effort: skip unreadable/corrupt images, but report
                # them instead of silently swallowing the error.
                print("Skipping {}: {}".format(imgpath, e))
    if not data:
        print("Data is empty")
    else:
        # Data length should contain 1180 images.
        print("Success! braille-model.pickle generated.")
        print("Data Length: ", len(data))
        # Context manager guarantees the file is closed even on error.
        with open('braille-model.pickle', 'wb') as pick_in:
            pickle.dump(data, pick_in)
def build_sequence(self, frames):
    """Preprocess each frame filename into a model-ready image list."""
    shape = self.image_shape
    return [process_image(frame, shape) for frame in frames]
# Train and evaluate the gesture model, then run live webcam prediction.
train(x_train=x_train, y_train=y_train, x_val=x_val, y_val=y_val)
test(x_test=x_test, y_test=y_test)
model = load_model('gesture-model.h5')
predicted_text = ""
cap = cv2.VideoCapture(0)  # default webcam
left_margin = 100  # NOTE(review): unused in the visible code - confirm
while True:
    ret, frame = cap.read()
    frame = cv2.flip(frame, 1)  # mirror so on-screen movement matches the user
    # Green rectangle marking the hand region of interest.
    cv2.rectangle(frame, (350, 150), (600, 400), (0, 255, 0))
    hand_frame = frame[150:400, 350:600]  # crop the ROI (rows, cols)
    processed = process_image(hand_frame)
    # Add batch and channel dims: (H, W) -> (1, H, W, 1).
    processed = np.reshape(processed, (1, processed.shape[0], processed.shape[1], 1))
    # Black side panel on which the predicted label is drawn.
    text_area = np.zeros((480, 480, 3), dtype=np.uint8)
    predicted = model.predict(processed)
    predicted_label = get_label(np.argmax(predicted))
    predicted_text = predicted_label
    cv2.putText(text_area, predicted_text, (4, 100), cv2.FONT_HERSHEY_COMPLEX, 2, (255, 255, 255))
    # Show the camera frame and the label panel side by side.
    frame = np.hstack((frame, text_area))
    # NOTE(review): loop body appears truncated here (no imshow/waitKey/release in view).
from preprocess import process_image from generateModel import generate, getPickle, updateModel from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, r2_score, recall_score, precision_score, f1_score categories = [ 'binata', 'buhay', 'dalaga', 'eksamen', 'ewan', 'gunita', 'halaman', 'hapon', 'isip', 'kailangan', 'karaniwan', 'kislap', 'larawan', 'mabuti', 'noon', 'opo', 'papaano', 'patuloy', 'roon', 'subalit', 'talaga', 'ugali', 'wasto' ] test_image_path = "..\\test-images\\test1.png" image = cv.imread(test_image_path, 1) image = process_image(image) image = np.expand_dims(image, 0) if os.path.isfile('braille-model.pickle'): print('Model found...') pickle = getPickle() model, xtrain, xtest, ytrain, ytest = updateModel(pickle) print('Predicting xtest...') prediction = model.predict(image) accuracy = model.score(xtest, ytest) print('Prediction Integer is :', prediction[0]) print('Prediction is :', categories[prediction[0]]) print(