def performOCR(self):
    """Send image to OCR and process the results"""
    self.OCRline = 0
    busyDialog = BusyDialog(self)
    busyDialog.show()
    QApplication.processEvents()
    if self.file_list.currentItem().valid_market:
        self.current_result = OCR(self.color_image)
        """
        try:
            self.current_result = OCR(self.color_image)
        except:
            QMessageBox.critical(self, "Error",
                                 "Error while performing OCR.\nPlease report the "
                                 "problem to the developers through github, sourceforge "
                                 "or forum and provide the screenshot which causes the "
                                 "problem.")
            return
        if self.current_result.station is None:
            QMessageBox.critical(self, "Error",
                                 "Screenshot not recognized.\n"
                                 "Make sure you use a valid screenshot from the "
                                 "commodities market. Should the problem persist, please "
                                 "recalibrate the OCR areas with Settings->Calibrate.")
            return
        """
        self.drawOCRPreview()
        self.markCurrentRectangle()
        self.drawStationName()
        self.skip_button.setEnabled(True)
        self.save_button.setEnabled(True)
        self.processOCRLine()
    else:
        self.nextFile()
def _test_ocr(self, train_file, test_file):
    # get data from images
    ground_truth = test_file.ground.classes
    test_file.remove_ground()
    # create OCR
    segmenter = ContourSegmenter(blur_y=5, blur_x=5)
    extractor = SimpleFeatureExtractor()
    classifier = KNNClassifier()
    ocr = OCR(segmenter, extractor, classifier)
    # train and test
    ocr.train(train_file)
    chars, classes, _ = ocr.ocr(test_file, show_steps=False)
    print chars
    print reconstruct_chars(ground_truth)
    self.assertEqual(chars, reconstruct_chars(ground_truth))
    self.assertEqual(list(classes), list(ground_truth))
def ocr(language, input, output, system):
    settings = Settings()
    if isfile(input):
        sys.stdout.write("\r[= ]")
        sys.stdout.flush()
        item = CustomQListWidgetItem(split(input)[1], input, settings)
        color_img = item.loadColorImage()
        h, w, c = color_img.shape
        item.img_height = h
        item.ocr_areas = OCRAreasFinder(color_img)
        sys.stdout.write("\r[== ]")
        sys.stdout.flush()
        item.market_width = item.ocr_areas.market_width
        item.hud_color = item.ocr_areas.hud_color
        points = item.ocr_areas.market_table
        item.valid_market = item.ocr_areas.valid
        if item.market_width < 1065:
            print "Image too small! Minimum market width required: 1065px, given: " + str(item.market_width) + "px"
            return 1
        result = OCR(None, color_img, item.ocr_areas, language, item)
        sys.stdout.write("\r")
        sys.stdout.flush()
        XMLOutput(language, input, output, item, result, system, w, h)
        return 0
    else:
        print "Input file not found!"
        return 1
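# Added usage sketch (not from the source): a one-off invocation of the CLI
# helper above. The language code, file names, and system name are
# placeholder assumptions.
status = ocr("eng", "screenshot.bmp", "market.xml", "Eravate")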
def main(argv):
    from itertools import count
    args = parse_args(argv)
    shell = ADBShell()
    model = get_model(shell)
    ident = get_ident(shell)
    assistant = AndroidAssistant(shell, ident, OCR(model))
    if args.repeat:
        iterations = count(1)
    else:
        iterations = ['']
    for i, suffix in enumerate(iterations):
        if i >= 1:
            assistant.restart()
        if args.outdir:
            outdir = args.outdir + str(suffix)
            try:
                os.makedirs(outdir)
            except OSError:
                pass
            run_assistant(assistant.gen_board_disk(outdir, args.resume), assistant.make_move, args.from_start)
        else:
            run_assistant(assistant.gen_board_mem(), assistant.make_move, args.from_start)
def run_lpr():
    logging.info(f'Debug mode {app.debug}')
    if request.method == 'POST':
        file = request.files['file']
        img_bytes = file.read()
        file.close()
        if img_bytes is not None:
            nparr = np.frombuffer(img_bytes, np.uint8)  # np.fromstring is deprecated
            inputImage = cv.imdecode(nparr, cv.IMREAD_COLOR)
            # TODO: state management: avoid loading net for every request
            yolo = Yolo(img_width=1056,
                        img_height=576,
                        debug=DEBUG,
                        confidence_threshold=0.6,
                        non_max_supress_theshold=0.4,
                        classes_filename='../config/classes.names',
                        model_architecture_filename="../config/yolov3_license_plates.cfg",
                        model_weights_filename="../config/yolov3_license_plates_last.weights",
                        output_directory='../debug/')
            roi_imgs = yolo.detect(inputImage)
            ocr = OCR(model_filename="../config/emnist_net_custom.pt",
                      num_classes=36,
                      use_cuda=False,
                      debug=DEBUG)
            for index, roi_img in enumerate(roi_imgs):
                logging.info(f'\n\nProcessing ROI {index}')
                box = [yolo.bounding_boxes[index][0],
                       yolo.bounding_boxes[index][1],
                       yolo.bounding_boxes[index][2],
                       yolo.bounding_boxes[index][3]]
                predict(yolo.img, roi_img, box, str(index), (0, 255, 0), ocr)
            # API response: the highest-confidence detection
            logging.info(f'\n\n---Processing the Highest Confidence ROI---\n')
            bounding_box = None
            emnist_net_preds = None
            tesseract_preds = None
            if yolo.highest_object_confidence > 0 and yolo.roi_img is not None:
                bounding_box = {
                    'x': yolo.box_x,
                    'y': yolo.box_y,
                    'w': yolo.box_w,
                    'h': yolo.box_h
                }
                _, emnist_net_preds, tesseract_preds = predict(
                    yolo.img, yolo.roi_img,
                    [yolo.box_x, yolo.box_y, yolo.box_w, yolo.box_h],
                    "", (255, 255, 0), ocr)
            if DEBUG:
                cv.imwrite("../debug/result.jpg", yolo.img.astype(np.uint8))
            data = {
                'bounding_box': bounding_box,
                'confidence': yolo.highest_object_confidence,
                'classId': str(yolo.classId_highest_object),
                'emnist_net_preds': emnist_net_preds,
                'tesseract_preds': tesseract_preds
            }
            response = jsonify(data)
            response.status_code = 200
            return response
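# Added client sketch (not from the source): exercising the POST endpoint
# above with the `requests` library. The route '/' matches the module-level
# snippet elsewhere in this collection; host, port, and image path are
# placeholder assumptions.
import requests

with open('car.jpg', 'rb') as f:
    resp = requests.post('http://localhost:5000/', files={'file': f})
print(resp.json())  # bounding_box, confidence, classId, emnist_net_preds, tesseract_preds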
def run(hidNum, lr):
    conf = Config()
    conf.lr = lr
    conf.hidNum = hidNum
    conf.cell_type = cell_type
    if "LSTMDRBM" in conf.cell_type:
        conf.gate_use = gate  # For LSTM-DRBM only
    if conf.cell_type == "BasicRNN":
        conf.activation = activation
    if "AE" in conf.cell_type:
        conf.f = f_act
        conf.g = g_act
    print("running ..." + conf.cell_type + "_" + conf.obj_func + " " + conf.opt
          + " hidNum=%d lr=%.5f" % (conf.hidNum, conf.lr))
    result_dir = (EXP_DIR + "/" + conf.opt + "_" + conf.cell_type
                  + "_" + conf.gate_use + "_" + conf.activation
                  + "_" + conf.f + "_" + conf.g + "_" + conf.obj_func
                  + "_h" + str(conf.hidNum) + "_b" + str(conf.batch_size)
                  + "_" + str(lr))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    result_log = result_dir + '/result_log.csv'
    if os.path.isfile(result_log):
        print("exist file " + result_log)
        return
    acc = []
    for fold in range(FOLD_NUM):
        print("Fold %d, training ..." % (fold + 1))
        result_fold_log = result_dir + "/fold_" + str(fold + 1) + "_log.csv"
        if os.path.isfile(result_fold_log):
            continue
        conf.ckp_file = result_dir + '/fold_' + str(fold + 1) + '.ckpt'
        dataset = OCR(DAT_DIR, fold)
        model = LSTM_SCRBM(conf, dataset)
        vld_acc, vld_nllh, vld_f1, tst_acc, tst_nllh, tst_f1, _ = model.run()
        acc.append([vld_acc, tst_acc])
        print("[Fold %d] : valid acc:%.5f test acc:%.5f" % (fold + 1, vld_acc, tst_acc))
    acc = np.mean(np.array(acc), axis=0)
    print("validation acc: %.5f test acc: %.5f" % (acc[0], acc[1]))
    # Save to CSV file
    print("Saving results ...")
    np.savetxt(result_log, acc, delimiter=',')
    # delete all checkpoints
    print("Clear checkpoint graph ...")
    os.remove(os.path.join(result_dir, "checkpoint"))
    ckpt_files = os.listdir(result_dir)
    for f in ckpt_files:
        if ".ckpt." in f:
            os.remove(os.path.join(result_dir, f))
def rungame(args):
    model = args.pop(0)
    d = args.pop(0)
    if args:
        startpoint = os.path.basename(args[0])
    else:
        startpoint = None
    run_assistant(gen_board(OCR(model), d, startpoint), make_move)
def rec_captcha(self, captcha_items):
    '''recognize captcha, either automatically or manually.'''
    if captcha_items:
        image_path = "images/captcha.jpg"
        urlretrieve(captcha_items['src'], image_path)
        return OCR().process_image(image_path)
    else:
        print("🕷 No CAPTCHA required for this login page.")
def __init__(self, prefix, dryRun):
    h, w = 300, 300
    img = np.zeros((h, w, 3), np.uint8)
    img[:, :] = (255, 255, 255)
    self.brush = Brush("Brush")
    self.sketcher = Sketcher("Dataset Creator", img, self.brush)
    self.__folders = OCR.generateFolderList(OCR.DIGITS | OCR.LETTERS | OCR.SYMBOLS)
    self.__prefix = prefix
    self.__lastFile = None
    self.__dryRun = dryRun
class CMND(object):
    def __init__(self):
        self.Detect_cmnd = CENTER_MODEL(
            weight_path="Detect_cmnd/weights/model_cmnd_best.pth")
        self.Detect_fields = Detector_fields()
        self.ocr = OCR()
        self.fields = ['id', 'name', 'date', 'ad1', 'ad2']

    def predict(self, img):
        restext = {}
        t1 = time.time()
        font = cv2.FONT_HERSHEY_SIMPLEX
        img_aligned = self.Detect_cmnd.detect(img)
        print("centernet ", time.time() - t1)
        t1 = time.time()
        if img_aligned is not None:
            img_aligned = cv2.resize(img_aligned, (800, 650))
            res, resimg = self.Detect_fields.detect(img_aligned, 0.3)
            print("yolo ", time.time() - t1)
            t1 = time.time()
            for x in self.fields:
                restext[x] = self.ocr.predict(resimg[x], res[x], x)
                if restext[x] is None:
                    restext[x] = ""
            print("ocr ", time.time() - t1)
        else:
            img_aligned = cv2.resize(img, (750, 600))
            res, resimg = self.Detect_fields.detect(img_aligned, 0.3)
            for x in self.fields:
                restext[x] = self.ocr.predict(resimg[x], res[x], x)
        restext['name'] = restext['name'].upper()
        return img_aligned, restext
def post(self):
    from settings import upload_path
    from ocr import OCR

    now_time = time.strftime('%Y-%m-%dT%H-%M-%S', time.localtime(time.time()))
    dir_prefix = now_time
    try:
        base64ImgData = self.request.arguments['data'][0].decode("utf-8")
        # strip the data-URL prefix (e.g. "data:image/png;base64,") before decoding
        base64ImgData = base64ImgData[base64ImgData.find(',') + 1:]
        imgData = base64.b64decode(base64ImgData)
    except:
        self.finish({'code': 0, 'message': "error"})
        return
    try:
        os.makedirs(os.path.join(upload_path, dir_prefix))
    except OSError:
        pass
    # save file
    filepath = os.path.join(upload_path, dir_prefix, "img.png")
    with open(filepath, 'wb') as up:
        up.write(imgData)
    # OCR
    ocrinstance = OCR()
    res = ocrinstance.getResult(filepath)
    statusCode = res['code']
    status = 'success' if statusCode == 1 else 'failure'
    text = res['text']
    # save result
    respath = os.path.join(upload_path, dir_prefix, 'result.txt')
    with open(respath, 'w', encoding="utf-8") as info:
        info.write(status + '\n' + text)
    self.finish({'code': statusCode, 'message': text})
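# Added client sketch (not from the source): posting a data-URL-encoded image
# to the handler above. The host, port, and route are placeholder assumptions.
import base64
import requests

with open('img.png', 'rb') as f:
    payload = 'data:image/png;base64,' + base64.b64encode(f.read()).decode()
resp = requests.post('http://localhost:8888/ocr', data={'data': payload})
print(resp.json())  # {'code': ..., 'message': ...}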
def main():
    # All things global should be defined here
    global ocrEngine, lbpEngine, shapesEngine, models
    lbpEngine = LocalBinaryPatterns(8, 24, "uniform")
    shapesEngine = Shapes()
    ocrEngine = OCR()
    # Load the pre-trained classification models
    models = {}
    models["linear"] = joblib.load("ml/linear_svm.model")
    models["rbf"] = joblib.load("ml/rbf_kernel_svm.model")
    models["knn"] = joblib.load("ml/knn.model")
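# Added follow-up sketch (not from the source): assuming the .model files are
# pickled scikit-learn estimators, a feature vector is classified like this;
# the vector below is a zero placeholder whose length must match the features
# used at training time.
import numpy as np

feature_vector = np.zeros(10)
label = models["rbf"].predict([feature_vector])[0]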
def classify(self, path):
    print("auto_classifying " + path)
    if path.endswith(("jpg", "png", "gif", "jpeg")):
        for t in self.search_history["txt_in_img"]:
            img2txt = OCR([path], lang="tur", search=t)
        img_class_img([path], templates=self.search_history["img_in_img"])
    elif path.endswith("pdf"):
        for t in self.search_history["txt_in_img"]:
            img2txt = OCR([path], lang="tur", search=t, file_type="pdf")
        img_class_img([path], templates=self.search_history["img_in_img"], file_type="pdf")
    elif path.endswith(("docx", "doc", "odt", "pptx", "ppt", "odp", "xlsx", "xls", "ods")):
        for t in self.search_history["txt_in_txt"]:
            pass
class Base:
    def setup_class(self):
        """Prepare the test environment. Start the browser if it is not already running."""
        browser = Browser()
        browser.browser_run(pytest.url, timeout=pytest.start_timeout)
        time.sleep(pytest.start_timeout)
        self.ocr = OCR(pytest.path_to_res,
                       search_accuracy=pytest.search_accuracy,
                       debug=pytest.debug_mode)
        self.mouse = Mouse()
        self.keyboard = Keyboard()

    def teardown_class(self):
        """Stop the test environment. Close the browser."""
        self.keyboard.press_button('alt+f4')

    def find_image(self, image_path, areas, convert='1'):
        result = False
        for a in areas:
            result = self.ocr.find_image_on_screen(image_path, area=a, convert=convert)
            if result:
                return result
            if pytest.debug_mode:
                print("In the block with coordinates {}, {}: image not found; "
                      "the closest fragment differs by {} rms".format(a[0], a[1], result.rms))
        if not result and pytest.repeat_search:
            result = self.ocr.find_image_on_screen(image_path, convert=convert)
        return result
def test_random_transcribed_image_post():
    print("|| loading transcribed posts in r/transcribersofreddit")
    s2 = Subreddit.get('transcribersofreddit')
    s2.retrievePosts(sort='new', flair='Completed!')
    print('--> ' + str(s2))
    print('\n|| retrieving random transcribed post')
    p2 = RedditPost.fromUrl(random.choice(s2.getPosts()).getCrosspostUrl())
    p2.retrieve()
    print("--> Title : " + str(p2.getTitle()))
    print("--> Image URL : " + str(p2.getImageUrl()))
    print("--> Image Object : " + str(p2.getImage()))
    if p2.getImage():
        print("\n +++ Image OCR Text +++ \n" + OCR.read(p2.getImage().convert('RGB')).strip())
        print("\n\n\n +++ Image Human Trscb +++ \n" + str(p2.getImageHumanTranscription()))
        print("\n\n\n +++\n")
    return (s2, p2)
def grade(self, student_responses):
    for problem_number, segmented_digits in student_responses.items():
        students_answer = ""
        confident = True
        for digit_image in segmented_digits:
            value, confidence = OCR(digit_image, self.model)
            students_answer += str(value)
            if confidence < CONFIDENCE_VALUE:
                confident = False
        correct_ans = self.answer_key[problem_number]
        if int(students_answer) == correct_ans:
            self.marks.append("correct")
        elif not confident:
            self.marks.append("unknown")
        else:
            self.marks.append("wrong")
def runOCR(self):
    self.add_files_button.setEnabled(False)
    self.remove_file_button.setEnabled(False)
    self.ocr_button.setEnabled(False)
    self.file_list.setEnabled(False)
    self.repaint()
    self.current_image = 0
    self.results = []
    self.images = []
    self.prev = []
    self.marketoffset = []
    self.stationoffset = []
    files = self.file_list.count()
    for i in xrange(files):
        self.file_list.setCurrentRow(i)
        item = self.file_list.currentItem()
        color_image = item.loadColorImage()
        preview_image = item.addTestImage(color_image)
        self.images.append(color_image)
        self.prev.append(preview_image)
        self.stationoffset.append(item.ocr_areas.station_name)
        self.marketoffset.append(item.ocr_areas.market_table)
        current_result = OCR(color_image, item.ocr_areas,
                             self.settings["ocr_language"], item,
                             levels=False, levenshtein=False)
        self.results.append(current_result)
    self.allBoxes()
    self.next_button.setEnabled(True)
    self.prev_button.setEnabled(False)
    self.showSet()
    self.notifier.setText("You did not check every word yet.")
def test_random_memes_post():
    print("this test script is for testing reddit object functionality and OCR")
    print("|| loading hot posts in r/memes")
    s1 = Subreddit.get('memes')
    s1.retrievePosts(sort='hot')
    print("--> " + str(s1))
    print("\n|| retrieving random post")
    p1 = random.choice(s1.getPosts())
    p1.retrieve()
    print("--> " + str(p1))
    print("--> Title : " + str(p1.getTitle()))
    print("--> Image URL : " + str(p1.getImageUrl()))
    print("--> Image Object : " + str(p1.getImage()))
    if p1.getImage():
        print("\n +++ Image OCR Text +++ \n" + OCR.read(p1.getImage().convert('RGB')).strip())
        print("\n\n\n +++ Image Human Trscb +++ \n" + str(p1.getImageHumanTranscription()))
        print("\n\n\n +++\n")
    print("\n|| retrieving random comment")
    c1 = random.choice(p1.getComments())
    print("--> " + str(c1))
    print("--> Text : " + c1.getText())
    return (s1, p1, c1)
parser.add_argument('--file', help='file to classify', type=str, nargs="*", default=[])
parser.add_argument('--tesseract', help='use tesseract for OCR', action='store_true')
parser.add_argument('--tesslangpath', help='file path for tesseract', type=str)
parser.add_argument('--terse', help='skip user prompts', action="store_true")
args = parser.parse_args()

verbose = args.verbose
terse = args.terse
force_train = args.retrain
use_tesseract = args.tesseract
tesslangpath = args.tesslangpath

segmenter = MinContourSegmenter(blur_y=5, blur_x=5, min_width=5, block_size=17, c=6, max_ratio=4.0)
extractor = SimpleFeatureExtractor(feature_size=10, stretch=False)
classifier = KNNClassifier(k=3)
ocr = OCR(segmenter, extractor, classifier)

for file_to_train in args.trainfile:
    training_image = ImageFile(file_to_train)
    if not training_image.isGrounded() or force_train:
        # trainingsegmenter = ContourSegmenter(blur_y=1, blur_x=1, min_width=3, min_height=15, max_height=50, min_area=30, block_size=23, c=3)  # tweaked for black font
        trainingsegmenter = ContourSegmenter(blur_y=1, blur_x=1, min_width=3, min_height=15,
                                             max_height=50, min_area=30, block_size=3, c=5,
                                             nearline_tolerance=10.0)  # tweaked for white font
        segments = trainingsegmenter.process(training_image.image)
        if verbose:
            trainingsegmenter.display()
        # grounder = UserGrounder()  # interactive version; lets the user review, assign ground truth data
        grounder = TextGrounder()  # non-interactive ground truth; assumes clean, ordered input
        grounder.ground(training_image, segments, "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")  # writes out a .box file of image ground truths
from files import ImageFile
from segmentation import ContourSegmenter
from feature_extraction import SimpleFeatureExtractor
from classification import KNNClassifier
from ocr import OCR, accuracy, show_differences

segmenter = ContourSegmenter(blur_y=5, blur_x=5, block_size=11, c=10)
extractor = SimpleFeatureExtractor(feature_size=10, stretch=False)
classifier = KNNClassifier()
ocr = OCR(segmenter, extractor, classifier)

ocr.train(ImageFile('digits1'))

test_image = ImageFile('digits2')
test_chars, test_classes, test_segments = ocr.ocr(test_image, show_steps=True)

print("accuracy:", accuracy(test_image.ground.classes, test_classes))
print("OCRed text:\n", test_chars)
class TestOCR:
    ESC = 27
    KEY_UP = 0
    KEY_DOWN = 1

    def __init__(self, model, filenames, flags, pattern):
        self.ocr = OCR()
        self.ocr.loadModel(model, OCR.MODEL_ANN, flags)
        if filenames:
            counter = {}
            for filename in filenames:
                ch = self.ocr.charFromFile(filename)
                if pattern:
                    label = self.__exec(pattern, filename)
                    if label in counter:
                        predicted, total = counter[label]
                        counter[label] = (predicted + int(label == ch), total + 1)
                    else:
                        counter[label] = (int(label == ch), 1)
                print "%s in %s" % (ch, filename)
            if pattern:
                self.__analyze(counter)
        else:
            h, w = 200, 200
            img = np.zeros((h, w, 3), np.uint8)
            img[:, :] = (255, 255, 255)
            self.brush = Brush("Brush")
            self.sketcher = Sketcher('Test OCR', img, self.brush)

    def __analyze(self, counter):
        print ""
        totalPercent = []
        for label, (predicted, total) in sorted(counter.iteritems()):
            percent = int(float(predicted) / total * 100)
            totalPercent.append(percent)
            print "{0}\t{1} / {2} - {3} %".format(label, predicted, total, percent)
        print "\nTotal recognized : {0:.1f} %\n".format(np.mean(totalPercent))

    def __exec(self, file, arg):
        out = subprocess.Popen([file, arg], stdout=subprocess.PIPE).communicate()[0]
        return out

    def __displayResponse(self, ch):
        response = np.zeros((75, 75, 3), np.uint8)
        response[:, :] = (255, 255, 255)
        cv2.putText(response, ch, (15, 35), cv2.FONT_HERSHEY_PLAIN, 3.0, (0, 0, 0), 1)
        cv2.imshow("Response", response)

    def run(self):
        while True:
            k = cv2.waitKey(0) & 0xFF
            if k == self.ESC:
                break
            elif k == ord('r'):
                self.sketcher.reset()
                self.sketcher.show()
            elif k == ord(' '):
                ch = self.ocr.charFromImage(self.sketcher.sketch)
                self.sketcher.show()
                self.sketcher.reset()
                self.__displayResponse(ch)
                print "OCR : %s" % ch
            elif k == self.KEY_UP:
                self.brush.brushSize += 1
            elif k == self.KEY_DOWN:
                self.brush.brushSize -= 1
def __init__(self, gui):
    QThread.__init__(self)
    self.ocr = OCR(debug=False, gui=gui)
    self.ocr_thread = threading.Thread(name="ocr_thread", target=self.ocr.main)
DEBUG = True

# TODO: state management and how to handle multiple requests on this?
yolo = Yolo(img_width=1056,
            img_height=576,
            confidence_threshold=0.6,
            non_max_supress_theshold=0.4,
            classes_filename='../config/classes.names',
            model_architecture_filename="../config/yolov3_license_plates.cfg",
            model_weights_filename="../config/yolov3_license_plates_last.weights",
            output_directory='../debug/',
            output_image=True)
ocr = OCR(model_filename="../config/attention_ocr_model.pth",
          use_cuda=False,
          threshold=0.7)


@app.route('/')
def index():
    return "Live and Running!"


@app.route('/', methods=['POST'])
def run_lpr():
    logging.info(f'Debug mode {app.debug}')
    if request.method == 'POST':
        file = request.files['file']
        img_bytes = file.read()
def img_ocr():
    ocr = OCR(title='图片文本识别')  # title: "image text recognition"
    ocr.construct_gui()
        features = numpy.asarray(features, dtype=numpy.float32)
        retval, result_classes, neigh_resp, dists = self.knn.find_nearest(features, k=1)
        return result_classes

########NEW FILE########
__FILENAME__ = example
from files import ImageFile
from segmentation import ContourSegmenter, draw_segments
from feature_extraction import SimpleFeatureExtractor
from classification import KNNClassifier
from ocr import OCR, accuracy, show_differences, reconstruct_chars

segmenter = ContourSegmenter(blur_y=5, blur_x=5, block_size=11, c=10)
extractor = SimpleFeatureExtractor(feature_size=10, stretch=False)
classifier = KNNClassifier()
ocr = OCR(segmenter, extractor, classifier)

ocr.train(ImageFile('digits1'))

test_image = ImageFile('digits2')
test_classes, test_segments = ocr.ocr(test_image, show_steps=True)

print "accuracy:", accuracy(test_image.ground.classes, test_classes)
print "OCRed text:\n", reconstruct_chars(test_classes)

show_differences(test_image.image, test_segments, test_image.ground.classes, test_classes)

########NEW FILE########
__FILENAME__ = feature_extraction
import numpy
import cv2
from segmentation import region_from_segment
for case in test_case_names:
    with open('./test_suite/test_files/expected/' + case + '.txt') as expected_file:
        test_cases.append((case + '.PNG', expected_file.read()))

# Execute test cases.
for file_name, expected_text in test_cases:
    print('=====================================')

    # Convert file to encoded Base64 string.
    file_path = test_files_path.format(file_name)
    with open(file_path, 'rb') as image_file:
        base_64_string = base64.b64encode(image_file.read())

    # Create OCR object.
    ocr = OCR(debug_mode=True)

    # Create temp_files directory.
    os.makedirs(ocr.temp_files_directory_path, exist_ok=True)

    # Get output.
    (_, recognized_text, _) = ocr.parse_image(base_64_string=base_64_string)

    # Delete temp_files directory.
    shutil.rmtree(ocr.temp_files_directory_path)

    # Case: test passed.
    if recognized_text == expected_text:
        print(test_passed.format(file_name))
        else:
            return '*'

    def _clean_rep(self, rep):
        return rep.strip().lower()

    def _valid_rep(self, rep):
        return rep != ''

    def _validate(self):
        if len(self.cleaned[0]) != 38:
            raise ValueError("Invalid column 0")
        if len(self.cleaned[1]) != 38:
            raise ValueError("Invalid column 1")
        if len(self.cleaned[2]) != 38:
            raise ValueError("Invalid column 2")
        if len(self.cleaned[3]) != 35:
            raise ValueError("Invalid column 3")


if __name__ == '__main__':
    print "Starting"
    # image = cv2.imread(sys.argv[1])
    # splitter = Splitter(image)
    # images_dir = splitter.crop()
    columns = OCR('buid/14-10-21-21')()
    counter = Counter(columns)
    print counter.count()
def setUp(self):
    self.ocr = OCR('AccountNumbers.txt')
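# Added companion-test sketch (not from the source): a notebook cell later in
# this collection shows the OCR object exposing a `lines` attribute, so a
# minimal assertion on the loaded file could look like this.
def test_file_is_loaded(self):
    self.assertGreater(len(self.ocr.lines), 0)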
parser = argparse.ArgumentParser()
parser.add_argument("comp_size", type=int, help="Number of compound images")
parser.add_argument("nocomp_size", type=int, help="Number of non-compound images")
args = parser.parse_args()
comp_size = args.comp_size
nocomp_size = args.nocomp_size

dataFilename = "data.csv"
labelFilename = "labels.txt"

# If the data or label file does not exist, extract the features, assign
# labels, and write them to their respective files
if not os.path.isfile(dataFilename) or not os.path.isfile(labelFilename):
    currentDir = os.path.dirname(os.path.realpath(__file__))
    compPath = os.path.join(currentDir, "dataset/COMP")
    noCompPath = os.path.join(currentDir, "dataset/NOCOMP")
    compImgs = os.listdir(compPath)
    noCompImgs = os.listdir(noCompPath)
    print(len(compImgs))
    print(len(noCompImgs))

    lbp = LocalBinaryPatterns(8, 24, "uniform")  # number of points, radius
    shape = Shapes()
    ocr = OCR()

    with open(dataFilename, "w") as dataFile, open(labelFilename, "w") as labelsFile:
        getFeatures(compImgs, compPath, lbp, shape, ocr, dataFile, labelsFile, 1, limit=comp_size)
        getFeatures(noCompImgs, noCompPath, lbp, shape, ocr, dataFile, labelsFile, 0, limit=nocomp_size)
    toleranceX = int(5 / 100 * boxWidth)
    toleranceY = int(5 / 100 * boxHeight)
    startX -= toleranceX
    startY -= toleranceY
    endX += toleranceX
    endY += toleranceY

    # draw the bounding box on the image
    # cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)

    # get the portion of the image containing text
    crop = orig[startY:endY, startX:endX]

    # image pre-processing
    crop = OCR.preProcess(crop)

    # get text from OCR
    txt = OCR.getText(crop)
    boxes.append(((startX, startY, endX, endY), txt))


# simple block alignment algorithm: finds and ignores small differences
# between the starting pixels of each ROI
def alignText(boxes, pixel, drawer):
    newBoxes = dict()
    for coord, txt in boxes:
        isPresent = False
        startY = coord[1]
        for i in range(startY - pixel, startY + pixel + 1):
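# Added numeric illustration (not from the source) of the 5% padding applied
# above: for a 200x40 ROI the crop grows by 10 px horizontally and 2 px
# vertically on each side.
boxWidth, boxHeight = 200, 40
toleranceX = int(5 / 100 * boxWidth)   # -> 10
toleranceY = int(5 / 100 * boxHeight)  # -> 2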
print('Converting tiff images in', img_dir, 'to box files.')
for f in sorted(os.listdir(img_dir)):
    if not is_file_of_type(f, 'tif'):
        continue
    f = os.path.join(img_dir, f)
    if not os.path.exists(change_ext(f, '.box')):
        tiff_to_box(banti_segmenter, f)

####################################### Load OCR
from ocr import OCR
print('Initializing the OCR')
recognizer = OCR(args.nnet_fname, args.scaler_fname, args.labels_fname,
                 args.ngram_fname, args.calibration, args.log_level)
print('Done')

####################################### Helpers
def ocr_box_dir(img_dir):
    print('Recognizing box files in ', img_dir)
    for f in sorted(os.listdir(img_dir)):
        if is_file_of_type(f, 'box'):
            f = os.path.join(img_dir, f)
            print('OCRing', f)
            recognizer.ocr_box_file(f)
def __init__(self, config, num_classes, ocr_activation, **kwargs):
    extra = config['MODEL']['EXTRA']
    super(HighResolutionNet, self).__init__()

    # stem net
    self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
    self.bn1 = BatchNorm2d(64, momentum=BN_MOMENTUM)
    self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False)
    self.bn2 = BatchNorm2d(64, momentum=BN_MOMENTUM)
    self.relu = nn.ReLU(inplace=False)

    self.stage1_cfg = extra['STAGE1']
    num_channels = self.stage1_cfg['NUM_CHANNELS'][0]
    block = blocks_dict[self.stage1_cfg['BLOCK']]
    num_blocks = self.stage1_cfg['NUM_BLOCKS'][0]
    self.layer1 = self._make_layer(block, 64, num_channels, num_blocks)
    stage1_out_channel = block.expansion * num_channels

    self.stage2_cfg = extra['STAGE2']
    num_channels = self.stage2_cfg['NUM_CHANNELS']
    block = blocks_dict[self.stage2_cfg['BLOCK']]
    num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
    self.transition1 = self._make_transition_layer([stage1_out_channel], num_channels)
    self.stage2, pre_stage_channels = self._make_stage(self.stage2_cfg, num_channels)

    self.stage3_cfg = extra['STAGE3']
    num_channels = self.stage3_cfg['NUM_CHANNELS']
    block = blocks_dict[self.stage3_cfg['BLOCK']]
    num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
    self.transition2 = self._make_transition_layer(pre_stage_channels, num_channels)
    self.stage3, pre_stage_channels = self._make_stage(self.stage3_cfg, num_channels)

    self.stage4_cfg = extra['STAGE4']
    num_channels = self.stage4_cfg['NUM_CHANNELS']
    block = blocks_dict[self.stage4_cfg['BLOCK']]
    num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
    self.transition3 = self._make_transition_layer(pre_stage_channels, num_channels)
    self.stage4, pre_stage_channels = self._make_stage(
        self.stage4_cfg, num_channels, multi_scale_output=True)

    last_inp_channels = int(np.sum(pre_stage_channels))  # np.int is deprecated
    self.conv_region = nn.Conv2d(last_inp_channels, num_classes, kernel_size=1)
    self.conv_context = nn.Conv2d(last_inp_channels, last_inp_channels,
                                  kernel_size=3, padding=1)
    self.ocr = OCR(last_inp_channels, last_inp_channels // 2, ocr_activation)
    self.last_layer = nn.Conv2d(in_channels=last_inp_channels,
                                out_channels=num_classes,
                                kernel_size=extra['FINAL_CONV_KERNEL'],
                                stride=1,
                                padding=1 if extra['FINAL_CONV_KERNEL'] == 3 else 0)
#%%
with open('AccountNumbers.txt') as file:
    for line in file.readlines():
        print(line)

#%%
test_line = " "
print(test_line)
print("Original length: " + str(len(test_line)))
test_line = test_line.replace(' ', '')
print(test_line)
print("Replaced length: " + str(len(test_line)))

#%%
from ocr import OCR

ocr = OCR('AccountNumbers.txt')
for line in ocr.lines:
    print(line)
class Main:
    def __init__(self):
        self.api_url = config.SERVER_CONFIG['prod']
        with open('zenroom/encrypt_message.lua', 'r') as input:
            self.encryption_script = input.read()
        self._init_fields()

    def _init_fields(self):
        self.session = None
        self.ocr = OCR()
        self.reader = None
        self.mrtd = None
        self.mrz = None
        self.mrtd_data = None
        self.public_key = None
        self.encrypted_data = None
        self.personal_data = None
        self.portrait_image = None
        self.i = 0

    def start(self):
        """1) Set up the session & import the Zencode script"""
        api_url = self.api_url
        logging.info("MRTD: Connecting with {}".format(api_url))
        self.session = OnboardingSession(api_url)

    def get_mrz(self):
        """2) Get the MRZ from the ID document; should become OCR"""
        # mrz = config.MRZ_CONFIG['mrz1']
        mrz = self.ocr.get_mrz()
        return mrz

    def get_mrtd(self):
        return self.mrtd.wait_for_card()

    def wait_for_card(self, data={}):
        # wait for the NFC reader to detect a card
        mrtd = None
        if mrtd is None:
            mrtd = MRTD()
        if mrtd.wait_for_card():
            print("Card detected!")
            return {"card": True}

    def read_card(self, data={}):
        mrz = self.mrz
        if mrz is None:
            logging.info("MRTD: Trying to read MRZ...")
            mrz = self.get_mrz()
            if mrz:
                logging.info("MRTD: MRZ received [{}]".format(mrz))
                self.ocr.end_capture()
                self.mrtd = MRTD(mrz)
                self.mrz = mrz
        else:
            logging.info("MRTD: Waiting for card...")
            if self.get_mrtd():
                return {"mrtd": True}

    def setup_mrtd(self):
        """3) Set up the MRTD and get its data"""
        output_file = False
        id_card = MRTD(self.mrz, output_file)
        personal_data = id_card.personal_data()
        if personal_data is None:
            logging.error("DG1 could not be read")
            return False
        image_base64 = id_card.photo_data()
        if image_base64 is None:
            logging.error("DG2 could not be read")
            return False
        self.mrtd_data = [{'personal_data': personal_data},
                          {'image_base64': image_base64}]

    def read_data(self, data={}):
        mrtd = self.mrtd
        if self.personal_data is None:
            logging.info("MRTD: Reading DG1 (personal data)...")
            self.personal_data = mrtd.personal_data()
        else:
            logging.info("MRTD: Reading DG2 (portrait image)...")
            portrait_image = mrtd.photo_data()
            self.mrtd_data = [{'personal_data': self.personal_data},
                              {'image_base64': portrait_image}]
            # self.show_qr()
            # qr_file = image_handler.get_qr("https://decode.amsterdam/onboarding?id=", self.session.session_id)
            qr_file = image_handler.get_qr("", self.session.session_id)
            return {'qrcode': qr_file}

    def test_loop(self):
        self.i += 1
        if self.i == 10:
            self.i = 0
            return True

    def reset_loop(self, data={}):
        self.i += 1
        if self.i == 6:
            logging.info("MRTD: Resetting...")
            self._init_fields()
            self.start()
            return {"reset": True}

    def show_qr(self):
        """4) Show a QR code with the session ID"""
        logging.info("Displaying QR code & waiting for session status update")
        image_handler.qr_image(self.session.session_id)
        # self.ready.wait()

    def wait_for_pkey(self, data={}):
        status = self.session.get_status()
        logging.info("MRTD: Session status is [{}]".format(status))
        if status == "GOT_PUB_KEY":
            self.get_pkey()
            return {"got_pkey": True}

    def get_pkey(self):
        """5) Retrieve the public key from the session"""
        session_data = self.session.get_data()
        p_key = session_data['data']['public_key']
        self.public_key = {'public': p_key}

    def encrypt_data(self):
        """6) Encrypt the data with the public key"""
        # for test purposes
        # self._save_data(self.mrtd_data)
        self.encrypted_data = zenroom_buffer.execute(
            self.encryption_script, json.dumps(self.public_key),
            json.dumps(self.mrtd_data))

    def wait_for_encryption(self, data={}):
        if self.encrypted_data is None:
            self.encrypt_data()
            self.attach_data()
        else:
            if self.i == 2:
                self.i = 0
                return {"data_encrypted": True}
            self.i += 1

    def _save_data(self, data):
        """6.2) Save the encrypted data, for testing purposes"""
        with open('output/test_data.json', 'w') as output:
            json.dump(data, output)

    def attach_data(self):
        """7) Attach the encrypted data to the session"""
        self.session.attach_encrypted_data(self.encrypted_data)
        elif len(self.candidates) == 1:
            return self.candidates[0][i-1]
        # Assume all current candidates are equally likely
        s = sum(c[i-1] for c in self.candidates)
        n = len(self.candidates)
        return int(round(float(s) / n))

    def __str__(self):
        return "<DeckReconstructor, n=%d, avgdeck={1:%d, 2:%d, 3:%d}>" % (
            len(self.candidates), self[1], self[2], self[3])

    def __repr__(self):
        return "<DeckReconstructor, candidates=%s>" % self.candidates


if __name__ == '__main__':
    # simple test sequence
    import sys, os
    dirname, startfn = sys.argv[1:]
    deck = None

    from ocr import OCR
    ocr = OCR("LGE Nexus 5")

    imglist = sorted([fn for fn in os.listdir(dirname) if fn >= startfn])
    for fn in imglist:
        print fn
        board, tileset = ocr.ocr(os.path.join(dirname, fn))
        if deck is None:
            deck = DeckReconstructor(board)
        deck.update(tileset[0])
        print deck