def run_img_rec(data):
    """Run OCR on the image carried by *data* and return the recognized text.

    The raw image bytes in ``data.data`` are written to
    ``STORE_BASENAME + data.img_name``, that file is handed to
    ``praxyk.get_string_from_image``, the result is stored on
    ``data.img_text`` and ``data.dump_data()`` is called before returning.

    Args:
        data: An ImgPack-style object exposing ``img_name``, ``data``,
            ``img_text`` and ``dump_data()``.

    Returns:
        The string produced by the OCR call (also stored in
        ``data.img_text``).
    """
    import praxyk

    img_path = STORE_BASENAME + data.img_name
    print('Transferring image data to %s...' % img_path)
    # Binary mode keeps the image bytes untouched on every platform, and the
    # context manager closes the handle even when the write raises.
    with open(img_path, 'wb') as img_out:
        img_out.write(data.data)

    print('{Running OCR...}')
    data.img_text = praxyk.get_string_from_image(img_path)
    data.dump_data()
    print('This is where data can be returned to the user')
    return data.img_text
def do_ocr(trans, fileh, file_num):
    """Run OCR for one file of a transaction and record the outcome.

    Args:
        trans: Transaction object; ``trans.id`` and ``trans.status`` are read.
        fileh: Mapping with a ``'name'`` entry (file name) and a ``'data'``
            entry (raw image contents).
        file_num: Item number of this file within the transaction; used to
            look up the matching ``Result_POD_OCR`` record.

    Returns:
        The ``Result_POD_OCR`` record after it has been updated. If the
        transaction was cancelled, the (possibly missing, i.e. ``None``)
        record is returned without running OCR. ``False`` is returned when
        the transaction is active but no record can be found.
    """
    trans_id = trans.id

    # A cancelled transaction is marked as such and returned immediately so
    # that no OCR work is done for it.
    if trans.status == Transaction.TRANSACTION_CANCELED:
        this_result = Result_POD_OCR.query.filter(transaction_id=trans_id).filter(item_number=file_num).first()
        if this_result:
            this_result.finished_at = datetime.datetime.now()
            this_result.status = Result_POD_OCR.RESULT_CANCELED
            this_result.result_string = ""
            this_result.save(full=True)
        # NOTE(review): app_context is not defined in this function;
        # presumably a module-level application context -- confirm. Also
        # confirm that pop/return are meant to run even when no record exists.
        app_context.pop()
        return this_result

    # Fetch the individual result record this queue task is processing.
    this_result = Result_POD_OCR.query.filter(transaction_id=trans_id).filter(item_number=file_num).first()
    if not this_result:
        print("POD Worker Error, Can't Find This Result")
        return False

    # Create the working directory for this transaction if it is missing.
    # Another task may create it between a check and the call, so an
    # "already exists" failure is tolerated instead of pre-checking.
    imgs_dir = STORE_BASENAME + str(trans_id)
    try:
        os.makedirs(imgs_dir)
    except OSError:
        if not os.path.isdir(imgs_dir):
            raise

    # Put the image into a temporary file inside the working directory.
    # basename() keeps a client-supplied name from escaping that directory.
    file_img = os.path.join(imgs_dir, os.path.basename(fileh['name']) + '_' + str(file_num))
    try:
        with open(file_img, 'wb') as fh:
            fh.write(fileh['data'])
        # Get the actual OCR string from the praxyk python library.
        this_result.result_string = praxyk.get_string_from_image(file_img)
    finally:
        # Always reclaim the image space, even when the write or OCR fails.
        if os.path.exists(file_img):
            os.remove(file_img)

    this_result.finished_at = datetime.datetime.now()
    this_result.status = Result_POD_OCR.RESULT_FINISHED
    # Persist the updated result record.
    this_result.save(full=True)

    print("POD_OCR Result String : (%s) " % this_result.result_string)
    return this_result
def test_empty_image():
    """Check that OCR on ``empty.png`` yields an empty string.

    The image is looked up in the directory named by the
    ``PRAXYK_IMAGES_DIR`` environment variable. When that variable is not
    set the check is skipped and reported as passing.

    Returns:
        ``True`` if the check passed or was skipped; ``False`` if the OCR
        call returned a non-empty string or any other error occurred (a
        traceback limited to 5 frames is printed to stdout in that case).
    """
    try:
        # If system not set up, don't necessarily fail
        if "PRAXYK_IMAGES_DIR" not in os.environ:
            return True

        images_dir = os.environ["PRAXYK_IMAGES_DIR"]
        empty_image = os.path.join(images_dir, "empty.png")
        image_str = praxyk.get_string_from_image(empty_image)
        if image_str != "":
            raise RuntimeError("Praxyk detected a string in an empty image.")
        return True
    except Exception:
        # Only ordinary errors count as a failed check; KeyboardInterrupt and
        # SystemExit are allowed to propagate.
        print("")
        traceback.print_exc(limit=5, file=sys.stdout)
        return False
def test_typed_text():
    """Check that OCR on ``typed_text.png`` approximately reads the alphabet.

    The image is looked up in the directory named by the
    ``PRAXYK_IMAGES_DIR`` environment variable. When that variable is not
    set the check is skipped and reported as passing. The OCR output must be
    non-empty and within an edit distance of a quarter of the expected
    string's length from ``"ABCDEFGHIJKLMNOPQRSTUVWXYZ"``.

    Returns:
        ``True`` if the check passed or was skipped; ``False`` if the output
        was empty, too far from the expected string, or any other error
        occurred (a traceback limited to 5 frames is printed to stdout).
    """
    try:
        # If system not set up, don't necessarily fail
        if "PRAXYK_IMAGES_DIR" not in os.environ:
            return True

        images_dir = os.environ["PRAXYK_IMAGES_DIR"]
        typed_image = os.path.join(images_dir, "typed_text.png")
        image_str = praxyk.get_string_from_image(typed_image)
        if image_str == "":
            raise RuntimeError("Praxyk detected no string in image with typed text.")

        should_be = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        # Floor division keeps the tolerance an integer regardless of the
        # interpreter's division semantics.
        if editdistance.eval(image_str, should_be) > len(should_be) // 4:
            raise RuntimeError("Praxyk failed to detect correct string.")
        return True
    except Exception:
        # Only ordinary errors count as a failed check; KeyboardInterrupt and
        # SystemExit are allowed to propagate.
        print("")
        traceback.print_exc(limit=5, file=sys.stdout)
        return False