Beispiel #1
0
def run_img_rec(data):
    '''
    Run OCR on an image and return the recognized text.

    Writes ``data.data`` to the path ``STORE_BASENAME + data.img_name``,
    passes that path to ``praxyk.get_string_from_image``, stores the result
    on ``data.img_text``, calls ``data.dump_data()`` and returns the text.
    '''
    import praxyk
    img_path = STORE_BASENAME + data.img_name
    print('Transferring image data to %s...' % img_path)
    # Binary mode keeps the image bytes untouched (no newline translation),
    # and the with-block closes the handle even if the write fails.
    # NOTE(review): assumes data.data is a byte string -- confirm with caller.
    with open(img_path, 'wb') as img_out:
        img_out.write(data.data)
    print('{Running OCR...}')
    data.img_text = praxyk.get_string_from_image(img_path)
    data.dump_data()
    print('This is where data can be returned to the user')
    return data.img_text
Beispiel #2
0
def do_ocr(trans, fileh, file_num):
    """
    Run OCR for one file of a transaction and store the outcome on its result.

    Parameters:
        trans    -- transaction object; its ``id`` and ``status`` are read.
        fileh    -- mapping with a ``'name'`` entry (file name) and a
                    ``'data'`` entry (file contents).
        file_num -- item number of this file within the transaction.

    Returns:
        The updated result object on success.  If the transaction has been
        canceled, the result object marked as canceled (or whatever the
        lookup returned when no result exists).  ``False`` when the result
        for a non-canceled transaction cannot be found.
    """
    trans_id = trans.id

    # Look up the individual result struct that this queue task is processing.
    this_result = Result_POD_OCR.query.filter(transaction_id=trans_id).filter(item_number=file_num).first()

    # If the transaction has been canceled, mark this result as canceled and
    # return right away, avoiding further computation.
    if trans.status == Transaction.TRANSACTION_CANCELED:
        if this_result:
            this_result.finished_at = datetime.datetime.now()
            this_result.status = Result_POD_OCR.RESULT_CANCELED
            this_result.result_string = ""
            this_result.save(full=True)
            # NOTE(review): app_context is not defined in this function;
            # presumably a module-level object -- confirm it exists here.
            app_context.pop()
        return this_result

    if not this_result:
        print("POD Worker Error, Can't Find This Result")
        return False

    # Create the working directory for this transaction.  Several workers may
    # handle files of the same transaction, so tolerate the directory having
    # been created between any check and the makedirs call.
    imgs_dir = STORE_BASENAME + str(trans_id)
    try:
        os.makedirs(imgs_dir)
    except OSError:
        if not os.path.isdir(imgs_dir):
            raise

    # Put the image data into a temporary file inside the working directory.
    # basename() strips any directory components from the supplied name so
    # the file cannot be written outside imgs_dir.
    file_img = os.path.join(imgs_dir, os.path.basename(fileh['name']) + '_' + str(file_num))

    try:
        # Binary mode: image data must be written without newline translation.
        # NOTE(review): assumes fileh['data'] is a byte string -- confirm.
        with open(file_img, 'wb') as fh:
            fh.write(fileh['data'])

        # Get the actual OCR string from the praxyk python library.
        this_result.result_string = praxyk.get_string_from_image(file_img)
    finally:
        # Clean up the image space used, even when writing or OCR fails.
        if os.path.exists(file_img):
            os.remove(file_img)

    this_result.finished_at = datetime.datetime.now()
    this_result.status = Result_POD_OCR.RESULT_FINISHED

    # Persist the updated result object.
    this_result.save(full=True)

    print("POD_OCR Result String : (%s) " % this_result.result_string)
    return this_result
Beispiel #3
0
def test_empty_image():
    """
    Check that OCR on ``empty.png`` yields an empty string.

    Returns True when the check passes or when the ``PRAXYK_IMAGES_DIR``
    environment variable is not set; returns False (after printing a
    traceback to stdout) when the check raises.
    """
    try:
        # If system not set up, don't necessarily fail
        images_dir = os.environ.get("PRAXYK_IMAGES_DIR")
        if images_dir is None:
            return True

        empty_image = os.path.join(images_dir, "empty.png")
        image_str = praxyk.get_string_from_image(empty_image)
        if image_str != "":
            raise RuntimeError("Praxyk detected a string in an empty image.")

        return True
    except Exception:
        # Catch Exception rather than everything so KeyboardInterrupt and
        # SystemExit still propagate instead of being reported as a failure.
        print("")
        traceback.print_exc(limit=5, file=sys.stdout)
        return False
Beispiel #4
0
def test_typed_text():
    """
    Check that OCR on ``typed_text.png`` yields roughly the alphabet.

    The recognized string must be non-empty and its edit distance to
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ" must not exceed a quarter of that string's
    length.  Returns True when the check passes or when the
    ``PRAXYK_IMAGES_DIR`` environment variable is not set; returns False
    (after printing a traceback to stdout) when the check raises.
    """
    try:
        # If system not set up, don't necessarily fail
        images_dir = os.environ.get("PRAXYK_IMAGES_DIR")
        if images_dir is None:
            return True

        typed_image = os.path.join(images_dir, "typed_text.png")
        image_str = praxyk.get_string_from_image(typed_image)
        if image_str == "":
            raise RuntimeError("Praxyk detected no string in image with typed text.")

        should_be = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        # Integer division keeps the threshold identical on Python 2 and 3.
        max_distance = len(should_be) // 4
        if editdistance.eval(image_str, should_be) > max_distance:
            raise RuntimeError("Praxyk failed to detect correct string.")

        return True
    except Exception:
        # Catch Exception rather than everything so KeyboardInterrupt and
        # SystemExit still propagate instead of being reported as a failure.
        print("")
        traceback.print_exc(limit=5, file=sys.stdout)
        return False