def create_documents(size):
    """Create a new document from the request body and return its id.

    The request body is stored as page 1 of a new row in ``documents``.
    The form type is taken from the ``type`` query argument when present;
    otherwise the body is passed to ``recognise`` to determine it.

    Args:
        size: value written to the ``size`` column of the new row.

    Returns:
        A 415 response when the ``Content-Type`` header is not one of
        ``image/tiff``, ``image/jpeg`` or ``application/pdf``; otherwise a
        201 JSON response carrying ``id`` and ``form_type``.

    Raises:
        Any exception raised while talking to the database is re-raised
        after ``rollback(cursor)`` has been called.
    """
    content_type = request.headers['Content-Type']
    if content_type not in ("image/tiff", 'image/jpeg', 'application/pdf'):
        logging.error('Content-Type is not a valid image format')
        return Response(status=415)

    if 'type' not in request.args:
        # No explicit type supplied: OCR the form to detect the application type.
        form_type = recognise(io.BytesIO(request.data))
    else:
        logging.info("Form type specified")
        form_type = request.args['type']

    insert_sql = ("insert into documents (document_id, form_type, content_type, page, size, image) "
                  "values ( %(document_id)s, %(form_type)s, %(content_type)s, %(page)s, %(size)s, "
                  "%(image)s ) returning document_id")

    cursor = connect(cursor_factory=psycopg2.extras.DictCursor)
    try:
        cursor.execute('select max(document_id)+1 from documents')
        candidate = cursor.fetchone()[0]
        # max() yields NULL on an empty table, so the first document gets id 1.
        new_id = 1 if candidate is None else candidate

        insert_params = {
            "document_id": new_id,
            "form_type": form_type,
            "content_type": content_type,
            "page": "1",
            "size": size,
            "image": psycopg2.Binary(request.data),
        }
        cursor.execute(insert_sql, insert_params)

        # The insert echoes back the id it stored via "returning document_id".
        document_id = cursor.fetchone()[0]
        complete(cursor)
    except BaseException:
        # Undo the work on any failure, then let the caller see the error.
        rollback(cursor)
        raise

    payload = json.dumps({"id": document_id, "form_type": form_type})
    return Response(payload, status=201, mimetype='application/json')
def change_image(doc_id, page_no, size):
    """Replace the stored image of an existing document page.

    The new image is the request body. When ``page_no`` equals 1 the body is
    passed to ``recognise`` to determine the form type; for any other page
    the form type is copied from the document's page 1 row.

    Args:
        doc_id: value matched against ``documents.document_id``.
        page_no: value matched against ``documents.page``.
        size: value written to the ``size`` column.

    Returns:
        A 415 response when the ``Content-Type`` header is not one of
        ``image/tiff``, ``image/jpeg`` or ``application/pdf``; a 404 response
        when the document has no page 1 row (for ``page_no`` other than 1)
        or when the update matched no row; otherwise a 200 response.

    Raises:
        Any exception raised inside the database section is re-raised after
        ``rollback(cursor)`` has been called.
    """
    content_type = request.headers['Content-Type']
    if content_type not in ("image/tiff", 'image/jpeg', 'application/pdf'):
        logging.error('Content-Type is not a valid image format')
        return Response(status=415)

    cursor = connect(cursor_factory=psycopg2.extras.DictCursor)
    try:
        if page_no == 1:
            # ocr form to detect application type
            image_as_bytes = io.BytesIO(request.data)
            form_type = recognise(image_as_bytes)
            # TODO: if form_type is different to the original type, need to consider updating any page 2,3 etc... TEST reallocate on multi-page form
        else:
            cursor.execute('select form_type from documents where document_id=%(doc_id)s and page = 1',
                           {"doc_id": doc_id})
            row = cursor.fetchone()
            if row is None:
                # Finish with the cursor before leaving, exactly as the
                # zero-rowcount 404 path below does; previously this early
                # return skipped both complete() and rollback().
                complete(cursor)
                return Response(status=404)
            form_type = row['form_type']

        cursor.execute("update documents set form_type=%(form_type)s, content_type=%(content_type)s, "
                       "size=%(size)s , image=%(image)s where document_id=%(doc_id)s and page=%(page)s",
                       {
                           "doc_id": doc_id,
                           "form_type": form_type,
                           "content_type": content_type,
                           "page": page_no,
                           "size": size,
                           "image": psycopg2.Binary(request.data)
                       })
        rowcount = cursor.rowcount
        complete(cursor)
    except BaseException:
        # Undo the work on any failure, then let the caller see the error.
        rollback(cursor)
        raise

    if rowcount == 0:
        # The update matched nothing: no such document/page.
        return Response(status=404)

    return Response(status=200)
Esempio n. 3
0
 def scan_image(self, file):
     """Run ``recognise`` on a file from the ``ocr_test`` directory.

     Args:
         file: name of the file, relative to ``ocr_test/`` under ``dir_``.

     Returns:
         Whatever ``recognise`` returns for the opened file.
     """
     filename = os.path.join(dir_, "ocr_test/" + file)
     # Close the handle once recognition is done instead of leaking it.
     # NOTE(review): assumes recognise() finishes reading before it returns.
     with open(filename, 'rb') as file_bytes:
         return recognise(file_bytes)
 def scan_image(self, file):
     """Run ``recognise`` on a file from the ``ocr_test`` directory.

     Args:
         file: name of the file, relative to ``ocr_test/`` under ``dir_``.

     Returns:
         Whatever ``recognise`` returns for the opened file.
     """
     filename = os.path.join(dir_, "ocr_test/" + file)
     # Close the handle once recognition is done instead of leaking it.
     # NOTE(review): assumes recognise() finishes reading before it returns.
     with open(filename, 'rb') as file_bytes:
         return recognise(file_bytes)