Esempio n. 1
0
def GoogleVisionOCR(app_context, base_dir=config.BASE_DIR):
    """Run the Google Vision OCR step and wrap its outcome in a status dict.

    Args:
        app_context: Object exposing ``application_context``; the object
            itself is handed to ``process_input`` and its
            ``application_context`` is handed to the logging helpers.
        base_dir: Forwarded unchanged to ``process_input``; defaults to
            ``config.BASE_DIR``.

    Returns:
        dict: ``code`` 200 with the first value from ``process_input`` under
        ``rsp`` and the second under ``langs`` when that first value is not
        ``None``. Otherwise (a ``None`` response or any exception) ``code``
        400 with ``rsp`` set to ``None`` and no ``langs`` key.
    """
    log_debug(
        'google vision ocr process starting {}'.format(
            app_context.application_context), app_context.application_context)
    try:
        response, langs = process_input(app_context, base_dir)
        # Identity test on purpose: `!= None` would dispatch to the result's
        # own __ne__, which a custom/array-like type may overload.
        if response is not None:
            return {
                'code': 200,
                'message': 'request completed',
                'rsp': response,
                'langs': langs
            }
        return {
            'code': 400,
            'message': 'Error occured during google vision ocr',
            'rsp': None
        }
    except Exception as e:
        # Boundary handler: every failure is logged and reported to the
        # caller as a 400 status dict instead of propagating.
        log_exception("Error occured during google vision ocr  ",
                      app_context.application_context, e)
        return {
            'code': 400,
            'message': 'Error occured during google vision ocr ',
            'rsp': None
        }
Esempio n. 2
0
def BlockSegmenter(app_context, base_dir=config.BASE_DIR):
    """Run the block segmentation step and wrap its outcome in a status dict.

    Args:
        app_context: Object exposing ``application_context``; the object
            itself is handed to ``get_segmented_regions`` and its
            ``application_context`` is handed to the logging helpers.
        base_dir: Forwarded unchanged to ``get_segmented_regions``; defaults
            to ``config.BASE_DIR``.

    Returns:
        dict: ``code`` 200 with the result under ``rsp`` when
        ``get_segmented_regions`` returns something other than ``None``;
        otherwise (a ``None`` result or any exception) ``code`` 400 with
        ``rsp`` set to ``None``.
    """
    log_debug(
        'block segmentation process starting {}'.format(
            app_context.application_context), app_context.application_context)
    try:
        response = get_segmented_regions(app_context, base_dir)

        # Identity test on purpose: `!= None` would dispatch to the result's
        # own __ne__, which a custom/array-like type may overload.
        if response is not None:
            return {
                'code': 200,
                'message': 'request completed',
                'rsp': response
            }
        return {
            'code': 400,
            'message': 'Error occured during block segmentation',
            'rsp': None
        }
    except Exception as e:
        log_exception("Error occured during block segmentation ",
                      app_context.application_context, e)
        # The returned message names this step; it previously reported
        # "layout detection", which pointed callers at the wrong stage.
        return {
            'code': 400,
            'message': 'Error occured during block segmentation ',
            'rsp': None
        }
Esempio n. 3
0
def TextDetection(app_context, base_dir=config.BASE_DIR):
    """Run text detection and wrap the built response in a status dict.

    Calls ``get_text`` to obtain ``words``, ``lines`` and ``images``, then
    passes them to ``get_response`` to build the payload.

    Args:
        app_context: Object exposing ``application_context``; the object
            itself is handed to ``get_text`` and ``get_response`` and its
            ``application_context`` is handed to the logging helpers.
        base_dir: Forwarded unchanged to ``get_text``; defaults to
            ``config.BASE_DIR``.

    Returns:
        dict: ``code`` 200 with the payload under ``rsp`` when
        ``get_response`` returns something other than ``None``; otherwise
        (a ``None`` payload or any exception) ``code`` 400 with ``rsp`` set
        to ``None``.
    """
    # NOTE(review): the log/response texts below mention "Block merger" and
    # "pdf to blocks conversion" rather than text detection; they look
    # copy-pasted but are left untouched in case consumers match on them.
    log_debug(
        'Block merger starting processing {}'.format(
            app_context.application_context), app_context.application_context)

    try:
        words, lines, images = get_text(app_context, base_dir)
        response = get_response(app_context, words, lines, images)

        # Identity test on purpose: `!= None` would dispatch to the result's
        # own __ne__, which a custom/array-like type may overload.
        if response is not None:
            return {
                'code': 200,
                'message': 'request completed',
                'rsp': response
            }
        return {
            'code': 400,
            'message': 'Error occured during pdf to blocks conversion',
            'rsp': None
        }

    except Exception as e:
        # Trailing space keeps the exception text from being glued onto the
        # last word of the message.
        log_exception(
            "Error occured during word detection conversion " + str(e),
            app_context.application_context, e)
        return {
            'code': 400,
            'message': 'Error occured during pdf to blocks conversion',
            'rsp': None
        }
Esempio n. 4
0
def TesseractOCR(app_context, base_dir=config.BASE_DIR):
    """Run the Tesseract OCR step and wrap its outcome in a status dict.

    Args:
        app_context: Object exposing ``application_context``; the object
            itself is handed to ``process_info`` and its
            ``application_context`` is handed to the logging helpers.
        base_dir: Forwarded unchanged to ``process_info``; defaults to
            ``config.BASE_DIR``.

    Returns:
        dict: ``code`` 200 with the result under ``rsp`` when
        ``process_info`` returns something other than ``None``; otherwise
        (a ``None`` result or any exception) ``code`` 400 with ``rsp`` set
        to ``None``.
    """
    log_debug(
        'tesseract ocr process starting {}'.format(
            app_context.application_context), app_context.application_context)
    try:
        response = process_info(app_context, base_dir)
        # Identity test on purpose: `!= None` would dispatch to the result's
        # own __ne__, which a custom/array-like type may overload.
        if response is not None:
            return {
                'code': 200,
                'message': 'request completed',
                'rsp': response
            }
        return {
            'code': 400,
            'message': 'Error occured during tesseract ocr',
            'rsp': None
        }
    except Exception as e:
        # Boundary handler: every failure is logged and reported to the
        # caller as a 400 status dict instead of propagating.
        log_exception("Error occured during tesseract ocr  ",
                      app_context.application_context, e)
        return {
            'code': 400,
            'message': 'Error occured during tesseract ocr ',
            'rsp': None
        }
Esempio n. 5
0
def DocumentStructure(app_context,
                      file_name,
                      lang='en',
                      base_dir=config.BASE_DIR):
    """Pre-process a document, analyse its structure and return a status dict.

    Steps, in order: ``doc_pre_processing``, ``extract_word_bbox`` on the
    first entry of the returned image paths, ``check_text`` on the XML
    frames, then ``doc_structure_analysis`` followed by
    ``doc_structure_response``.

    Args:
        app_context: Object exposing ``application_context``, which is
            handed to the logging helpers.
        file_name: Forwarded to ``doc_pre_processing``.
        lang: Forwarded to ``doc_pre_processing`` and
            ``doc_structure_analysis``; defaults to ``'en'``.
        base_dir: Forwarded to ``doc_pre_processing``; defaults to
            ``config.BASE_DIR``.

    Returns:
        dict: ``code`` 200 with the structured response under ``rsp`` on
        success. ``code`` 400 with ``rsp`` set to ``None`` when
        pre-processing yields no XML frames, when ``check_text`` reports
        zero text blocks, or when the analysis/response stage raises.

    Note:
        Only the analysis/response stage is wrapped in ``try``; exceptions
        raised by the earlier steps propagate to the caller.
    """
    log_debug(
        'Block merger starting processing {}'.format(
            app_context.application_context), app_context.application_context)
    img_dfs, xml_dfs, working_dir, page_width, page_height, pdf_bg_img_filepaths, pdf_image_paths = doc_pre_processing(
        file_name, base_dir, lang)

    # Identity test on purpose: `== None` would dispatch to the object's own
    # __eq__, which a custom/array-like type may overload.
    if xml_dfs is None:
        return {
            'code': 400,
            'message':
            'Document pre-processing failed, check your installation',
            'rsp': None
        }
    # The result is not used by the rest of the pipeline; the call is kept
    # because any side effects of extract_word_bbox are not visible here.
    # Its output is routed to the debug log instead of stdout.
    word_bboxes = extract_word_bbox(pdf_image_paths[0])
    log_debug('extracted word bounding boxes {}'.format(word_bboxes),
              app_context.application_context)

    text_blocks_count = check_text(xml_dfs)
    if text_blocks_count == 0:
        log_info(
            "DocumentStructure : looks like the file is either empty or scanned type, currently we support Class-1 document.",
            app_context.application_context)
        return {
            'code': 400,
            'message':
            'looks like the file is of scanned type, currently we support Class-1 document.',
            'rsp': None
        }

    try:
        text_block_dfs, table_dfs, line_dfs, bg_dfs = doc_structure_analysis(
            xml_dfs, img_dfs, working_dir, lang, page_width, page_height,
            pdf_bg_img_filepaths, pdf_image_paths)
        response = doc_structure_response(bg_dfs, text_block_dfs, table_dfs,
                                          line_dfs, page_width, page_height)
        log_info(
            "DocumentStructure : successfully received blocks in json response",
            app_context.application_context)
        return {'code': 200, 'message': 'request completed', 'rsp': response}

    except Exception as e:
        log_exception("Error occured during pdf to blocks conversion",
                      app_context.application_context, e)
        return {
            'code': 400,
            'message': 'Error occured during pdf to blocks conversion',
            'rsp': None
        }
Esempio n. 6
0
def LayoutDetection(app_context):
    """Run layout detection and wrap its outcome in a status dict.

    Args:
        app_context: Object exposing ``application_context``; the object
            itself is handed to ``get_layout`` and its
            ``application_context`` is handed to the logging helpers.

    Returns:
        dict: ``code`` 200 with whatever ``get_layout`` returned under
        ``rsp`` (no ``None`` check is applied), or ``code`` 400 with ``rsp``
        set to ``None`` when ``get_layout`` raises.
    """
    start_msg = 'layout detection process starting {}'.format(
        app_context.application_context)
    log_debug(start_msg, app_context.application_context)

    try:
        layout = get_layout(app_context)
    except Exception as err:
        # Failure path: log the exception and report a 400 to the caller.
        log_exception("Error occured during layout detection ",
                      app_context.application_context, err)
        failure = {
            'code': 400,
            'message': 'Error occured during layout detection ',
            'rsp': None
        }
        return failure

    return {'code': 200, 'message': 'request completed', 'rsp': layout}
Esempio n. 7
0
def DocumentStructure(app_context,
                      file_name,
                      lang='en',
                      base_dir=config.BASE_DIR,
                      page_layout='single_column'):
    """Build the document-structure response through a composed pipeline.

    The pipeline is created by passing ``generate_response``,
    ``break_blocks``, ``merge_vertically``, ``merge_horizontally`` and
    ``extract_images_and_text_regions`` to ``compose`` and is then invoked
    with ``(file_name, base_dir, lang, page_layout)``.

    Args:
        app_context: Object exposing ``application_context``, which is
            handed to the logging helpers.
        file_name: First argument given to the composed pipeline.
        lang: Third argument given to the composed pipeline; defaults to
            ``'en'``.
        base_dir: Second argument given to the composed pipeline; defaults
            to ``config.BASE_DIR``.
        page_layout: Fourth argument given to the composed pipeline;
            defaults to ``'single_column'``.

    Returns:
        dict: ``code`` 200 with the pipeline result under ``rsp``, or
        ``code`` 400 with ``rsp`` set to ``None`` when building or running
        the pipeline raises.
    """
    start_msg = 'Block merger starting processing {}'.format(
        app_context.application_context)
    log_debug(start_msg, app_context.application_context)

    try:
        # Stage order is passed to compose exactly as listed here.
        pipeline = compose(
            generate_response,
            break_blocks,
            merge_vertically,
            merge_horizontally,
            extract_images_and_text_regions,
        )
        blocks = pipeline(file_name, base_dir, lang, page_layout)
    except Exception as err:
        log_exception("Error occured during pdf to blocks conversion",
                      app_context.application_context, err)
        failure = {
            'code': 400,
            'message': 'Error occured during pdf to blocks conversion',
            'rsp': None
        }
        return failure

    return {'code': 200, 'message': 'request completed', 'rsp': blocks}