Beispiel #1
0
def get_segmented_regions(app_context, base_dir):
    """Run block segmentation on every page of every input file.

    For each entry returned by ``get_files`` the file's JSON is loaded with
    ``get_json(base_dir, <file name>)`` and wrapped in ``File``.  Every
    page's regions are then replaced by the result of
    ``segment_regions(words, lines, regions)``.  The processed files, each
    tagged with a success status, are stored under
    ``app_context.application_context["outputs"]``.

    Args:
        app_context: Object exposing an ``application_context`` mapping; it
            is read to find the input files and updated with ``"outputs"``.
        base_dir: Forwarded unchanged to ``get_json`` as its first argument.

    Returns:
        ``app_context.application_context`` on success, or ``None`` if any
        exception was raised (the exception is logged, not propagated).
    """
    try:
        files = get_files(app_context.application_context)
        output = []
        for file_entry in files:
            file_data = get_json(base_dir, file_entry['file']['name'])
            file_properties = File(file_data)
            page_count = len(file_properties.get_pages())
            start_time = time.time()
            for page_index in range(page_count):
                print('processing for page   :  ', page_index)
                page_lines = file_properties.get_lines(page_index)
                page_regions = file_properties.get_regions(page_index)
                page_words = file_properties.get_words(page_index)

                file_properties.set_regions(
                    page_index,
                    segment_regions(page_words, page_lines, page_regions))

            processed = file_properties.get_file()
            output.append(processed)
            processed['status'] = {'message': "block-segmenter successful"}

            elapsed = time.time() - start_time
            # Guard the average: a file without pages must not abort the
            # whole batch with ZeroDivisionError just because of a timing log.
            extraction_time = elapsed / max(page_count, 1)
            log_info(
                'block segmentation per page completed in {}'.format(
                    extraction_time), app_context.application_context)
        app_context.application_context["outputs"] = output
        log_info("successfully completed block segmentation", None)
    except Exception as e:
        # Top-level boundary: report the failure and signal it with None.
        log_exception("Error occured during block segmentation ",
                      app_context.application_context, e)
        return None

    return app_context.application_context
Beispiel #2
0
def get_layout(app_context):
    """Detect layout regions for every page of every input file.

    For each entry returned by ``get_files`` the first element of
    ``get_json(<file name>)`` is loaded and wrapped in ``File``.  Each page
    path is reduced to ``'upload/'`` plus whatever follows the last
    ``'upload/'`` in it, and ``primalaynet.predict_primanet`` is called with
    that path and the page's line coordinates.  The predicted regions are
    written to ``pages[idx]["regions"]``; the entry's ``file`` and
    ``config`` are copied onto the result, which is tagged with a success
    status and collected under
    ``app_context.application_context["outputs"]``.

    Args:
        app_context: Object exposing an ``application_context`` mapping; it
            is read to find the input files and updated with ``"outputs"``.

    Returns:
        ``app_context.application_context`` on success, or ``None`` if any
        exception was raised (the exception is logged, not propagated).
    """
    try:
        files = get_files(app_context.application_context)
        output = []
        for file_new in files:
            file_data = get_json(file_new['file']['name'])[0]
            file_properties = File(file_data)
            page_paths = file_properties.get_pages()
            start_time = time.time()
            for idx, page_path in enumerate(page_paths):
                page_lines = file_properties.get_lines(idx)
                # NOTE(review): result is unused here; call kept because
                # get_words is defined elsewhere - confirm it can be dropped.
                page_words = file_properties.get_words(idx)
                line_coords = get_coord(page_lines)
                relative_path = 'upload/' + page_path.split('upload/')[-1]

                if torch.cuda.is_available():
                    # NOTE(review): torch.cuda.device is a context manager;
                    # built without `with` it presumably selects nothing -
                    # confirm the intent.
                    torch.cuda.device(0)
                    print("*******cuda available")
                    torch.cuda.empty_cache()
                # NOTE(review): fixed one-second pause before every
                # prediction; the reason is not evident from this function.
                time.sleep(1)
                regions = primalaynet.predict_primanet(relative_path,
                                                       line_coords)
                file_data['pages'][idx]["regions"] = regions

            file_data['file'] = file_new['file']
            file_data['config'] = file_new['config']
            output.append(file_data)
            file_data['status'] = {'message': "layout-detector successful"}

            elapsed = time.time() - start_time
            # Guard the average: a file without pages must not abort the
            # whole batch with ZeroDivisionError just because of a timing log.
            extraction_time = elapsed / max(len(page_paths), 1)
            log_info(
                'Layout detection per page completed in {}'.format(
                    extraction_time), app_context.application_context)
        app_context.application_context["outputs"] = output
        log_info("successfully completed layout detection", None)
    except Exception as e:
        # Top-level boundary: report the failure and signal it with None.
        log_exception("Error occured during prima layout detection ",
                      app_context.application_context, e)
        return None

    return app_context.application_context
Beispiel #3
0
def get_layout(app_context):
    """Detect layout regions for every page of every input file.

    Each entry returned by ``get_files`` is wrapped in ``File``.  For every
    page, ``primalaynet.predict_primanet`` is called with the page path and
    the page's line coordinates, and the result is written to
    ``pages[idx]["regions"]`` of that entry.  The entries, each tagged with
    a success status message, are stored under
    ``app_context.application_context["outputs"]``.

    Args:
        app_context: Object exposing an ``application_context`` mapping; it
            is read to find the input files and updated with ``"outputs"``.

    Returns:
        ``app_context.application_context`` on success, or ``None`` if any
        exception was raised (the exception is logged, not propagated).
    """
    try:
        files = get_files(app_context.application_context)
        output = []
        for file_entry in files:
            file_properties = File(file_entry)
            page_paths = file_properties.get_pages()
            for idx, page_path in enumerate(page_paths):
                page_lines = file_properties.get_lines(idx)
                # NOTE(review): result is unused here; call kept because
                # get_words is defined elsewhere - confirm it can be dropped.
                page_words = file_properties.get_words(idx)
                line_coords = get_coord(page_lines)
                regions = primalaynet.predict_primanet(page_path, line_coords)
                file_entry['pages'][idx]["regions"] = regions
            output.append(file_entry)
            # Create the status mapping when the entry has none, so a missing
            # 'status' key no longer aborts the request with KeyError; an
            # existing mapping is reused and keeps its other keys.
            status = file_entry.setdefault('status', {})
            status['message'] = "layout-detector successful"
        app_context.application_context["outputs"] = output
        log_info("successfully completed layout detection", None)
    except Exception as e:
        # Top-level boundary: report the failure and signal it with None.
        log_exception("Error occured during prima layout detection ",
                      app_context.application_context, e)
        return None

    return app_context.application_context