def process_image(self, image_cv2: np.ndarray, options: dict) -> None:
        """
        Runs pytesseract on one image and appends the result to self._data

        Parameters
        image_cv2 - image as a cv2 / numpy array in BGR channel order
        options - dict providing the keys 'oem', 'psm', 'best' and 'preprocessing'

        The appended entry is a tuple (page_data, image_stored_bytes, ocr_page_data)
        """

        tessdata_best_path = get_absolute_path('tessdata/best')
        tessdata_fast_path = get_absolute_path('tessdata/fast')

        tessdata_path = tessdata_best_path if options['best'] else tessdata_fast_path

        custom_config = f'--oem {options["oem"]} --psm {options["psm"]} --tessdata-dir "{tessdata_path}"'

        # cv2 stores images in BGR format, but pytesseract assumes RGB format. Perform conversion.
        rgb_image_cv2 = cv2.cvtColor(src=image_cv2, code=cv2.COLOR_BGR2RGB)

        # Image to be stored in db as JPEG bytes at the highest quality setting
        # (JPEG is still a lossy format, even at quality 100).
        # cv2.imencode is expecting BGR image, not RGB, so the unconverted array is encoded.
        # tobytes() replaces the deprecated ndarray.tostring() alias and yields the same bytes.
        image_stored_bytes = cv2.imencode(ext='.jpg', img=image_cv2, params=[
                                          cv2.IMWRITE_JPEG_QUALITY, 100])[1].tobytes()

        # The image processing pipeline is only set up when preprocessing was requested
        if options['preprocessing']:
            image_pipeline = ImagePipeline()
            image_pipeline.add_step(name='Grayscale', new_step=cv2.cvtColor,
                                    image_param_name='src', other_params={'code': cv2.COLOR_RGB2GRAY})
            # cv2.threshold returns (retval, image); capture_index=1 presumably selects the image
            image_pipeline.add_step(name='Binary Threshold', new_step=cv2.threshold, image_param_name='src', other_params={
                                    'thresh': 20, 'maxval': 255, 'type': cv2.THRESH_BINARY}, capture_index=1)
            image_for_pytesseract = image_pipeline.run(image=rgb_image_cv2)
        else:
            image_for_pytesseract = rgb_image_cv2

        # Collects metadata on page text after refining with pipeline
        page_data = pytesseract.image_to_data(
            image=image_for_pytesseract, config=custom_config, output_type=Output.DICT)

        # OCRPageData object creation
        # Metadata on pipeline-refined image
        ocr_page_data = OcrPageData(image_to_data=page_data)
        self._data.append((page_data, image_stored_bytes, ocr_page_data))
Beispiel #2
0
    def display_info(self):
        """
        When the information button is pressed, this window spawns with the information about the new
        document options
        """
        # Read the help text under a context manager so the file handle is closed right away
        with open(get_absolute_path("information_doc_options.txt")) as info_file:
            text = info_file.read()

        text_file = Qw.QTextBrowser()
        text_file.setText(text)
        dialog = Qw.QDialog(parent=self)

        desktop = Qw.QDesktopWidget()
        desktop_size = desktop.availableGeometry(
            desktop.primaryScreen()).size()
        # resize() takes integer pixel sizes; the scaled values are floats, so truncate them
        # explicitly instead of relying on an implicit float -> int conversion
        dialog.resize(int(desktop_size.width() * 0.2),
                      int(desktop_size.height() * 0.4))

        temp_layout = Qw.QHBoxLayout()
        temp_layout.addWidget(text_file)
        dialog.setWindowTitle("Information")
        dialog.setLayout(temp_layout)
        dialog.show()
Beispiel #3
0
    def process_image(idx: int,
                      filepath: str,
                      oem: int = 3,
                      psm: int = 3,
                      best: bool = True,
                      preprocessing: bool = False) -> tuple:
        """
        Processes image using ImagePipeline

        Parameters
        idx - caller-supplied index, returned unchanged alongside the results
        filepath - filepath of the image, read with cv2.imread
        oem - OCR engine mode (0-3)
        psm - page segmentation mode (0-13) Modes 0-2 don't perform OCR, so don't allow those
        best - whether to use the best model (or fast model)
        preprocessing - whether to refine image temporarily with ImagePipeline before running pytesseract

        Returns
        (idx, (page_data, image_stored_bytes, ocr_page_data)), or None after printing a
        message when oem or psm is out of range
        """

        # Invalid engine / segmentation modes are reported on stdout and the call is abandoned
        if oem not in range(4):
            print('oem must be an integer between 0 and 3 inclusive')
            return
        if psm not in range(3, 14):
            print('psm must be an integer between 3 and 13 inclusive')
            return

        image_cv2 = cv2.imread(filename=filepath, flags=cv2.IMREAD_COLOR)

        tessdata_best_path = get_absolute_path('tessdata/best')
        tessdata_fast_path = get_absolute_path('tessdata/fast')

        tessdata_path = tessdata_best_path if best else tessdata_fast_path

        custom_config = f'--oem {oem} --psm {psm} --tessdata-dir "{tessdata_path}"'

        # cv2 stores images in BGR format, but pytesseract assumes RGB format. Perform conversion.
        rgb_image_cv2 = cv2.cvtColor(src=image_cv2, code=cv2.COLOR_BGR2RGB)

        # Image to be stored in db as JPEG bytes at the highest quality setting
        # (JPEG is still a lossy format, even at quality 100).
        # cv2.imencode is expecting BGR image, not RGB, so the unconverted array is encoded.
        # tobytes() replaces the deprecated ndarray.tostring() alias and yields the same bytes.
        image_stored_bytes = cv2.imencode(
            ext='.jpg', img=image_cv2, params=[cv2.IMWRITE_JPEG_QUALITY,
                                               100])[1].tobytes()

        # The image processing pipeline is only set up when preprocessing was requested
        if preprocessing:

            def grayscale_flat_field_correction(src: np.ndarray,
                                                ksize: int = 99) -> np.ndarray:
                """
                Divides the grayscale image by its median-blurred version, scaled by the
                mean of the blur

                src - image; converted to grayscale first unless it is already 2-dimensional
                ksize - aperture size handed to cv2.medianBlur
                """
                image_grayscale = src if src.ndim == 2 else cv2.cvtColor(
                    src=src, code=cv2.COLOR_BGR2GRAY)
                blur = cv2.medianBlur(src=image_grayscale, ksize=ksize)
                mean = cv2.mean(src=blur)[0]

                # It's fine if we divide by zero
                with np.errstate(divide='ignore', invalid='ignore'):
                    flat_field = (image_grayscale * mean) / blur
                return flat_field

            image_pipeline = ImagePipeline()
            image_pipeline.add_step(name='Grayscale',
                                    new_step=cv2.cvtColor,
                                    image_param_name='src',
                                    other_params={'code': cv2.COLOR_RGB2GRAY})
            image_pipeline.add_step(name='Flat-Field',
                                    new_step=grayscale_flat_field_correction,
                                    image_param_name='src',
                                    other_params={'ksize': 91})
            image_for_pytesseract = image_pipeline.run(image=rgb_image_cv2)
        else:
            image_for_pytesseract = rgb_image_cv2

        # Collects metadata on page text after refining with pipeline
        # Restrict Tesseract's OpenMP thread count to one
        # NOTE(review): presumably because several images are processed in parallel - confirm
        os.environ['OMP_THREAD_LIMIT'] = '1'
        page_data = pytesseract.image_to_data(image=image_for_pytesseract,
                                              config=custom_config,
                                              output_type=Output.DICT)

        # OCRPageData object creation
        # Metadata on pipeline-refined image
        ocr_page_data = OcrPageData(image_to_data=page_data)

        return (idx, (page_data, image_stored_bytes, ocr_page_data))
Beispiel #4
0
    def __init__(self, new_doc_cb, parent=None):
        """
        Builds the document overview: search controls, one button per stored document
        and the 'Add New Document' button

        :param new_doc_cb: callback kept on self.new_doc_cb (not invoked here)
        :param parent: parent widget handed to the base class constructor
        """
        super().__init__(parent)

        # The database is opened for the duration of the constructor and closed at the end
        db.connect(reuse_if_open=True)

        self._filter = ''

        self.new_doc_cb = new_doc_cb

        self.setSizePolicy(Qw.QSizePolicy.MinimumExpanding,
                           Qw.QSizePolicy.MinimumExpanding)

        self._layout = Qw.QVBoxLayout()

        self.doc_grid = Qw.QGridLayout()
        self.scroll_area = Qw.QScrollArea()
        self.scroll_area.setWidgetResizable(True)
        self.ui_box = Qw.QHBoxLayout()

        self._doc_buttons = []

        self.search_bar = Qw.QLineEdit()
        self.search_bar.setPlaceholderText("Search for document name...")
        self.search_bar.textChanged.connect(self.update_filter)

        self.doc_search = Qw.QRadioButton("DOC")
        self.doc_search.clicked.connect(self.update_filter)
        self.doc_search.setChecked(True)
        self.ocr_search = Qw.QRadioButton("OCR")
        self.ocr_search.clicked.connect(self.update_filter)

        self.remove_mode = Qw.QPushButton("Enable remove mode")
        self.remove_mode.setCheckable(True)
        self.remove_mode.toggled.connect(self.set_remove_mode)

        self.ui_box.addWidget(self.doc_search)
        self.ui_box.addWidget(self.ocr_search)
        self.ui_box.addWidget(self.search_bar)
        self.ui_box.addWidget(self.remove_mode)
        # produces the document buttons that users can interact with
        for doc in OcrDocument.select():
            # assuming that each doc will surely have at least one page
            img = doc.pages[0].image
            name = doc.name

            doc_button = SingleDocumentButton(name, img, doc)
            # doc is bound as a default argument so each button opens its own document
            doc_button.pressed.connect(
                lambda doc=doc: self.create_doc_window(doc))
            doc_button.setVisible(True)
            self._doc_buttons.append(doc_button)

        # Read the icon bytes under a context manager so the file handle is closed right away
        with open(get_absolute_path("icons/plus_icon.png"), "rb") as icon_file:
            new_doc_button_icon = icon_file.read()
        self.new_doc_button = SingleDocumentButton('Add New Document',
                                                   new_doc_button_icon, None)
        self.new_doc_button.pressed.connect(
            lambda: self.create_new_doc_window())

        # No filter is active yet, so every document button is shown
        self._active_docs = self._doc_buttons

        self.render_doc_grid()

        self._layout.addLayout(self.ui_box)
        self._layout.addWidget(self.scroll_area)

        self.setLayout(self._layout)
        db.close()
Beispiel #5
0
    def __init__(self, new_doc_cb, doc=None, parent=None):
        """
        Builds the form used to choose files and OCR options for a document

        :param new_doc_cb: callback kept on self.new_doc_cb (not invoked here)
        :param doc: existing document to extend, or None when a new document is created
        :param parent: parent widget handed to the base class constructor
        """
        super().__init__(parent)
        # The database is opened for the duration of the constructor and closed at the end
        db.connect(reuse_if_open=True)

        self.new_doc_cb = new_doc_cb

        self._doc = doc
        # Number of pages already stored for the document (0 when there is no document yet)
        self._doc_size = 0 if self._doc is None else len(self._doc.pages)

        # NOTE(review): parentWidget() is dereferenced unconditionally, so this presumably
        # requires a parent that exposes close_event_signal - confirm against callers
        self.parentWidget().close_event_signal.connect(self.cleanup_temp_files)

        self.display_preview_button = Qw.QPushButton(
            "Show document preview", default=False, autoDefault=False, parent=self)
        self.display_preview_button.setCheckable(True)
        # The preview button starts disabled when no existing document was passed in
        if self._doc is None:
            self.display_preview_button.setEnabled(False)
        self.display_preview_button.toggled.connect(
            self.on_display_preview_button_toggled)

        self.choose_file_button = Qw.QPushButton(
            "Add files", default=False, autoDefault=False, parent=self)
        self.choose_file_button.clicked.connect(self.choose_files)

        self.remove_file_button = Qw.QPushButton(
            "Remove files", default=False, autoDefault=False, parent=self)
        self.remove_file_button.clicked.connect(self.remove_files)

        self.options = Qw.QGroupBox("Options")

        self.name_label = Qw.QLabel("Document Name:")
        self.name_edit = Qw.QLineEdit(parent=self)
        if self._doc is not None:
            self.name_edit.setText(self._doc.name)
            # renaming is not permitted
            self.name_edit.setReadOnly(True)

        # Bug in qdarkstyle that makes dropdowns too large, so we need to add styles
        self.dropdown_style = """QComboBox::item:checked {
                height: 12px;
                border: 1px solid #32414B;
                margin-top: 0px;
                margin-bottom: 0px;
                padding: 4px;
                padding-left: 0px;
                }"""

        self.preset_label = Qw.QLabel("Preset:")
        self.preset_options = Qw.QComboBox()
        self.preset_options.setStyleSheet(self.dropdown_style)
        self.preset_options.addItem("Screenshot")
        self.preset_options.addItem("Printed Text (PDF)")
        self.preset_options.addItem("Written Paragraph")
        self.preset_options.addItem("Written Page")
        self.preset_options.addItem("Custom")
        # Index 4 is "Custom"; it is selected before the signal is connected, so
        # preset_changed is not triggered by this initial selection
        self.preset_options.setCurrentIndex(4)
        self.preset_options.currentIndexChanged.connect(self.preset_changed)

        self.best_vs_fast = Qw.QLabel("Best Model or Fast Model:")
        self.best_vs_fast_options = Qw.QComboBox()

        self.best_vs_fast_options.setStyleSheet(self.dropdown_style)
        self.best_vs_fast_options.addItem("Fast")
        self.best_vs_fast_options.addItem("Best")
        # Default should be Best
        self.best_vs_fast_options.setCurrentIndex(1)

        self.processing_label = Qw.QLabel("Perform image preprocessing:")
        self.processing_options = Qw.QComboBox()
        self.processing_options.setStyleSheet(self.dropdown_style)
        self.processing_options.addItem("No")
        self.processing_options.addItem("Yes")
        # default should be no
        self.processing_options.setCurrentIndex(0)
        self.processing_options.currentIndexChanged.connect(self.custom_preset)

        self.psm_label = Qw.QLabel("PSM Number")
        self.psm_num = Qw.QComboBox()
        self.psm_num.setStyleSheet(self.dropdown_style)
        # Entries are the strings "3" through "13", so combo index i holds PSM value i + 3
        for i in range(3, 14):
            self.psm_num.addItem(str(i))
        # Default should be 3
        self.psm_num.setCurrentIndex(0)
        self.psm_num.currentIndexChanged.connect(self.custom_preset)

        self.info_button = Qw.QPushButton(
            default=False, autoDefault=False, parent=self)
        self.info_button.setIcon(
            Qg.QIcon(get_absolute_path("icons/info_icon.png")))
        self.info_button.clicked.connect(self.display_info)

        self.status_bar = Qw.QStatusBar()
        self.status_bar.showMessage("Ready")

        # Stack the option widgets vertically inside the "Options" group box
        options_layout = Qw.QVBoxLayout()
        options_layout.addWidget(self.name_label)
        options_layout.addWidget(self.name_edit)
        options_layout.addWidget(self.preset_label)
        options_layout.addWidget(self.preset_options)
        options_layout.addWidget(self.best_vs_fast)
        options_layout.addWidget(self.best_vs_fast_options)
        options_layout.addWidget(self.processing_label)
        options_layout.addWidget(self.processing_options)
        options_layout.addWidget(self.psm_label)
        options_layout.addWidget(self.psm_num)
        options_layout.addWidget(self.info_button, alignment=Qc.Qt.AlignRight)
        self.options.setLayout(options_layout)

        self.file_names_label = Qw.QLabel("Files Chosen: ")
        self.listwidget = DragList(self)
        self.listwidget.file_dropped_signal.connect(self.insert_files)
        self.listwidget.drag_complete_signal.connect(self.update_file_previews)

        self.submit = Qw.QPushButton(
            "Process Document", default=False, autoDefault=False, parent=self)
        self.submit.clicked.connect(self.process_document)

        # Main column: file controls, file list, options, submit button and status bar
        layout = Qw.QVBoxLayout()
        layout.addWidget(self.display_preview_button)
        layout.addWidget(self.choose_file_button)
        layout.addWidget(self.remove_file_button)
        layout.addWidget(self.file_names_label)
        layout.addWidget(self.listwidget)
        layout.addWidget(self.options)
        layout.addWidget(self.submit)
        layout.addWidget(self.status_bar)

        main_layout = Qw.QHBoxLayout()
        main_layout.addLayout(layout)
        self.setLayout(main_layout)

        # For the preview image feature, keep two data types
        # Dictionary that stores PDF filepath -> ([image filepaths], temp_dir)
        self.pdf_previews = {}
        # List of filenames, with PDFs already converted to images
        self._pages = []

        db.close()
Beispiel #6
0
    def __init__(self, doc, parent=None, filter=''):
        """
        Constructor method
        :param doc: OCRDocument
        :param parent: parent widget handed to the base class constructor
        :param filter: Filter from main window
        """
        super().__init__(parent=parent)
        # The database is opened for the duration of the constructor and closed at the end
        db.connect(reuse_if_open=True)
        self.setWindowTitle(doc.name)

        desktop = Qw.QDesktopWidget()
        desktop_size = desktop.availableGeometry(
            desktop.primaryScreen()).size()
        # resize() takes integer pixel sizes; the scaled values are floats, so truncate them
        # explicitly instead of relying on an implicit float -> int conversion
        self.resize(int(desktop_size.width() * 0.3),
                    int(desktop_size.height() * 0.6))

        self._doc = doc
        self._filter = filter
        self._curr_page = 0
        self._pages = self._doc.pages
        self._pages_len = len(self._pages)
        # Store key as page index, value as list of blocks
        self._filtered_page_indexes = OrderedDict()

        self._layout = Qw.QVBoxLayout()

        self._options = Qw.QHBoxLayout()

        self.search_bar = Qw.QLineEdit()
        self.search_bar.setPlaceholderText("Search through notes...")
        self.search_bar.textChanged.connect(self.update_filter)

        self.case_sens_button = Qw.QRadioButton("Case Sensitive", parent=self)
        self.case_sens_button.toggled.connect(self.update_filter)

        self.filter_mode = Qw.QPushButton("Show matching pages",
                                          default=False,
                                          autoDefault=False,
                                          parent=self)
        self.filter_mode.setCheckable(True)
        self.filter_mode.toggled.connect(self.set_filter_mode)

        self._options.addWidget(self.search_bar, alignment=Qc.Qt.AlignTop)
        self._options.addWidget(self.case_sens_button)
        self._options.addWidget(self.filter_mode, alignment=Qc.Qt.AlignTop)
        # NOTE(review): Qt documents QBoxLayout.addLayout as (layout, stretch) - confirm the
        # binding in use accepts the alignment keyword here
        self._layout.addLayout(self._options, alignment=Qc.Qt.AlignTop)

        # create button group for prev and next page buttons
        self.next_page_button = Qw.QPushButton("Next Page",
                                               default=False,
                                               autoDefault=False,
                                               parent=self)
        self.next_page_button.setSizePolicy(Qw.QSizePolicy.MinimumExpanding,
                                            Qw.QSizePolicy.Fixed)
        self.next_page_button.clicked.connect(self.next_page)
        self.prev_page_button = Qw.QPushButton("Previous Page",
                                               default=False,
                                               autoDefault=False,
                                               parent=self)
        self.prev_page_button.setSizePolicy(Qw.QSizePolicy.MinimumExpanding,
                                            Qw.QSizePolicy.Fixed)
        self.prev_page_button.clicked.connect(self.prev_page)

        self.page_number_box = Qw.QLineEdit(parent=self)
        self.page_number_box.setSizePolicy(Qw.QSizePolicy.Minimum,
                                           Qw.QSizePolicy.Fixed)
        # Input mask and width are both derived from the number of digits in the page count
        self.page_number_box.setInputMask("0" * len(str(self._pages_len)))
        self.page_number_box.setFixedWidth(self.page_number_box.fontMetrics(
        ).boundingRect(str(self._pages_len)).width() + 20)
        self.page_number_box.editingFinished.connect(
            self.on_page_number_box_change)

        # Added viewer
        self.viewer = PhotoViewer(parent=self)
        self._layout.addWidget(self.viewer)

        self.info_button = Qw.QPushButton(default=False,
                                          autoDefault=False,
                                          parent=self)
        self.info_button.setIcon(
            Qg.QIcon(get_absolute_path("icons/info_icon.png")))
        self.info_button.clicked.connect(self.display_info)

        self.rename_button = Qw.QPushButton("Rename doc",
                                            default=False,
                                            autoDefault=False,
                                            parent=self)
        self.rename_button.clicked.connect(self.rename_document)

        self.export_button = Qw.QPushButton("Export as PDF",
                                            default=False,
                                            autoDefault=False,
                                            parent=self)
        self.export_button.clicked.connect(self.export_pdf)

        self.add_pages_button = Qw.QPushButton("Add pages",
                                               default=False,
                                               autoDefault=False,
                                               parent=self)
        self.add_pages_button.clicked.connect(
            lambda: self.add_pages(self._doc))

        # Bottom row: document actions and page navigation
        self._button_group = Qw.QHBoxLayout()
        self._button_group.addWidget(self.rename_button)
        self._button_group.addWidget(self.add_pages_button)
        self._button_group.addWidget(self.prev_page_button)
        self._button_group.addWidget(self.page_number_box)
        self._button_group.addWidget(self.next_page_button)
        self._button_group.addWidget(self.export_button)
        self._button_group.addWidget(self.info_button)
        self._layout.addLayout(self._button_group)

        self.setLayout(self._layout)

        # if filter passed through from main window, set the search bar text and update window
        if self._filter:
            self.search_bar.setText(self._filter)
            self.update_filter()
        self.jump_to_page(0)

        db.close()
Beispiel #7
0
from peewee import (Model, Check, PrimaryKeyField, CharField, IntegerField,
                    BlobField, ForeignKeyField, TextField)
from playhouse.sqlite_ext import SqliteExtDatabase

from StudiOCR.util import get_absolute_path

# Location of the SQLite database file, resolved through get_absolute_path.
# Should likely change where the database files are stored
DATABASE = get_absolute_path('ocr_files.db')

# Database handle shared by the models declared below.
# autoconnect=False means the connection has to be opened explicitly (db.connect()).
# Do we need c extensions?
db = SqliteExtDatabase(
    DATABASE,
    autoconnect=False,
    c_extensions=False,
    pragmas={
        'journal_mode': 'delete',  # Use SQLite's DELETE journal mode
        'foreign_keys': 1  # Enforce foreign-key constraints
    })


class BaseModel(Model):
    """Base model that binds its subclasses to the module-level `db` database."""

    class Meta:
        # Models inheriting from BaseModel use this database
        database = db


# Table entry for an OCR'ed document
class OcrDocument(BaseModel):
    id = PrimaryKeyField(null=False)
    name = CharField(unique=True)

    def delete_document(self):