def update_filter(self):
    """
    Updates the filter after the input in the search bar is changed

    Stores the search bar text in self._filter, rebuilds self._active_docs
    from self._doc_buttons and re-renders the document grid:

    - DOC mode: keeps the buttons whose name contains the filter text
      (case-insensitive)
    - OCR mode: keeps the buttons whose document has at least one block
      containing any of the whitespace-separated filter words
      (case-insensitive); an empty filter keeps every button
    - neither mode checked: no button is kept
    """
    db.connect(reuse_if_open=True)
    try:
        self._filter = self.search_bar.text()
        needle = self._filter.lower()

        if self.doc_search.isChecked():
            self._active_docs = [
                button for button in self._doc_buttons
                if needle in button.name.lower()
            ]
        elif self.ocr_search.isChecked():
            words = needle.split()

            def contains_any_word(doc):
                # Return on the first hit so the remaining pages and blocks
                # of an already matched document are not scanned.
                for page in doc.pages:
                    for block in page.blocks:
                        text = block.text.lower()
                        if any(word in text for word in words):
                            return True
                return False

            if words:
                self._active_docs = [
                    button for button in self._doc_buttons
                    if contains_any_word(button.doc)
                ]
            else:
                # nothing to search for: every document stays visible
                self._active_docs = list(self._doc_buttons)
        else:
            self._active_docs = []
    finally:
        # close the connection even when reading a page or block raises
        db.close()
    self.render_doc_grid()
def exec_filter(self):
    """
    Performs filtering operations, populating self._filtered_page_indexes

    self._filtered_page_indexes is reset to an empty OrderedDict and then
    filled with page index -> list of blocks whose text contains at least
    one of the whitespace-separated words of self._filter. Matching is case
    sensitive only while self.case_sens_button is checked. Pages without a
    matching block get no entry, and each matching block is listed once
    even when several words occur in it.
    """
    self._filtered_page_indexes = OrderedDict()

    # no search criteria: nothing to filter
    if not self._filter:
        return

    # the button state cannot change during the loop, so read it once
    case_sensitive = self.case_sens_button.isChecked()
    criteria = self._filter if case_sensitive else self._filter.lower()
    words = criteria.split()

    db.connect(reuse_if_open=True)
    try:
        for page_index, page in enumerate(self._pages):
            matched_blocks = []
            for block in page.blocks:
                text = block.text if case_sensitive else block.text.lower()
                # any() stops at the first matching word, so a block is
                # never appended more than once
                if any(word in text for word in words):
                    matched_blocks.append(block)
            if matched_blocks:
                self._filtered_page_indexes[page_index] = matched_blocks
    finally:
        # close the connection even when reading a page or block raises
        db.close()
def update_image(self):
    """
    Shows the current page in the viewer, outlining the matched blocks

    When a filter is set and the current page has an entry in
    self._filtered_page_indexes, a rectangle is drawn around every block of
    that entry: green for a block confidence of 80 or more, blue for 40 up
    to (but not including) 80, red otherwise. In every other case the page
    image is shown unmodified.
    """
    db.connect(reuse_if_open=True)

    page_index = self._curr_page
    has_matches = (bool(self._filter)
                   and page_index in self._filtered_page_indexes)

    # both cases start from the original image of the current page
    page_image = Qg.QImage.fromData(self._pages[page_index].image)
    self._pixmap = Qg.QPixmap.fromImage(page_image)

    if has_matches:
        for match in self._filtered_page_indexes[page_index]:
            # rectangle color reflects the confidence level of the OCR
            if match.conf >= 80:
                pen_color = Qc.Qt.green
            elif match.conf >= 40:
                pen_color = Qc.Qt.blue
            else:
                pen_color = Qc.Qt.red

            outline = Qg.QPen(pen_color, 3, Qc.Qt.SolidLine)
            painter = Qg.QPainter(self._pixmap)
            painter.setPen(outline)
            painter.drawRect(match.left, match.top,
                             match.width, match.height)
            painter.end()

    self.viewer.setPhoto(self._pixmap)
    db.close()
def quit_processes():
    """
    Shuts down the background workers and closes the database

    Stops status_emitter, puts None on the queue, waits for ocr_process to
    finish and then closes the database connection.
    """
    # stop the status thread
    status_emitter.stop()

    # stop the OCR process; presumably None is the sentinel its loop
    # waits for -- TODO confirm against the worker implementation
    queue.put(None)
    ocr_process.join()

    db.close()
def update_button_name_docid(self, doc_id):
    """
    Reloads the name shown on the button of the given document

    The first button whose document has the given id gets the name that is
    currently stored in the database; afterwards the filter is re-applied.

    :param doc_id: ID of the document whose button should be refreshed
    """
    db.connect(reuse_if_open=True)

    target = next(
        (btn for btn in self._doc_buttons if btn.doc.id == doc_id),
        None)
    if target is not None:
        # fetch a fresh row so the name reflects what is stored right now
        target.name = OcrDocument.get(OcrDocument.id == doc_id).name

    self.update_filter()
    db.close()
def refresh_pages(self):
    """
    Re-reads the pages of the document and updates the cached page count

    Workaround used when files are added to an existing document and the
    user wants to see them immediately in the doc preview.
    """
    db.connect(reuse_if_open=True)
    current_pages = self._doc.pages
    self._pages = current_pages
    self._pages_len = len(current_pages)
    db.close()
def __init__(self, doc=None, parent=None):
    """
    Builds the preview widget: a photo viewer above a row with the
    previous page button, a page number box and the next page button

    :param doc: existing document whose pages are previewed, or None
    :param parent: parent widget
    """
    super().__init__(parent)
    db.connect(reuse_if_open=True)

    self._image_previewer = Qw.QLabel()
    self.viewer = PhotoViewer(parent=self)

    self._doc = doc
    self._doc_size = 0 if self._doc is None else len(self._doc.pages)
    self._curr_preview_page = 0

    # create button group for prev and next page buttons
    self.next_page_button = Qw.QPushButton(
        "Next Page", default=False, autoDefault=False, parent=self)
    self.next_page_button.setSizePolicy(
        Qw.QSizePolicy.MinimumExpanding, Qw.QSizePolicy.Fixed)
    self.next_page_button.clicked.connect(self.next_page)

    self.prev_page_button = Qw.QPushButton(
        "Previous Page", default=False, autoDefault=False, parent=self)
    self.prev_page_button.setSizePolicy(
        Qw.QSizePolicy.MinimumExpanding, Qw.QSizePolicy.Fixed)
    self.prev_page_button.clicked.connect(self.prev_page)

    self.page_number_label = Qw.QLabel(str(self._curr_preview_page + 1))

    self.page_number_box = Qw.QLineEdit(parent=self)
    self.page_number_box.setSizePolicy(
        Qw.QSizePolicy.Minimum, Qw.QSizePolicy.Fixed)

    def jump_to_entered_page():
        # The box has no validator here, so the text may be empty or not a
        # number; ignore such input instead of raising inside the slot.
        try:
            page_number = int(self.page_number_box.text())
        except ValueError:
            return
        # the box is 1-based, jump_to_page receives a 0-based index
        self.jump_to_page(page_number - 1)

    self.page_number_box.editingFinished.connect(jump_to_entered_page)

    self._button_group = Qw.QHBoxLayout()
    self._button_group.addWidget(self.prev_page_button)
    self._button_group.addWidget(self.page_number_box)
    self._button_group.addWidget(self.next_page_button)

    self.preview_layout = Qw.QVBoxLayout()
    self.preview_layout.addWidget(self.viewer)
    self.preview_layout.addLayout(self._button_group)
    self.setLayout(self.preview_layout)

    self._pages = []
    self._pages_len = 0
    db.close()

    if self._doc is not None:
        self.update_preview_image_list([])
def update_image(self):
    """
    Sets the image preview of the selected file

    Page indexes below self._doc_size are read from the pages of
    self._doc. Higher indexes are looked up in self._pages, offset by
    self._doc_size. When there is nothing to preview the viewer is hidden.
    """
    page_index = self._curr_preview_page

    # page that already belongs to the stored document
    if page_index < self._doc_size:
        db.connect(reuse_if_open=True)
        stored_image = Qg.QImage.fromData(self._doc.pages[page_index].image)
        db.close()
        self._pixmap = Qg.QPixmap.fromImage(stored_image)
        self.viewer.setPhoto(self._pixmap)
        return

    # page taken from self._pages (presumably image file paths -- verify
    # against update_preview_image_list)
    if self._pages_len > 0:
        self._pixmap = Qg.QPixmap(self._pages[page_index - self._doc_size])
        self.viewer.setPhoto(self._pixmap)
        return

    self.viewer.hide()
def __init__(self, new_doc_cb, doc=None, parent=None):
    """
    Builds the window used to add a new document or to add pages to an
    existing one; it hosts an EditDocOptions panel and a hidden
    EditDocPreview

    :param new_doc_cb: callback stored as self.new_doc_cb and handed on to
        EditDocOptions
    :param doc: existing document to add pages to, or None for a new one
    :param parent: parent widget
    """
    super().__init__(parent=parent)
    db.connect(reuse_if_open=True)

    self._doc = doc
    self.new_doc_cb = new_doc_cb

    if self._doc is None:
        self.setWindowTitle("Add New Document")
    else:
        self.setWindowTitle(f"Add pages to {self._doc.name}")

    self.desktop = Qw.QDesktopWidget()
    self.desktop_size = self.desktop.availableGeometry(
        self.desktop.primaryScreen()).size()
    # resize() takes integers; the scaled sizes are floats and are rejected
    # by int-only bindings on Python 3.10+, so truncate them explicitly
    self.resize(int(self.desktop_size.width() * 0.2),
                int(self.desktop_size.height() * 0.6))

    self.settings = EditDocOptions(self.new_doc_cb, doc=doc, parent=self)
    self.preview = EditDocPreview(doc=self._doc, parent=self)
    self.preview.hide()

    self.settings.close_on_submit_signal.connect(self.close_on_submit)
    self.settings.has_new_file_previews.connect(
        self.preview.update_preview_image_list)
    self.settings.display_preview_toggle_signal.connect(
        self.set_preview_visibility)

    self.submitted = False

    self.settings_layout = Qw.QVBoxLayout()
    self.settings_layout.addWidget(self.settings)

    # NOTE(review): this attribute hides the inherited layout() method on
    # the instance; kept because other code may read self.layout
    self.layout = Qw.QHBoxLayout()
    self.layout.addLayout(self.settings_layout)
    self.setLayout(self.layout)
    db.close()
def rename_document(self, doc):
    """
    Asks the user for a new name and renames the document of this window

    Nothing happens when the dialog is cancelled or the name is unchanged.
    An empty name or a name used by another document is rejected with a
    warning. On success the window title and the document button of the
    parent widget are updated.

    :param doc: not used; the document that is renamed is self._doc
    """
    def warn(details):
        msg = Qw.QMessageBox()
        msg.setIcon(Qw.QMessageBox.Warning)
        msg.setText("Document names must be unique and non empty.")
        msg.setInformativeText(details)
        msg.setWindowTitle("Error")
        msg.exec_()

    text, ok = Qw.QInputDialog().getText(self, "Rename document",
                                         "Enter new name for document",
                                         Qw.QLineEdit.Normal)

    # A cancelled dialog returns an empty text together with ok == False;
    # without this check the document would be renamed to ''.
    if not ok or text == self._doc.name:
        return

    if len(text) == 0:
        warn('Please enter a non-empty document name.')
        return

    db.connect(reuse_if_open=True)
    try:
        name_taken = OcrDocument.select().where(
            OcrDocument.name == text).exists()
        if not name_taken:
            self._doc.name = text
            self._doc.save()
    finally:
        # close the connection even when the query or the save raises
        db.close()

    if name_taken:
        warn('There is already a document with that name.')
        return

    self.setWindowTitle(text)
    # the parent widget owns the document buttons, so it has to refresh
    # the button label for this document
    self.parentWidget().update_button_name_docid(self._doc.id)
def display_new_document(self, doc_id):
    """
    Makes sure the grid has a button for the given document and re-applies
    the filter, which re-renders the doc grid

    :param doc_id: ID of the new document in the database
    """
    db.connect(reuse_if_open=True)

    # only append the document if the document isn't in the grid yet
    already_shown = any(
        existing.doc.id == doc_id for existing in self._doc_buttons)

    if not already_shown:
        doc = OcrDocument.get(OcrDocument.id == doc_id)
        # assuming that each doc will surely have at least one page
        cover_image = doc.pages[0].image
        new_button = SingleDocumentButton(doc.name, cover_image, doc)
        new_button.pressed.connect(
            lambda doc=doc: self.create_doc_window(doc))
        self._doc_buttons.append(new_button)

    self.update_filter()
    db.close()
def process_document(self):
    """
    Validates the form and hands the selected files to self.new_doc_cb

    For a new document the name must be non-empty and not used by another
    document; in every case at least one page must be selected. A warning
    box is shown when a check fails. Otherwise the callback is invoked and
    close_on_submit_signal is emitted.
    """
    def warn(text, details):
        box = Qw.QMessageBox()
        box.setIcon(Qw.QMessageBox.Warning)
        box.setText(text)
        box.setInformativeText(details)
        box.setWindowTitle("Error")
        box.exec_()

    db.connect(reuse_if_open=True)

    name = self.name_edit.text()
    same_name = OcrDocument.select().where(OcrDocument.name == name)

    if (same_name.exists() or not name) and self._doc is None:
        if not name:
            details = 'Please enter a non-empty document name.'
        else:
            details = 'There is already a document with that name.'
        warn("Document names must be unique and non empty.", details)
    elif not self._pages:
        warn("No files were selected as part of the document.",
             'Please select files to process.')
    else:
        # looks like the only oem mode supported by both the fast and best
        # model is the new LSTM mode, so the oem option is hardcoded to 3
        oem_number = 3
        # the dropdown entries start at psm 3, hence the offset
        psm_number = self.psm_num.currentIndex() + 3
        best = bool(self.best_vs_fast_options.currentIndex())
        preprocessing = bool(self.processing_options.currentIndex())
        doc_id = self._doc.id if self._doc is not None else None

        self.new_doc_cb(name, doc_id, self.pdf_previews, self._pages,
                        oem_number, psm_number, best, preprocessing)
        self.close_on_submit_signal.emit()

    db.close()
def create_doc_window(self, doc):
    """
    Depending on the state of remove, this function will either remove the
    document clicked or spawn a document window for the document

    In remove mode the user is asked for confirmation first; answering
    anything but Yes leaves the document untouched.

    :param doc: document to remove or display
    """
    db.connect(reuse_if_open=True)
    try:
        if not self.remove_mode.isChecked():
            # pass the filter on only when it is an OCR (content) search
            if self.ocr_search.isChecked():
                self.doc_window = DocWindow(doc, parent=self,
                                            filter=self._filter)
            else:
                self.doc_window = DocWindow(doc, parent=self)
            self.doc_window.show()
            return

        # remove mode is checked: prompt and remove the document
        confirm = Qw.QMessageBox()
        confirm.setWindowTitle(f"Remove Document: {doc.name}")
        confirm.setText(
            f"Are you sure you want to delete document: {doc.name}?")
        confirm.setIcon(Qw.QMessageBox.Question)
        confirm.setStandardButtons(Qw.QMessageBox.Yes)
        confirm.addButton(Qw.QMessageBox.No)
        confirm.setDefaultButton(Qw.QMessageBox.No)
        if confirm.exec_() != Qw.QMessageBox.Yes:
            return

        button_to_remove = next(
            (button for button in self._doc_buttons if button.doc == doc),
            None)
        # Guard against a document without a button: removeWidget(None)
        # and list.remove(None) would raise before the document is deleted.
        if button_to_remove is not None:
            self.doc_grid.removeWidget(button_to_remove)
            self._doc_buttons.remove(button_to_remove)
        self.update_filter()

        # update_filter closes the connection, so reopen it for the delete
        db.connect(reuse_if_open=True)
        doc.delete_document()
    finally:
        # the connection used to stay open when the user answered No or
        # when an exception was raised; close it on every path
        db.close()
def __init__(self, new_doc_cb, parent=None):
    """
    Builds the document overview: a search row (DOC/OCR mode, search bar,
    remove mode toggle) above a scrollable grid with one button per stored
    document plus an 'Add New Document' button

    :param new_doc_cb: callback stored as self.new_doc_cb
    :param parent: parent widget
    """
    super().__init__(parent)
    db.connect(reuse_if_open=True)

    self._filter = ''
    self.new_doc_cb = new_doc_cb

    self.setSizePolicy(Qw.QSizePolicy.MinimumExpanding,
                       Qw.QSizePolicy.MinimumExpanding)

    self._layout = Qw.QVBoxLayout()

    self.doc_grid = Qw.QGridLayout()
    self.scroll_area = Qw.QScrollArea()
    self.scroll_area.setWidgetResizable(True)

    self.ui_box = Qw.QHBoxLayout()

    self._doc_buttons = []

    self.search_bar = Qw.QLineEdit()
    self.search_bar.setPlaceholderText("Search for document name...")
    self.search_bar.textChanged.connect(self.update_filter)

    self.doc_search = Qw.QRadioButton("DOC")
    self.doc_search.clicked.connect(self.update_filter)
    self.doc_search.setChecked(True)

    self.ocr_search = Qw.QRadioButton("OCR")
    self.ocr_search.clicked.connect(self.update_filter)

    self.remove_mode = Qw.QPushButton("Enable remove mode")
    self.remove_mode.setCheckable(True)
    self.remove_mode.toggled.connect(self.set_remove_mode)

    self.ui_box.addWidget(self.doc_search)
    self.ui_box.addWidget(self.ocr_search)
    self.ui_box.addWidget(self.search_bar)
    self.ui_box.addWidget(self.remove_mode)

    # produces the document buttons that users can interact with
    for doc in OcrDocument.select():
        # assuming that each doc will surely have at least one page
        img = doc.pages[0].image
        name = doc.name

        doc_button = SingleDocumentButton(name, img, doc)
        # doc=doc binds the current document; a plain closure would make
        # every button open the last document of the loop
        doc_button.pressed.connect(
            lambda doc=doc: self.create_doc_window(doc))
        doc_button.setVisible(True)
        self._doc_buttons.append(doc_button)

    # read the icon inside a with-block so the file handle is closed
    # right away instead of being left to the garbage collector
    with open(get_absolute_path("icons/plus_icon.png"), "rb") as icon_file:
        new_doc_button_icon = icon_file.read()
    self.new_doc_button = SingleDocumentButton('Add New Document',
                                               new_doc_button_icon, None)
    self.new_doc_button.pressed.connect(
        lambda: self.create_new_doc_window())

    # no filter yet: every document is active
    self._active_docs = self._doc_buttons
    self.render_doc_grid()

    self._layout.addLayout(self.ui_box)
    self._layout.addWidget(self.scroll_area)
    self.setLayout(self._layout)
    db.close()
def __init__(self, new_doc_cb, doc=None, parent=None):
    """
    Builds the options panel: file selection buttons, the list of chosen
    files, the OCR option dropdowns, the submit button and a status bar

    :param new_doc_cb: callback stored as self.new_doc_cb
    :param doc: existing document to add pages to, or None for a new one
    :param parent: parent widget; its close_event_signal is connected to
        self.cleanup_temp_files
    """
    super().__init__(parent)
    db.connect(reuse_if_open=True)

    self.new_doc_cb = new_doc_cb
    self._doc = doc
    has_doc = self._doc is not None
    self._doc_size = len(self._doc.pages) if has_doc else 0

    self.parentWidget().close_event_signal.connect(self.cleanup_temp_files)

    # keyword arguments shared by every push button of this panel
    button_kwargs = dict(default=False, autoDefault=False, parent=self)

    self.display_preview_button = Qw.QPushButton(
        "Show document preview", **button_kwargs)
    self.display_preview_button.setCheckable(True)
    if not has_doc:
        self.display_preview_button.setEnabled(False)
    self.display_preview_button.toggled.connect(
        self.on_display_preview_button_toggled)

    self.choose_file_button = Qw.QPushButton("Add files", **button_kwargs)
    self.choose_file_button.clicked.connect(self.choose_files)

    self.remove_file_button = Qw.QPushButton(
        "Remove files", **button_kwargs)
    self.remove_file_button.clicked.connect(self.remove_files)

    self.options = Qw.QGroupBox("Options")

    self.name_label = Qw.QLabel("Document Name:")
    self.name_edit = Qw.QLineEdit(parent=self)
    if has_doc:
        self.name_edit.setText(self._doc.name)
        # renaming is not permitted
        self.name_edit.setReadOnly(True)

    # Bug in qdarkstyle that makes dropdowns too large, so we need to add
    # styles
    self.dropdown_style = """QComboBox::item:checked { height: 12px; border: 1px solid #32414B; margin-top: 0px; margin-bottom: 0px; padding: 4px; padding-left: 0px; }"""

    self.preset_label = Qw.QLabel("Preset:")
    self.preset_options = Qw.QComboBox()
    self.preset_options.setStyleSheet(self.dropdown_style)
    for preset_name in ("Screenshot", "Printed Text (PDF)",
                        "Written Paragraph", "Written Page", "Custom"):
        self.preset_options.addItem(preset_name)
    # index 4 is the "Custom" entry
    self.preset_options.setCurrentIndex(4)
    self.preset_options.currentIndexChanged.connect(self.preset_changed)

    self.best_vs_fast = Qw.QLabel("Best Model or Fast Model:")
    self.best_vs_fast_options = Qw.QComboBox()
    self.best_vs_fast_options.setStyleSheet(self.dropdown_style)
    for model_name in ("Fast", "Best"):
        self.best_vs_fast_options.addItem(model_name)
    # Default should be Best
    self.best_vs_fast_options.setCurrentIndex(1)

    self.processing_label = Qw.QLabel("Perform image preprocessing:")
    self.processing_options = Qw.QComboBox()
    self.processing_options.setStyleSheet(self.dropdown_style)
    for answer in ("No", "Yes"):
        self.processing_options.addItem(answer)
    # default should be no
    self.processing_options.setCurrentIndex(0)
    self.processing_options.currentIndexChanged.connect(self.custom_preset)

    self.psm_label = Qw.QLabel("PSM Number")
    self.psm_num = Qw.QComboBox()
    self.psm_num.setStyleSheet(self.dropdown_style)
    for psm in range(3, 14):
        self.psm_num.addItem(str(psm))
    # Default should be 3
    self.psm_num.setCurrentIndex(0)
    self.psm_num.currentIndexChanged.connect(self.custom_preset)

    self.info_button = Qw.QPushButton(**button_kwargs)
    self.info_button.setIcon(
        Qg.QIcon(get_absolute_path("icons/info_icon.png")))
    self.info_button.clicked.connect(self.display_info)

    self.status_bar = Qw.QStatusBar()
    self.status_bar.showMessage("Ready")

    options_layout = Qw.QVBoxLayout()
    for option_widget in (self.name_label, self.name_edit,
                          self.preset_label, self.preset_options,
                          self.best_vs_fast, self.best_vs_fast_options,
                          self.processing_label, self.processing_options,
                          self.psm_label, self.psm_num):
        options_layout.addWidget(option_widget)
    options_layout.addWidget(self.info_button, alignment=Qc.Qt.AlignRight)
    self.options.setLayout(options_layout)

    self.file_names_label = Qw.QLabel("Files Chosen: ")
    self.listwidget = DragList(self)
    self.listwidget.file_dropped_signal.connect(self.insert_files)
    self.listwidget.drag_complete_signal.connect(self.update_file_previews)

    self.submit = Qw.QPushButton("Process Document", **button_kwargs)
    self.submit.clicked.connect(self.process_document)

    layout = Qw.QVBoxLayout()
    for panel_widget in (self.display_preview_button,
                         self.choose_file_button,
                         self.remove_file_button,
                         self.file_names_label,
                         self.listwidget,
                         self.options,
                         self.submit,
                         self.status_bar):
        layout.addWidget(panel_widget)

    main_layout = Qw.QHBoxLayout()
    main_layout.addLayout(layout)
    self.setLayout(main_layout)

    # For the preview image feature, keep two data types
    # Dictionary that stores PDF filepath -> ([image filepaths], temp_dir)
    self.pdf_previews = {}
    # List of filenames, with PDFs already converted to images
    self._pages = []
    db.close()
def __init__(self, doc, parent=None, filter=''):
    """
    Constructor method

    Builds the document window: a search row, the page viewer and a button
    row (rename, add pages, previous page, page number box, next page,
    export, info), then shows the first page.

    :param doc: OCRDocument
    :param parent: parent widget
    :param filter: Filter from main window
    """
    super().__init__(parent=parent)
    db.connect(reuse_if_open=True)

    self.setWindowTitle(doc.name)

    desktop = Qw.QDesktopWidget()
    desktop_size = desktop.availableGeometry(
        desktop.primaryScreen()).size()
    # resize() takes integers; the scaled sizes are floats and are rejected
    # by int-only bindings on Python 3.10+, so truncate them explicitly
    self.resize(int(desktop_size.width() * 0.3),
                int(desktop_size.height() * 0.6))

    self._doc = doc
    self._filter = filter
    self._curr_page = 0
    self._pages = self._doc.pages
    self._pages_len = len(self._pages)
    # Store key as page index, value as list of blocks
    self._filtered_page_indexes = OrderedDict()

    self._layout = Qw.QVBoxLayout()

    self._options = Qw.QHBoxLayout()

    self.search_bar = Qw.QLineEdit()
    self.search_bar.setPlaceholderText("Search through notes...")
    self.search_bar.textChanged.connect(self.update_filter)

    self.case_sens_button = Qw.QRadioButton("Case Sensitive", parent=self)
    self.case_sens_button.toggled.connect(self.update_filter)

    self.filter_mode = Qw.QPushButton("Show matching pages",
                                      default=False,
                                      autoDefault=False,
                                      parent=self)
    self.filter_mode.setCheckable(True)
    self.filter_mode.toggled.connect(self.set_filter_mode)

    self._options.addWidget(self.search_bar, alignment=Qc.Qt.AlignTop)
    self._options.addWidget(self.case_sens_button)
    self._options.addWidget(self.filter_mode, alignment=Qc.Qt.AlignTop)
    self._layout.addLayout(self._options, alignment=Qc.Qt.AlignTop)

    # create button group for prev and next page buttons
    self.next_page_button = Qw.QPushButton("Next Page",
                                           default=False,
                                           autoDefault=False,
                                           parent=self)
    self.next_page_button.setSizePolicy(Qw.QSizePolicy.MinimumExpanding,
                                        Qw.QSizePolicy.Fixed)
    self.next_page_button.clicked.connect(self.next_page)

    self.prev_page_button = Qw.QPushButton("Previous Page",
                                           default=False,
                                           autoDefault=False,
                                           parent=self)
    self.prev_page_button.setSizePolicy(Qw.QSizePolicy.MinimumExpanding,
                                        Qw.QSizePolicy.Fixed)
    self.prev_page_button.clicked.connect(self.prev_page)

    self.page_number_box = Qw.QLineEdit(parent=self)
    self.page_number_box.setSizePolicy(Qw.QSizePolicy.Minimum,
                                       Qw.QSizePolicy.Fixed)
    # one "0" mask character per digit of the page count, so at most that
    # many digits can be typed
    self.page_number_box.setInputMask("0" * len(str(self._pages_len)))
    # wide enough for the largest page number plus 20 pixels
    self.page_number_box.setFixedWidth(
        self.page_number_box.fontMetrics().boundingRect(
            str(self._pages_len)).width() + 20)
    self.page_number_box.editingFinished.connect(
        self.on_page_number_box_change)

    # Added viewer
    self.viewer = PhotoViewer(parent=self)
    self._layout.addWidget(self.viewer)

    self.info_button = Qw.QPushButton(default=False,
                                      autoDefault=False,
                                      parent=self)
    self.info_button.setIcon(
        Qg.QIcon(get_absolute_path("icons/info_icon.png")))
    self.info_button.clicked.connect(self.display_info)

    self.rename_button = Qw.QPushButton("Rename doc",
                                        default=False,
                                        autoDefault=False,
                                        parent=self)
    self.rename_button.clicked.connect(self.rename_document)

    self.export_button = Qw.QPushButton("Export as PDF",
                                        default=False,
                                        autoDefault=False,
                                        parent=self)
    self.export_button.clicked.connect(self.export_pdf)

    self.add_pages_button = Qw.QPushButton("Add pages",
                                           default=False,
                                           autoDefault=False,
                                           parent=self)
    self.add_pages_button.clicked.connect(
        lambda: self.add_pages(self._doc))

    self._button_group = Qw.QHBoxLayout()
    self._button_group.addWidget(self.rename_button)
    self._button_group.addWidget(self.add_pages_button)
    self._button_group.addWidget(self.prev_page_button)
    self._button_group.addWidget(self.page_number_box)
    self._button_group.addWidget(self.next_page_button)
    self._button_group.addWidget(self.export_button)
    self._button_group.addWidget(self.info_button)

    self._layout.addLayout(self._button_group)
    self.setLayout(self._layout)

    # if filter passed through from main window, set the search bar text
    # and update window
    if self._filter:
        self.search_bar.setText(self._filter)
        self.update_filter()

    self.jump_to_page(0)
    db.close()