Exemple #1
0
 def do(self):
     """
     Scan `self.nb_pages` pages into `self.doc` and index each one.

     A new ImgDoc is created in the configured work directory when
     `self.doc` is None. For every page, 'scan-start' is emitted before
     the scan and 'scan-done' once the page has been indexed.

     If anything goes wrong (including StopIteration, raised when the
     feeder runs out of paper), 'scan-error' is emitted with the
     exception and the method returns without scanning the remaining
     pages.
     """
     if self.doc is None:
         self.doc = ImgDoc(self.__config.workdir)
     for self.current_page in range(0, self.nb_pages):
         self.emit('scan-start', self.current_page, self.nb_pages)
         try:
             self.doc.scan_single_page(self.__scan_src,
                                       self.__config.scanner_resolution,
                                       self.__config.scanner_calibration,
                                       self.__config.langs,
                                       self.__progress_cb)
             # the page that has just been scanned is the last one of the doc
             page = self.doc.pages[self.doc.nb_pages - 1]
             self.docsearch.index_page(page)
             self.emit('scan-done', page, self.nb_pages)
         except StopIteration as exc:
             # must stay before the generic handler: StopIteration is a
             # subclass of Exception
             logger.warning("Feeder appears to be empty and we "
                            "haven't scanned all the pages yet !")
             self.emit('scan-error', exc)
             self._wait(5.0, force=True)  # wait for all the jobs to be cancelled
             return
         except Exception as exc:
             logger.error("Error: Exception: %s", exc)
             self.emit('scan-error', exc)
             self._wait(5.0, force=True)  # wait for all the jobs to be cancelled
             return
Exemple #2
0
 def import_doc(self, file_uri, config, docsearch, current_doc=None):
     """
     Import the image at `file_uri` as a new page and index it.

     The page is added to `current_doc`; when `current_doc` is None, a
     new ImgDoc is created in `config.workdir` first.

     Returns a tuple (document, page) where page is the last page of
     the document after the import.
     """
     # identity check: `== None` would call a custom __eq__ on the document
     if current_doc is None:
         current_doc = ImgDoc(config.workdir)
     current_doc.import_image(file_uri, config.ocrlang)
     page = current_doc.pages[current_doc.nb_pages - 1]
     docsearch.index_page(page)
     return (current_doc, page)
Exemple #3
0
 def import_doc(file_uri, config, docsearch, current_doc=None):
     """
     Import the specified image as a new page and index that page.

     When no document is given, a new ImgDoc is created in
     `config.workdir`. Returns a tuple (document, page).
     """
     logger.info("Importing doc '%s'" % (file_uri))
     doc = ImgDoc(config.workdir) if current_doc is None else current_doc
     doc.import_image(file_uri, config.langs)
     # the freshly imported page is the last one of the document
     last_page = doc.pages[doc.nb_pages - 1]
     docsearch.index_page(last_page)
     return (doc, last_page)
Exemple #4
0
    def examine_rootdir(self,
                        on_new_doc,
                        on_doc_modified,
                        on_doc_deleted,
                        on_doc_unchanged,
                        progress_cb=dummy_progress_cb):
        """
        Examine the rootdir and compare its content to the index.

        Calls on_new_doc(doc), on_doc_modified(doc), on_doc_unchanged(doc)
        or on_doc_deleted(doc) for every document that is respectively
        new, modified, unchanged or gone from the root directory.
        A document counts as modified when the 'last_read' value stored
        in the index differs from its current last modification time.

        progress_cb(progress, total, DocSearch.INDEX_STEP_CHECKING, doc)
        is called after each examined document, and
        progress_cb(1, 1, DocSearch.INDEX_STEP_CHECKING) once at the end.
        """
        # getting the doc list from the index
        query = whoosh.query.Every()
        results = self.__searcher.search(query, limit=None)
        # docid --> (doctype, last_read), collected in a single pass
        old_doc_infos = {
            result['docid']: (result['doctype'], result['last_read'])
            for result in results
        }
        # docids still to be matched against a directory entry
        old_doc_list = set(old_doc_infos)

        # and compare it to the current directory content
        docdirs = os.listdir(self.docsearch.rootdir)
        progress = 0
        for docdir in docdirs:
            old_infos = old_doc_infos.get(docdir)
            doctype = None
            if old_infos is not None:
                doctype = old_infos[0]
            doc = self.docsearch.get_doc_from_docid(docdir, doctype, inst=True)
            if doc is None:
                # skipped entries are neither reported nor counted in the
                # progression
                continue
            if docdir in old_doc_list:
                old_doc_list.remove(docdir)
                assert(old_infos is not None)
                last_mod = datetime.datetime.fromtimestamp(doc.last_mod)
                if old_infos[1] != last_mod:
                    on_doc_modified(doc)
                else:
                    on_doc_unchanged(doc)
            else:
                on_new_doc(doc)
            progress_cb(progress, len(docdirs),
                        DocSearch.INDEX_STEP_CHECKING, doc)
            progress += 1

        # remove all documents from the index that don't exist anymore
        for old_doc in old_doc_list:
            # Will be a document with 0 pages
            docpath = os.path.join(self.docsearch.rootdir, old_doc)
            on_doc_deleted(ImgDoc(docpath, old_doc,
                                  label_store=self.docsearch.label_store))

        progress_cb(1, 1, DocSearch.INDEX_STEP_CHECKING)
0
 def do(self, scan_src):
     """
     Scan `self.nb_pages` pages from `scan_src` into `self.doc`.

     A new ImgDoc is created in the configured work directory when
     `self.doc` is None. For every page, 'scan-start' is emitted before
     the scan, then the page is indexed and 'scan-done' is emitted.
     `self.current_page` holds the index of the page being scanned and
     is reset to None once all the pages have been scanned.
     """
     # identity check: `== None` would call a custom __eq__ on the document
     if self.doc is None:
         self.doc = ImgDoc(self.__config.workdir)
     for self.current_page in range(0, self.nb_pages):
         self.emit('scan-start', self.current_page, self.nb_pages)
         self.doc.scan_single_page(scan_src,
                                   self.__config.scanner_resolution,
                                   self.__config.scanner_calibration,
                                   self.__config.ocrlang,
                                   self.__progress_cb)
         # the page that has just been scanned is the last one of the doc
         page = self.doc.pages[self.doc.nb_pages - 1]
         self.docsearch.index_page(page)
         self.emit('scan-done', page, self.nb_pages)
     self.current_page = None
Exemple #6
0
 def import_doc(file_uri, docsearch, current_doc=None):
     """
     Import the specified image as a new page of a document.

     When no document is given, a new ImgDoc is created in
     `docsearch.rootdir`. Returns a tuple ([document], page, new) where
     `new` is the value of the document's `is_new` attribute as read
     before the page was added.
     """
     logger.info("Importing doc '%s'" % (file_uri))
     doc = current_doc if current_doc is not None else ImgDoc(docsearch.rootdir)
     was_new = doc.is_new
     scheme = "file://"
     if file_uri[:len(scheme)] == scheme:
         # XXX(Jflesch): bad bad bad
         file_uri = urllib.unquote(file_uri[len(scheme):])
     # the page is added with an empty list as second argument
     added_page = doc.add_page(Image.open(file_uri), [])
     return ([doc], added_page, was_new)
Exemple #7
0
 def import_doc(file_uri, config, docsearch, current_doc=None):
     """
     Import the specified image as a new page of a document.

     When no document is given, a new ImgDoc is created in the
     directory given by `config.settings['workdir'].value`. A leading
     "file://" is stripped from `file_uri` and the remaining path is
     percent-decoded before the image is opened.

     Returns a tuple ([document], page, new) where `new` is the value of
     the document's `is_new` attribute as read before the page was added.
     """
     # local import so this works on both Python 3 and Python 2
     try:
         from urllib.parse import unquote
     except ImportError:
         from urllib import unquote

     logger.info("Importing doc '%s'" % (file_uri))
     if current_doc is None:
         current_doc = ImgDoc(config.settings['workdir'].value)
     new = current_doc.is_new
     if file_uri[:7] == "file://":
         # XXX(Jflesch): bad bad bad
         # URIs are percent-encoded ("%20" for a space, etc): decode the
         # path, otherwise such files cannot be opened
         file_uri = unquote(file_uri[7:])
     img = Image.open(file_uri)
     page = current_doc.add_page(img, [])
     return ([current_doc], page, new)
Exemple #8
0
def main(src_dir, dst_dir):
    """
    Clone the image document stored in `src_dir` into the new directory
    `dst_dir`, passing a generated character mapping and salt to
    clone_doc_content().

    Progress is reported on stdout. Raises an Exception if the source
    document has no pages. `dst_dir` is created with os.mkdir().
    """
    def announce(text):
        # show the step right away, before the (possibly long) work starts
        sys.stdout.write(text)
        sys.stdout.flush()

    def done():
        sys.stdout.write("Done\n")

    announce("Loading document %s ... " % src_dir)
    src_doc = ImgDoc(src_dir, os.path.basename(src_dir))
    done()

    if src_doc.nb_pages <= 0:
        raise Exception("No pages found. Is this an image doc ?")

    announce("Analyzing document ... ")
    chars = get_chars(src_doc)
    done()

    announce("Generating salt ... ")
    salt = gen_salt()
    done()
    print("Will use [%s] as salt for the hash" % salt)

    announce("Generating char mapping ... ")
    mapping = generate_mapping(chars)
    done()

    print_mapping(mapping)

    os.mkdir(dst_dir)

    announce("Generating document %s ... " % dst_dir)
    dst_doc = ImgDoc(dst_dir, os.path.basename(dst_dir))
    clone_doc_content(src_doc, dst_doc, mapping, salt)
    sys.stdout.write("... Done\n")

    print("All done")
Exemple #9
0
 def get_new_doc(self):
     """
     Return the document kept in `self.new_doc`.

     If that document is not flagged `is_new` anymore, it is first
     replaced by a fresh ImgDoc created in the configured work directory.
     """
     if not self.new_doc.is_new:
         self.new_doc = ImgDoc(self.__config['workdir'].value)
     return self.new_doc
Exemple #10
0
    def __init__(self, main_win, config, widget_tree):
        """
        Set up the document list: fetch its widgets, connect the signal
        handlers and actions, initialize the model, register the list as
        a drag-and-drop destination, and finally call show_loading().

        Arguments:
            main_win -- main window object; its `window` attribute
                receives the accelerator group created here
            config -- configuration; `config['workdir'].value` is the
                directory given to the initial new ImgDoc
            widget_tree -- object whose get_object(name) returns the
                widgets "listboxDocList", "doclist_box" and
                "scrolledwindowDocList" (presumably a Gtk.Builder;
                TODO confirm)
        """
        self.__main_win = main_win
        self.__config = config

        self.default_thumbnail = self.__init_default_thumbnail(
            JobDocThumbnailer.SMALL_THUMBNAIL_WIDTH,
            JobDocThumbnailer.SMALL_THUMBNAIL_HEIGHT)

        self.gui = {
            'list': widget_tree.get_object("listboxDocList"),
            'box': widget_tree.get_object("doclist_box"),
            'scrollbars': widget_tree.get_object("scrolledwindowDocList"),
            'spinner': SpinnerAnimation((0, 0)),
        }
        # extra canvas added to the box, hidden until needed
        self.gui['loading'] = Canvas(self.gui['scrollbars'])
        self.gui['loading'].set_visible(False)
        self.gui['box'].add(self.gui['loading'])
        # the handler is not run directly from the signal: it is deferred
        # through GLib.idle_add()
        self.gui['scrollbars'].connect(
            "size-allocate",
            lambda x, s: GLib.idle_add(self._on_size_allocate)
        )

        # action name: ([widgets the action is attached to], action)
        self.actions = {
            'open_doc': (
                [
                    self.gui['list'],
                ],
                ActionOpenSelectedDocument(main_win, config, self)
            ),
        }
        connect_actions(self.actions)

        self.model = {
            'has_new': False,
            'by_row': {},  # Gtk.ListBoxRow: docid
            'by_id': {},  # docid: Gtk.ListBoxRow
            # keep the thumbnails in cache
            'thumbnails': {}  # docid: pixbuf
        }
        self.new_doc = ImgDoc(config['workdir'].value)

        self.job_factories = {
            'doc_thumbnailer': JobFactoryDocThumbnailer(self),
        }
        self.selected_doc = None

        # same as for "size-allocate": deferred through GLib.idle_add()
        self.gui['scrollbars'].get_vadjustment().connect(
            "value-changed",
            lambda v: GLib.idle_add(self._on_scrollbar_value_changed)
        )

        # drag'n'drop: the list accepts text targets, with the MOVE action
        self.gui['list'].connect("drag-motion", self._on_drag_motion)
        self.gui['list'].connect("drag-leave", self._on_drag_leave)
        self.gui['list'].connect(
            "drag-data-received",
            self._on_drag_data_received
        )
        self.gui['list'].drag_dest_set(
            Gtk.DestDefaults.ALL,
            [], Gdk.DragAction.MOVE
        )
        self.gui['list'].drag_dest_add_text_targets()

        self.accel_group = Gtk.AccelGroup()
        self.__main_win.window.add_accel_group(self.accel_group)

        self.show_loading()