예제 #1
0
 def open_folder(self):
     """Open the folder that holds the currently selected search result.

     The configured ``open_cmd`` is split on spaces, the directory of the
     selected result's path is appended as the last argument, and the
     resulting command is run. Nothing happens when no result is selected.
     """
     selected = self.ui.search_result_list.currentItem()
     if selected is None:
         return

     command = edocuments.config.get('open_cmd').split(' ')
     full_path = edocuments.long_path(selected.result.get('path'))
     command.append(os.path.dirname(full_path))
     call(command)
예제 #2
0
    def do_update_library(self):
        """Synchronise the search index with the documents found on disk.

        The work is done in this order:

        1. Read the whole index. Entries whose path is duplicated, whose
           file no longer exists, or whose path differs from its
           ``short_path`` form are queued for deletion; the date and MD5 of
           every other entry are remembered.
        2. Add an index entry for every directory under
           ``edocuments.root_folder`` that is not indexed yet and is not
           matched by an ``ignore`` pattern of the configuration.
        3. For each ``to_txt`` converter of the configuration, look for the
           files with its extension. New files, and files that are newer
           than their index entry with a different MD5, are queued for
           conversion. Files that are newer but whose MD5 is unchanged (or
           was never stored) are re-added with their existing content.
        4. Delete the queued index entries, run ``self.to_txt`` on every
           queued file in a thread pool and optimise the index.

        Progress is reported through ``update_library_progress``. At the
        end ``scan_error`` is emitted instead of the final progress signal
        when ``self.nb_error`` is not zero.
        """
        ignore_patterns = edocuments.config.get('ignore', [])

        def is_ignored(path):
            """Tell whether ``path`` matches one of the ``ignore`` patterns."""
            return any(path.match(pattern) for pattern in ignore_patterns)

        docs_to_rm = []
        docs_date = {}
        with index().index.reader() as reader:
            for num, doc in reader.iter_docs():
                path = doc[PATH]
                if \
                        path in docs_date or \
                        not Path(edocuments.long_path(path)).exists() or \
                        path != edocuments.short_path(path):
                    print("Delete document: " + path)
                    docs_to_rm.append(num)
                else:
                    docs_date[path] = (doc.get(DATE), doc.get(MD5))

        self.update_library_progress.emit(
            0, 'Adding the directories...', '')
        index_folder = '.index'
        for directory in Path(edocuments.root_folder).rglob('*'):
            dir_ = edocuments.short_path(directory)
            # NOTE(review): ``directory`` is a Path and ``index_folder`` a
            # str, so ``directory != index_folder`` is always true and does
            # not exclude the index folder - confirm the intended test.
            if \
                    dir_ in docs_date or \
                    not directory.is_dir() or \
                    not directory != index_folder:
                continue
            # A matching pattern must exclude the directory (the flag used
            # to be reset to False on a match, so nothing was ever ignored).
            if is_ignored(directory):
                continue
            with index().index.writer() as writer:
                writer.update_document(**{
                    PATH: dir_,
                    CONTENT: dir_,
                    DATE: directory.stat().st_mtime,
                    DIRECTORY: True,
                })

        self.update_library_progress.emit(
            0, 'Browsing the files (0)...', '')
        index_folder += '/'
        todo = []
        for conv in edocuments.config.get('to_txt'):
            cmds = conv.get("cmds")
            for filename in Path(edocuments.root_folder).rglob(
                    "*." + conv.get('extension')):
                # The patterns are tested against the file itself, not
                # against the last directory seen by the previous loop.
                if is_ignored(filename) or not filename.exists():
                    continue
                # NOTE(review): this only skips paths whose string starts
                # with '.index/'; presumably the short path was meant -
                # confirm against edocuments.short_path.
                if str(filename).find(index_folder) == 0:
                    continue

                short_name = edocuments.short_path(filename)
                current_date, md5 = docs_date.get(short_name, (None, None))
                new_date = filename.stat().st_mtime
                if current_date is not None and new_date <= current_date:
                    # Not newer than the indexed version: nothing to do, and
                    # no need to read the file to hash it.
                    continue

                digest = hashlib.md5()
                with open(str(filename), "rb") as f:
                    for chunk in iter(lambda: f.read(4096), b""):
                        digest.update(chunk)
                new_md5 = digest.hexdigest()

                if current_date is not None and (md5 is None or md5 == new_md5):
                    # Same content (or no hash stored yet): keep the indexed
                    # text and only refresh the date and the hash.
                    doc = index().get(filename)
                    index().add(
                        filename,
                        doc[CONTENT],
                        max(new_date, current_date),
                        new_md5
                    )
                else:
                    print("Add document: " + short_name)
                    todo.append((str(filename), cmds, new_date, new_md5))
                    self.update_library_progress.emit(
                        0, 'Browsing the files (%i)...' % len(todo), short_name)

        self.nb = len(todo)
        self.nb_error = 0
        self.no = 0

        print('Removes %i old documents.' % len(docs_to_rm))

        with index().index.writer() as writer:
            for num in docs_to_rm:
                writer.delete_document(num)

        self.update_library_progress.emit(
            0, 'Parsing the files %i/%i.' % (self.no, self.nb), '',
        )

        print('Process %i documents.' % len(todo))

        with ThreadPoolExecutor(
            max_workers=edocuments.config.get('nb_process', 8)
        ) as executor:
            future_results = {
                executor.submit(self.to_txt, t):
                t for t in todo
            }
            # Only wait for the conversions; their results are not read
            # here (presumably to_txt reports its own errors - verify).
            for future in as_completed(future_results):
                pass

        self.update_library_progress.emit(
            0, 'Optimise the index...', '',
        )
        index().optimize()

        if self.nb_error != 0:
            self.scan_error.emit("Finished with %i errors" % self.nb_error)
        else:
            self.update_library_progress.emit(
                100, 'Finish', '',
            )
예제 #3
0
 def filename(self):
     """Return the text of the ``scan_to`` field passed through
     ``edocuments.long_path``."""
     scan_target = self.ui.scan_to.text()
     return edocuments.long_path(scan_target)