def open_folder(self):
    """Open the folder that contains the selected search result.

    Does nothing when no item is selected in ``search_result_list``.
    Otherwise the configured ``open_cmd`` is split on single spaces, the
    parent directory of the result's long path is appended as the last
    argument, and the resulting argument list is handed to ``call``.
    """
    selected = self.ui.search_result_list.currentItem()
    if selected is None:
        return

    # Build the command first, then resolve the folder, so failures surface
    # in the same order as they always have.
    command = edocuments.config.get('open_cmd').split(' ')
    full_path = edocuments.long_path(selected.result.get('path'))
    command.append(os.path.dirname(full_path))
    call(command)
def do_update_library(self):
    """Synchronise the search index with the files under the root folder.

    Steps, in order:

    1. Read the index and mark for deletion every document that is a
       duplicate, whose file no longer exists, or whose stored path differs
       from its ``short_path`` form. Remember date and MD5 of the others.
    2. Index every directory that is not indexed yet and is not ignored.
    3. Browse the files matching each ``to_txt`` converter extension and
       queue the new or modified ones for text extraction.
    4. Delete the marked documents, run ``self.to_txt`` on the queue in a
       thread pool and optimise the index.

    Progress is reported through ``update_library_progress``. When
    ``self.nb_error`` is non-zero at the end, ``scan_error`` is emitted
    instead of the final ``'Finish'`` progress.
    """
    ignore_patterns = edocuments.config.get('ignore', [])

    def is_ignored(path):
        """Tell whether *path* matches one of the configured patterns."""
        return any(path.match(pattern) for pattern in ignore_patterns)

    # --- 1. Inspect what is already indexed -------------------------------
    docs_to_rm = []
    docs_date = {}
    with index().index.reader() as reader:
        for num, doc in reader.iter_docs():
            path = doc[PATH]
            if (
                path in docs_date
                or not Path(edocuments.long_path(path)).exists()
                or path != edocuments.short_path(path)
            ):
                print("Delete document: " + path)
                docs_to_rm.append(num)
            else:
                docs_date[path] = (doc.get(DATE), doc.get(MD5))

    # --- 2. Index the directories -----------------------------------------
    self.update_library_progress.emit(
        0, 'Adding the directories...', '')

    index_folder = '.index'
    for directory in Path(edocuments.root_folder).rglob('*'):
        dir_ = edocuments.short_path(directory)
        # NOTE(review): ``directory`` is a Path and ``index_folder`` a str,
        # so ``directory != index_folder`` is always true and never
        # excludes anything. Kept as is; the intended comparison (probably
        # against ``dir_`` or ``directory.name``) needs confirming.
        if (
            dir_ not in docs_date
            and directory.is_dir()
            and directory != index_folder
            and not is_ignored(directory)
        ):
            with index().index.writer() as writer:
                writer.update_document(**{
                    PATH: dir_,
                    CONTENT: dir_,
                    DATE: directory.stat().st_mtime,
                    DIRECTORY: True,
                })

    # --- 3. Find the files that need (re)processing -----------------------
    self.update_library_progress.emit(
        0, 'Browsing the files (0)...', '')

    index_folder += '/'
    todo = []
    for conv in edocuments.config.get('to_txt'):
        cmds = conv.get("cmds")
        pattern = "*." + conv.get('extension')
        for filename in Path(edocuments.root_folder).rglob(pattern):
            # The ignore patterns are matched against the file itself (the
            # previous code tested a leftover variable of the directory
            # loop and never set the flag).
            if is_ignored(filename) or not filename.exists():
                continue
            # NOTE(review): this only skips paths whose string *starts*
            # with '.index/'; whether that can happen depends on the form
            # of ``edocuments.root_folder`` - confirm the intent.
            if str(filename).find(index_folder) == 0:
                continue

            short_name = edocuments.short_path(filename)
            current_date, md5 = docs_date.get(short_name, (None, None))
            new_date = filename.stat().st_mtime
            if current_date is None or new_date > current_date:
                # Hash only the files whose date says they may have
                # changed; the digest is not needed for the others.
                digest = hashlib.md5()
                with open(str(filename), "rb") as f:
                    for chunk in iter(lambda: f.read(4096), b""):
                        digest.update(chunk)
                new_md5 = digest.hexdigest()

                if current_date is not None and (
                        md5 is None or md5 == new_md5):
                    # Already indexed and the hash is unchanged (or none
                    # was stored): re-add the stored content with the new
                    # date and hash instead of converting the file again.
                    doc = index().get(filename)
                    index().add(
                        filename,
                        doc[CONTENT],
                        max(new_date, current_date),
                        new_md5
                    )
                else:
                    print("Add document: " + short_name)
                    todo.append((str(filename), cmds, new_date, new_md5))
                    self.update_library_progress.emit(
                        0, 'Browsing the files (%i)...' % len(todo),
                        short_name)

    # --- 4. Apply the changes ---------------------------------------------
    self.nb = len(todo)
    self.nb_error = 0
    self.no = 0

    print('Removes %i old documents.' % len(docs_to_rm))
    with index().index.writer() as writer:
        for num in docs_to_rm:
            writer.delete_document(num)

    self.update_library_progress.emit(
        0, 'Parsing the files %i/%i.' % (self.no, self.nb), '',
    )

    print('Process %i documents.' % len(todo))
    with ThreadPoolExecutor(
        max_workers=edocuments.config.get('nb_process', 8)
    ) as executor:
        future_results = {
            executor.submit(self.to_txt, t): t for t in todo
        }
        # Only wait for completion: ``result()`` is never called, so an
        # exception raised inside ``to_txt`` is not re-raised here.
        for _ in as_completed(future_results):
            pass

    self.update_library_progress.emit(
        0, 'Optimise the index...', '',
    )
    index().optimize()

    if self.nb_error != 0:
        self.scan_error.emit("Finished with %i errors" % self.nb_error)
    else:
        self.update_library_progress.emit(
            100, 'Finish', '',
        )
def filename(self):
    """Return the text of the ``scan_to`` field converted by ``long_path``."""
    typed_name = self.ui.scan_to.text()
    return edocuments.long_path(typed_name)