def paste_pages(self, dest_doc_path, data_list, dest_doc_is_new=False, after_page_number=False, before_page_number=False):
    """
    Pastes pages into the document pointed to by dest_doc_path.

    A new version (current version + 1) of the destination document is
    written by pdftk.paste_pages; afterwards the per-page data of every
    page is copied, for each step in Steps(), into that new version.

    dest_doc_path is an instance of mglib.path.DocumentPath.

    data_list is a list of dicts, one per source document:

        {'doc_path': <source document path>, 'page_nums': [1, 2, ...]}

    data_list is NOT modified by this method.

    dest_doc_is_new, after_page_number and before_page_number are
    forwarded unchanged to pdftk.paste_pages.

    Returns the new version number of the destination document.
    """
    next_ver_dp = DocumentPath.copy_from(
        dest_doc_path,
        version=dest_doc_path.version + 1
    )
    self.make_sure_path_exists(self.abspath(next_ver_dp))

    pdftk.paste_pages(
        src=self.abspath(dest_doc_path),
        dst=self.abspath(next_ver_dp),
        data_list=data_list,
        dst_doc_is_new=dest_doc_is_new,
        after_page_number=after_page_number,
        before_page_number=before_page_number
    )

    # Work on a shallow copy: prepending the destination's own pages
    # below must not leak into the list owned by the caller.
    migration_list = list(data_list)

    if not dest_doc_is_new:
        # migrate document's own pages from previous
        # version (this differs from pasting into newly
        # created docs)
        # NOTE(review): the destination's own pages are always placed
        # first here; confirm this matches the page order produced by
        # pdftk when after_page_number/before_page_number is used.
        pcount = self.get_pagecount(dest_doc_path)
        migration_list.insert(0, {
            'doc_path': dest_doc_path,
            'page_nums': list(range(1, pcount + 1))
        })

    dest_page_num = 1
    dest_page_count = sum(
        len(item['page_nums']) for item in migration_list
    )

    for item in migration_list:
        src_path = item['doc_path']
        page_nums = item['page_nums']
        if not page_nums:
            # nothing to copy from this source; also avoids querying
            # its page count needlessly
            continue
        # The source page count is the same for every page and step of
        # this item, so look it up once instead of inside the loops.
        src_page_count = self.get_pagecount(src_path)
        for page_num in page_nums:
            for step in Steps():
                src_page_path = PagePath(
                    document_path=src_path,
                    page_num=int(page_num),
                    step=step,
                    page_count=src_page_count
                )
                dst_page_path = PagePath(
                    document_path=next_ver_dp,
                    page_num=dest_page_num,
                    step=step,
                    page_count=dest_page_count
                )
                logger.debug(f"src={src_page_path} dst={dst_page_path}")
                self.copy_page(
                    src_page_path=src_page_path,
                    dst_page_path=dst_page_path
                )
            # one destination slot is consumed per pasted page
            dest_page_num += 1

    return dest_doc_path.version + 1
def reorder_pages(self, doc_path, new_order):
    """
    Reorders pages in the document pointed to by doc_path.

    doc_path is an instance of mglib.path.DocumentPath.

    In case of success returns the document's new version.
    Returns None (after logging an error) when new_order contains more
    entries than the document has pages; in that case nothing is written.

    new_order is a list of the following format:

        [
            {'page_num': 2, 'page_order': 1},
            {'page_num': 1, 'page_order': 2},
            {'page_num': 3, 'page_order': 3},
            {'page_num': 4, 'page_order': 4},
        ]

    The example above means that in the current document of 4 pages,
    the first page was swapped with the second one.

        page_num   = older page order
        page_order = current page order

    So in human language, each hash is read:
        <page_num> now should be <page_order>
    """
    # Validate before touching the filesystem so that an invalid request
    # does not leave a half-created new version behind.
    page_count = self.get_pagecount(doc_path)
    if len(new_order) > page_count:
        logger.error(
            f"new_order({new_order}) > page_count({page_count})"
        )
        return

    src_doc_path = doc_path
    dst_doc_path = DocumentPath.copy_from(
        src_doc_path,
        version=doc_path.version + 1
    )
    self.make_sure_path_exists(self.abspath(dst_doc_path))

    stapler.reorder_pages(
        src=self.abspath(src_doc_path),
        dst=self.abspath(dst_doc_path),
        new_order=new_order
    )

    # Both source and destination page paths are built with the number
    # of entries in new_order as their page count.
    order_len = len(new_order)

    for item in new_order:
        for step in Steps():
            src_page_path = PagePath(
                document_path=src_doc_path,
                page_num=int(item['page_num']),
                step=step,
                page_count=order_len
            )
            dst_page_path = PagePath(
                document_path=dst_doc_path,
                page_num=int(item['page_order']),
                step=step,
                page_count=order_len
            )
            self.copy_page(
                src_page_path=src_page_path,
                dst_page_path=dst_page_path
            )

    return doc_path.version + 1
def get_pagecount(self, doc_path):
    """
    Returns the total number of pages for this doc_path.

    The count is the number of sub-directories found in the document's
    pages directory, looked up under the "results" auxiliary directory.
    Plain files in that directory are not counted.
    """
    results_doc_path = DocumentPath.copy_from(doc_path, aux_dir="results")
    pages_root = self.abspath(results_doc_path.pages_dirname())

    return sum(
        1
        for entry in listdir(pages_root)
        if isdir(join(pages_root, entry))
    )
def delete_pages(self, doc_path, page_numbers, skip_migration=False):
    """
    Deletes pages in the document pointed to by doc_path.

    doc_path is an instance of mglib.path.DocumentPath.
    page_numbers is a list of the page numbers to delete.

    When skip_migration is True only the new document version is
    produced by stapler.delete_pages; per-page data is not copied and
    the page count is not consulted.

    In case of success returns the document's new version.
    Returns False if page_numbers is not a list, and None if more pages
    are requested for deletion than the document has (only checked when
    skip_migration is False). In both failure cases nothing is written.
    """
    if not isinstance(page_numbers, list):
        logger.error("Expecting list argument")
        return False

    if not skip_migration:
        # Validate before writing the new version so that an invalid
        # request does not leave an orphaned file behind.
        page_count = self.get_pagecount(doc_path)
        if len(page_numbers) > page_count:
            logger.error(
                f"deleted_pages({page_numbers}) > page_count({page_count})"
            )
            return

    src_doc_path = doc_path
    dst_doc_path = DocumentPath.copy_from(
        src_doc_path,
        version=doc_path.version + 1
    )
    self.make_sure_path_exists(self.abspath(dst_doc_path))

    stapler.delete_pages(
        self.abspath(src_doc_path),
        self.abspath(dst_doc_path),
        page_numbers
    )

    if skip_migration:
        return doc_path.version + 1

    assigns = get_assigns_after_delete(
        total_pages=page_count,
        deleted_pages=page_numbers
    )
    remaining_count = page_count - len(page_numbers)

    # Each assignment is used as (new page number, old page number):
    # element 1 addresses the source page, element 0 the destination.
    for a in assigns:
        for step in Steps():
            src_page_path = PagePath(
                document_path=src_doc_path,
                page_num=a[1],
                step=step,
                page_count=page_count
            )
            dst_page_path = PagePath(
                document_path=dst_doc_path,
                page_num=a[0],
                step=step,
                page_count=remaining_count
            )
            self.copy_page(
                src_page_path=src_page_path,
                dst_page_path=dst_page_path
            )

    return doc_path.version + 1
def apply_automates(document_id, page_num):
    """
    Runs the document owner's automates against the text of one page.

    The page text is read from the text file of version 0 of the
    document. Every automate of the document's user whose is_a_match()
    accepts that text is applied to the document. Finally the
    automates_matching signal is sent with a summary message.

    Returns None in all cases. Returns early, without sending the
    signal, when the document does not exist or when the user has no
    automates.
    """
    logger.debug("apply_automates: Begin.")
    try:
        document = Document.objects.get(id=document_id)
    except Document.DoesNotExist:
        logger.error(f"Provided document_id={document_id}, does not exists")
        return

    # use text files from the original version of the document
    doc_path = DocumentPath.copy_from(
        document.path,
        version=0
    )
    page_count = get_pagecount(
        default_storage.abspath(doc_path.url())
    )
    page_path = PagePath(
        document_path=doc_path,
        page_num=page_num,
        page_count=page_count,
        step=Step(),
    )
    user = document.user

    text_path = default_storage.abspath(page_path.txt_url())
    with open(text_path, "r") as f:
        text = f.read()

    automates = Automate.objects.filter(user=user)
    # Count once and reuse the value for both the early exit and the
    # summary message instead of asking for it twice.
    total = automates.count()

    # are there automates for the user?
    if total == 0:
        logger.debug(
            f"No automates for user {user}. Quit."
        )
        return

    # check all automates for given user (the owner of the document)
    matched = []
    for automate in automates:
        if automate.is_a_match(text):
            logger.debug(f"Automate {automate} matched document={document}")

            plugin_klass = get_plugin_by_module_name(
                automate.plugin_name
            )
            # Notice () - the plugin passed on is an instance of the
            # class (or None when no plugin class was found).
            plugin = plugin_klass() if plugin_klass else None

            automate.apply(
                document=document,
                page_num=page_num,
                hocr=text,
                plugin=plugin
            )
            matched.append(automate)
        else:
            logger.debug(
                f"No match for automate={automate}"
                f" doc_id={document_id} page_num={page_num}"
            )

    message = _(
        "%(count)s of %(total)s Automate(s) matched. ") % {
        'count': len(matched),
        'total': total
    }

    if matched:
        message += _("List of matched Automates: %(matched_automates)s") % {
            'matched_automates': matched
        }

    automates_matching.send(
        sender="papermerge.core.automate",
        user_id=user.id,
        document_id=document_id,
        level=logging.INFO,
        message=message,
        page_num=page_num,
        text=text
    )