def split_and_export_dataframe(df, nrows, sortby=None, output_name=None, export_csv=True):
    """Split a dataframe into chunks of ``nrows`` rows and export every chunk.

    Arguments:
        df {pd.DataFrame} -- Pandas dataframe to split
        nrows {int} -- Maximum number of rows per exported chunk (must be > 0)

    Keyword Arguments:
        sortby {string} -- Column(s) to sort by before splitting; the sort is
            done in place, so the caller's dataframe is reordered as well
            (default: {None})
        output_name {string} -- Output file name without extension; asked for
            interactively via ``input`` when not given (default: {None})
        export_csv {bool} -- True writes ';' separated csv files, False writes
            xlsx files (default: {True})

    Raises:
        ValueError -- if ``nrows`` is not positive
    """
    # pandas.np was removed in pandas 2.0; the standard library is enough here.
    import math

    if nrows <= 0:
        raise ValueError("nrows must be a positive integer")

    if sortby is not None:
        df.sort_values(by=sortby, inplace=True)

    n_iter = math.ceil(len(df) / nrows)

    if output_name is None:
        output_name = input("Please name the output file to save: ")

    for i in range(n_iter):
        # Positional row slice; the last chunk may be shorter than nrows.
        chunk = df.iloc[nrows * i:nrows * (i + 1)]
        # NOTE(review): every chunk is handed the same base path, so this
        # relies on checkfile returning a name that does not clash with the
        # files written so far -- confirm against checkfile.
        if export_csv:
            chunk.to_csv(checkfile(r'./{}.csv'.format(output_name)),
                         sep=';', index=False)
        else:
            chunk.to_excel(checkfile(r'./{}.xlsx'.format(output_name)),
                           index=False)
def rotate(self,
           pages: List[int] = None,
           step: int = 1,
           save_as: os.PathLike = None) -> None:
    """
    Summary:
        Set the rotation of the given pages to a multiple of 90 degrees and
        save the document.

    Args:
        pages (List[int], optional): Zero-based page numbers to rotate.
            Defaults to None (all pages); an empty list also means all pages.
        step (int, optional): Number of 90 degree steps, negative sign for
            counter clockwise direction. ``step * 90`` is passed to
            ``set_rotation`` regardless of the page's current rotation.
            Nothing is rotated when step is not an int. Defaults to 1.
        save_as (os.PathLike, optional): Output path. Defaults to None, in
            which case the path derived from ``self.path`` is used.
    """
    if not pages:
        pages = range(self.document.page_count)
    wanted = set(pages)

    # A non-int step leaves every page untouched; the file is still saved.
    if isinstance(step, int):
        for page in self.document:
            if page.number in wanted:
                page.set_rotation(step * 90)

    target = self.path if save_as is None else save_as
    self.document.save(checkfile(target))
def merge(docpaths: List[os.PathLike, ] = None,
          output: os.PathLike = None) -> None:
    """
    Summary:
        Merge the given pdf files, in list order, into a single pdf.

    Args:
        docpaths (List[os.PathLike, ], optional): Paths of the pdfs to merge;
            the first one is opened as the base document and the others are
            appended to it. Must contain at least one path.
        output (os.PathLike, optional): Path of the merged pdf. Defaults to
            None, which saves as "Binder.pdf" in the folder of the first path.

    Raises:
        ValueError: If docpaths is None or empty.
    """
    if not docpaths:
        raise ValueError("merge() needs at least one pdf path in docpaths")

    first, *others = docpaths
    pdf = PDFEditor(first)
    for path in others:
        # merge=True keeps insert from saving after every appended file
        pdf.insert(path, merge=True)

    if output is None:
        output = os.path.join(os.path.dirname(first), "Binder.pdf")
    pdf.document.save(checkfile(output))
def insert(self,
           docpath: os.PathLike,
           page_range: Tuple[int, int] = None,
           start_at: int = None,
           rotate: int = 0,
           links: bool = True,
           annots: bool = True,
           show_progress: int = 0,
           final: int = 1,
           save_as: os.PathLike = None,
           merge: bool = False) -> None:
    """
    Summary:
        Insert pages of another pdf at any position of this document (also
        usable as append) and rebuild the table of contents so that the
        bookmarks of both files point at the right pages afterwards.

    Args:
        docpath (os.PathLike): Path to the pdf to insert.
        page_range (Tuple[int, int], optional): Zero-based (first, last)
            pages of the inserted pdf, both inclusive. Defaults to None
            (all pages).
        start_at (int, optional): Zero-based position in this document that
            the first inserted page will take. Defaults to None (end of pdf
            aka. append); negative values and values past the end append too.
        rotate (int, optional): Passed to ``insert_pdf`` (rotate copied pages).
            Defaults to 0.
        links (bool, optional): Passed to ``insert_pdf`` (also copy links).
            Defaults to True.
        annots (bool, optional): Passed to ``insert_pdf`` (also copy
            annotations). Defaults to True.
        show_progress (int, optional): Passed to ``insert_pdf``. Defaults to 0.
        final (int, optional): Passed to ``insert_pdf``. Defaults to 1.
        save_as (os.PathLike, optional): Output path. Defaults to None, in
            which case the path derived from ``self.path`` is used.
        merge (bool, optional): Defaults to False. When True the file is not
            saved, so several inserts can be chained before one final save.

    Returns:
        ``self.document`` when merge is True, otherwise None.
    """
    doc2 = fitz.open(docpath)
    try:
        pcount = self.document.page_count
        last2 = doc2.page_count - 1

        if page_range is None:
            start, end = 0, last2
        else:
            start, end = page_range

        # Clamped copies used only for the TOC arithmetic; insert_pdf still
        # receives the caller's values. NOTE(review): this mirrors how PyMuPDF
        # is documented to normalize from_page/to_page (negative = default,
        # capped at the last page) -- confirm against the installed version.
        src_first = min(max(start, 0), last2)
        src_last = last2 if end < 0 else min(end, last2)
        # first > last is a reversed copy, hence abs() before the +1
        pcount2 = abs(src_last - src_first) + 1

        # No usable position means append behind the last page
        if start_at is None or start_at < 0 or start_at >= pcount:
            start_at = pcount

        # TOC page numbers are 1-based: every bookmark of this document that
        # sits on or behind the insert position moves back by the inserted
        # page count. Bookmarks are compared by page, not by list position.
        toc1 = self.document.get_toc()
        for entry in toc1:
            if entry[2] > start_at:
                entry[2] += pcount2

        # Keep only bookmarks of the inserted file whose page is copied and
        # rebase them onto their new position in this document.
        lo, hi = sorted((src_first, src_last))
        toc2 = []
        prev_level = 0
        for level, title, page in doc2.get_toc():
            if page >= 1:
                src_index = page - 1
                if not lo <= src_index <= hi:
                    continue
                page = start_at + abs(src_index - src_first) + 1
            # page < 1: presumably a bookmark without a page target, kept as
            # is -- TODO confirm.
            # Dropping entries can leave gaps in the hierarchy; set_toc needs
            # level 1 first and at most one level deeper per step.
            level = min(level, prev_level + 1)
            toc2.append([level, title, page])
            prev_level = level

        # No bookmark survived (or the file had none): use the file name
        if not toc2:
            name = os.path.splitext(os.path.basename(docpath))[0]
            toc2 = [[1, name, start_at + 1]]

        self.document.insert_pdf(
            doc2,  # cannot be the same object as self.document
            from_page=start,  # first page to copy
            to_page=end,  # last page to copy
            start_at=start_at,  # target location in this document
            rotate=rotate,  # rotate copied pages
            links=links,  # also copy links
            annots=annots,  # also copy annotations
            show_progress=show_progress,  # progress message interval
            final=final  # drop the list of already copied objects
        )
        self.document.set_toc(toc1 + toc2)
    finally:
        # Release the inserted file even when insert_pdf / set_toc fail
        doc2.close()

    if merge:
        return self.document

    target = self.path if save_as is None else save_as
    self.document.save(checkfile(target))
def split(self,
          page_range: Union[int, Tuple[int, int], List[int]],
          single_page: bool = False,
          save_as: os.PathLike = None) -> None:
    """
    Summary:
        Split the pdf in one of the three following ways based on page_range:
            1. int : Split in two after the given zero-based page number;
               the TOC is preserved only in this option
            2. tuple of two int : Split the pages ``range(start, end)``
               either into single page pdfs or into one pdf
            3. list of integers : Split the given arbitrary page numbers
               either into single page pdfs or into one pdf

    Args:
        page_range (Union[int, Tuple[int, int], List[int]]): See summary.
            NOTE(review): in the tuple form ``end`` is exclusive, while
            ``insert`` treats its page_range as inclusive -- confirm which
            one is intended.
        single_page (bool, optional): For options 2 and 3, write one pdf per
            page instead of one pdf with all selected pages. Defaults to False.
        save_as (os.PathLike, optional): Output path. Defaults to None, in
            which case the path derived from ``self.path`` is used.

    Raises:
        ValueError: If page_range is not one of the supported forms, or if an
            int page_range does not leave at least one page on each side.
    """
    # Split pdf into two pdfs after the given page number (zero index)
    if isinstance(page_range, int):
        page_count = self.document.page_count
        if not 0 <= page_range < page_count - 1:
            raise ValueError(
                "page_range must leave at least one page in each part")

        target = self.path if save_as is None else save_as
        toc = self.document.get_toc()
        # Zero-based, inclusive (first, last) page of each part
        parts = ((0, page_range), (page_range + 1, page_count - 1))
        for first, last in parts:
            new_pdf = fitz.open()
            try:
                new_pdf.insert_pdf(self.document, from_page=first,
                                   to_page=last)
                # TOC pages are 1-based while first/last are zero-based
                part_toc = []
                prev_level = 0
                for level, title, page in toc:
                    if first + 1 <= page <= last + 1:
                        # A part may start below a chapter bookmark; set_toc
                        # needs level 1 first and at most one level deeper
                        # per step.
                        level = min(level, prev_level + 1)
                        part_toc.append([level, title, page - first])
                        prev_level = level
                new_pdf.set_toc(part_toc)
                # NOTE(review): both parts are handed the same base path;
                # presumably checkfile returns a non-clashing name -- confirm.
                new_pdf.save(checkfile(target))
            finally:
                new_pdf.close()
        return

    if isinstance(page_range, tuple) and len(page_range) == 2:
        # Pages from start up to, but not including, end
        start, end = page_range
        pages_to_split = list(range(start, end))
    elif isinstance(page_range, list) and page_range:
        # Arbitrary page numbers
        pages_to_split = list(page_range)
    else:
        raise ValueError(
            "page_range must be an int, a tuple of two ints or a non-empty "
            "list of ints")

    target = self.path if save_as is None else save_as
    if single_page:
        for pageidx in pages_to_split:
            new_pdf = fitz.open()
            try:
                new_pdf.insert_pdf(self.document, from_page=pageidx,
                                   to_page=pageidx)
                new_pdf.save(checkfile(target))
            finally:
                new_pdf.close()
    else:
        # select() is applied to self.document itself before saving
        self.document.select(pages_to_split)
        self.document.save(checkfile(target))