예제 #1
0
def split_and_export_dataframe(df,
                               nrows,
                               sortby=None,
                               output_name=None,
                               export_csv=True):
    """Split a dataframe into chunks of ``nrows`` rows and export every chunk.

    Each chunk is written either as a ';' separated csv file or as an excel
    file. The target path handed to ``checkfile`` is the same for every chunk.

    Arguments:
        df {pd.DataFrame} -- Pandas dataframe to split
        nrows {int} -- Maximum number of rows per exported chunk, must be > 0

    Keyword Arguments:
        sortby {string} -- Column(s) to sort by before splitting; the caller's
            dataframe is not modified (default: {None})
        output_name {string} -- Output file name without extension, asked
            interactively via ``input`` if not given (default: {None})
        export_csv {bool} -- True exports ';' separated csv files, False
            exports xlsx files (default: {True})

    Raises:
        ValueError -- If ``nrows`` is not a positive number
    """
    if nrows <= 0:
        raise ValueError("nrows must be a positive integer")

    if sortby is not None:
        # Sort a copy instead of in place so the caller's frame stays as it was
        df = df.sort_values(by=sortby)

    if output_name is None:
        output_name = input("Please name the output file to save: ")

    if export_csv:
        target = r'./{}.csv'.format(output_name)
    else:
        target = r'./{}.xlsx'.format(output_name)

    # NOTE(review): every chunk uses the same target name; this presumably
    # relies on checkfile returning a non-clashing path per call -- confirm.
    # Stepping the range by nrows replaces the removed ``pd.np.ceil`` helper.
    for first_row in range(0, len(df), nrows):
        chunk = df.iloc[first_row:first_row + nrows]
        if export_csv:
            chunk.to_csv(checkfile(target), sep=';', index=False)
        else:
            chunk.to_excel(checkfile(target), index=False)
예제 #2
0
    def rotate(self,
               pages: List[int] = None,
               step: int = 1,
               save_as: os.PathLike = None) -> None:
        """
        Set the rotation of the given pages to ``step * 90`` degrees and save,
        negative sign for counter clockwise direction

        Args:
            pages (List[int], optional): Zero based page numbers to rotate.
                Defaults to None. (All pages, same for an empty list)
            step (int, optional): Number of 90 degree steps. Defaults to 1.
                Nothing is rotated when step is not an int, the document is
                still saved.
            save_as (os.PathLike, optional): Path to save the result.
                Defaults to None. (Path of the editor, ``self.path``)
        """
        if not pages:
            pages = range(self.document.page_count)

        # Set gives constant time membership checks while walking the document
        selected = set(pages)

        # The type check does not depend on the page, so it is done once
        if isinstance(step, int):
            for page in self.document:
                if page.number in selected:
                    page.set_rotation(step * 90)

        target = self.path if save_as is None else save_as
        self.document.save(checkfile(target))
예제 #3
0
    def merge(docpaths: List[os.PathLike, ] = None,
              output: os.PathLike = None) -> None:
        """
        Summary:
            Merge given list of pdf paths into one pdf, the first path is
            opened as base and the remaining ones are appended in order

        Args:
            docpaths (List[os.PathLike, ], optional): Paths of the pdfs to
                merge, at least one path is required. Defaults to None.
            output (os.PathLike, optional): Path to save the merged pdf.
                Defaults to None. (Saves in folder of first path as Binder.pdf)

        Raises:
            ValueError: If docpaths is None or empty
        """
        if not docpaths:
            raise ValueError("docpaths must contain at least one pdf path")

        first, *remaining = docpaths
        pdf = PDFEditor(first)

        # merge=True keeps insert from saving after every appended file
        for path in remaining:
            pdf.insert(path, merge=True)

        if output is None:
            output = os.path.join(os.path.dirname(first), "Binder.pdf")

        pdf.document.save(checkfile(output))
예제 #4
0
    def insert(self,
               docpath: os.PathLike,
               page_range: Tuple[int, int] = None,
               start_at: int = None,
               rotate: int = 0,
               links: bool = True,
               annots: bool = True,
               show_progress: int = 0,
               final: int = 1,
               save_as: os.PathLike = None,
               merge: bool = False) -> None:
        """
        Summary:
            Insert pdf, pages in any position, can also be used as append,
            page numbers of both tables of content are shifted to match the
            new page positions

        Args:
            docpath (os.PathLike): Path to pdf to insert
            page_range (Tuple[int, int], optional): Zero based (first, last)
                page numbers to copy, both inclusive. Defaults to None.
                (All pages)
            start_at (int, optional): Zero based position in the target where
                the first copied page is placed. Defaults to None. (End of pdf
                aka. append, same for negative or too large values)
            rotate (int, optional): Rotation of the copied pages, passed to
                insert_pdf. Defaults to 0.
            links (bool, optional): Also copy links. Defaults to True.
            annots (bool, optional): Also copy annotations. Defaults to True.
            show_progress (int, optional): Progress message interval, passed
                to insert_pdf. Defaults to 0.
            final (int, optional): Whether the list of already copied objects
                is dropped, passed to insert_pdf. Defaults to 1.
            save_as (os.PathLike, optional): Path to save the result.
                Defaults to None. (Path of the editor, ``self.path``)
            merge (bool, optional): Defaults to False. Does not save file for
                merging

        Returns:
            The modified document (``self.document``) when merge is True,
            otherwise None after saving
        """
        # Open file to insert, it is closed again even if inserting fails
        doc2 = fitz.open(docpath)
        try:
            # Saving TOC of file to insert
            toc2 = doc2.get_toc()

            # In case of inserted document having no toc, insert name as toc
            if not toc2:
                toc2 = [[1, os.path.splitext(os.path.basename(docpath))[0], 1]]

            # Adjusting table of content aka. bookmarks in pdf however it is
            # TOC in pymupdf context. Saving initial TOC of original file
            toc1 = self.document.get_toc()
            pcount = self.document.page_count

            # Extract from_page, to_page, page_count for the second file
            if page_range is None:
                start, end = 0, doc2.page_count - 1
                pcount2 = doc2.page_count
            else:
                start, end = page_range
                # Inclusive bounds; abs() outside the difference keeps the
                # count right for a reversed range as well
                pcount2 = abs(end - start) + 1

            # No position, a negative one or one past the end means appending
            if start_at is None or start_at < 0 or start_at >= pcount:
                start_at = pcount

            # TOC page numbers are one based: original pages located after the
            # insertion point move back by the number of inserted pages.
            # Entries are matched by page number, not by their list position.
            for entry in toc1:
                if entry[2] > start_at:
                    entry[2] += pcount2

            # Inserted pages start right after start_at existing pages.
            # Entries without a positive page number are left untouched.
            # NOTE(review): with a partial page_range the toc2 entries are
            # neither trimmed nor offset by the range start -- confirm whether
            # that is wanted.
            for entry in toc2:
                if entry[2] > 0:
                    entry[2] += start_at

            self.document.insert_pdf(
                doc2,  # cannot be the same object as doc1
                from_page=start,  # first page to copy, default: 0
                to_page=end,  # last page to copy, default: last page
                start_at=start_at,  # target location in doc1
                rotate=rotate,  # rotate copied pages
                links=links,  # also copy links
                annots=annots,  # also copy annotations
                show_progress=show_progress,  # progress message interval
                final=final  # drop the list of already copied objects
            )

            self.document.set_toc(toc1 + toc2)
        finally:
            doc2.close()

        if merge:
            return self.document

        target = self.path if save_as is None else save_as
        self.document.save(checkfile(target))
예제 #5
0
    def split(self,
              page_range: Union[int, Tuple[int, int], List[int]],
              single_page: bool = False,
              save_as: os.PathLike = None) -> None:
        """
        Summary:
            Split given pdf in one of three following method based on page_range input:

            If page_range is:
            1. int              : Split in two after given pageno (zero index),
                                  TOC is preserved only in this option
            2. tuple of two int : Split pages start..end-1 (end is exclusive)
                                  either as single pdfs or one pdf
            3. list of integers : Split given arbitrary page numbers either
                                  as single pdfs or one pdf

        Args:
            page_range (Union[int, Tuple[int, int], List[int]]): Zero based
                page number, (start, end) tuple or list of page numbers
            single_page (bool, optional): Save every selected page as its own
                pdf instead of one pdf. Ignored for an int page_range.
                Defaults to False.
            save_as (os.PathLike, optional): Path to save the result(s).
                Defaults to None. (Path of the editor, ``self.path``)

        Raises:
            ValueError: If page_range is none of the supported forms, or is
                an empty list
        """
        # Split pdf into two pdf with given page number (zero index)
        if isinstance(page_range, int):
            target = self.path if save_as is None else save_as
            last_page = self.document.page_count - 1
            toc = self.document.get_toc()

            for first, last in ((0, page_range), (page_range + 1, last_page)):
                last = min(last, last_page)
                if first > last:
                    # Nothing on this side of the split point
                    continue
                new_pdf = fitz.open()
                try:
                    new_pdf.insert_pdf(self.document,
                                       from_page=first,
                                       to_page=last)
                    # TOC page numbers are one based while first/last are zero
                    # based: keep entries on pages first+1..last+1 and renumber
                    # them so the first copied page becomes page 1
                    # NOTE(review): a part whose first kept entry is not a top
                    # level one may be rejected by set_toc -- confirm.
                    cur_toc = [[t[0], t[1], t[2] - first] for t in toc
                               if first < t[2] <= last + 1]
                    new_pdf.set_toc(cur_toc)
                    new_pdf.save(checkfile(target))
                finally:
                    new_pdf.close()
            return

        # Split given pages with start, end number either as single page pdfs or one pdf
        if isinstance(page_range, tuple) and len(page_range) == 2:
            start, end = page_range
            pages_to_split = list(range(start, end))
        # Split arbitrary pages into either single page pdfs or one pdf
        elif isinstance(page_range, list) and page_range:
            pages_to_split = list(page_range)
        else:
            raise ValueError(
                "page_range must be an int, a tuple of two ints or a "
                "non-empty list of ints")

        target = self.path if save_as is None else save_as

        if single_page:
            for pageidx in pages_to_split:
                new_pdf = fitz.open()
                try:
                    new_pdf.insert_pdf(self.document,
                                       from_page=pageidx,
                                       to_page=pageidx)
                    new_pdf.save(checkfile(target))
                finally:
                    new_pdf.close()
        else:
            # select works on the open document itself before it is saved
            self.document.select(pages_to_split)
            self.document.save(checkfile(target))