Example #1
0
 def merge_report(self):
     """Merge this pham's graph PDF and text PDF into one report PDF.

     Reads ``<phage>OnePham<no>Graph.pdf`` and ``<phage>OnePham<no>Text.pdf``
     from ``self.output_dir`` and writes ``<phage>Pham<no>Report.pdf`` into
     ``self.final_dir`` (graph pages first, then text pages).

     Returns:
         tuple: ``(file_path, file_name)`` of the written report.
     """
     name_parts = (self.phage_name, self.pham.pham_no)
     graph_path = os.path.join(self.output_dir,
                               "%sOnePham%sGraph.pdf" % name_parts)
     text_path = os.path.join(self.output_dir,
                              '%sOnePham%sText.pdf' % name_parts)
     file_name = "%sPham%sReport.pdf" % name_parts
     file_path = os.path.join(self.final_dir, file_name)

     merger = PyPDF2.PdfFileMerger()
     # The inputs must stay open until the merged file has been written
     # (the merger may read page data lazily); the context managers then
     # close every handle, including the output, even on error.
     with open(graph_path, "rb") as graph, open(text_path, 'rb') as text:
         merger.append(fileobj=graph)
         merger.append(fileobj=text)
         with open(file_path, 'wb') as report:
             merger.write(report)
     return file_path, file_name
def generateSplice():
    """Build ``spliced.pdf`` from one randomly chosen page of each download.

    Every entry of ``./downloads/`` (except ``.DS_Store``) is opened as a PDF,
    one random page index is picked, and that single page is merged into the
    output. Files that fail to parse or merge are skipped. Exits the process
    if the downloads folder holds no files.

    NOTE(review): this is Python 2 code (``print`` statement, ``file()``).
    """
    print ' > Splicing files...\n > (Ignore errors)'
    newPdf = pdf.PdfFileMerger(strict=False)
    files = [x for x in listdir('./downloads/') if x != '.DS_Store']

    if len(files) == 0:
        sys.exit(' > You must first download some files.')

    for x in files:
        # NOTE(review): the handle opened by file() is never closed explicitly.
        f = pdf.PdfFileReader(file('./downloads/' + x, 'rb'), strict=False)
        try:
            # Random zero-based page index within this document.
            pn = random.randint(0, f.getNumPages() - 1)
        except pdf.utils.PdfReadError:
            continue
        try:
            # Insert only page `pn` at position 0 of the output.
            newPdf.merge(0, './downloads/' + x, pages=(pn, pn + 1))
        except (pdf.utils.PdfReadError, ValueError):
            continue

    newPdf.write('spliced.pdf')
    newPdf.close()
    print ' > Done!'
Example #3
0
def export_pdf(token):
    """Export, per student, a PDF of the solutions to every wrongly answered question.

    The output directory ``output/<token>`` is recreated from scratch.
    Solutions are read from ``data/solution.json`` (a mapping of question id
    to a dict with ``"answer"`` and ``"pdf"`` entries) and the answers come
    from the sheet returned by ``read_excel(token)``: column 0 is the name,
    column 1 the user id, column 2 the test id, columns 3..27 the 25 answers.

    Args:
        token: Identifier used for the output folder and passed to
            ``read_excel``.
    """
    output_path = f"output/{token}"
    if os.path.exists(output_path):
        shutil.rmtree(output_path)
    os.makedirs(output_path)

    # Close the solution file deterministically instead of leaking the handle.
    with open("data/solution.json", "r") as solution_file:
        solution = json.load(solution_file)
    test_sheet, _ = read_excel(token)

    name_map = {}
    test_result = []
    for row in test_sheet.iter_rows(min_row=2):
        name = row[0].value
        uid = row[1].value
        tid = row[2].value
        # Question ids are "<tid><two-digit question number>".
        answer = [[f"{tid}{i + 1:02d}", row[i + 3].value] for i in range(25)]
        name_map[uid] = name
        test_result.append([uid, answer])

    for uid, test in test_result:
        tid = test[0][0][:4]
        wrong_pdfs = [
            solution[qid]["pdf"] for qid, answer in test
            if solution[qid]["answer"] != answer
        ]

        if not wrong_pdfs:
            print(f"all clear: {tid}-{name_map[uid]}")
            continue

        timestamp = time.strftime("%Y%m%d-%H%M%S",
                                  time.localtime(time.time()))
        merge_name = f"{timestamp}-{tid}-{name_map[uid]}.pdf"
        # Only build a merger when there is something to export, and always
        # release it (and the input files it opened) afterwards.
        merger = PyPDF2.PdfFileMerger()
        try:
            for pdf_path in wrong_pdfs:
                merger.append(pdf_path)
            merger.write(f"{output_path}/{merge_name}")
        finally:
            merger.close()
        print(f"pdf exported: {merge_name}")

    print("evaluation finished")
def createPDF(data):
    """Download the selected Dropbox PDFs, merge them and store the result.

    The files listed in ``data.pdfList`` are downloaded into a freshly
    recreated ``Selected Files`` folder, merged in list order, saved under a
    user-supplied name and moved into ``data.destinationPath`` (replacing an
    existing file of the same name). Finally ``reset(data)`` is called.

    If the user cancels the filename dialog nothing is written and the
    function returns early.

    Args:
        data: Application state object providing ``pdfList``, ``dbx``,
            ``root`` and ``destinationPath``.
    """
    download_dir = "Selected Files"
    if os.path.exists(download_dir):
        shutil.rmtree(download_dir)
    os.makedirs(download_dir)

    local_paths = []
    for entry in data.pdfList:
        remote_path = entry.path_lower
        local_path = download_dir + "/" + remote_path.split("/")[-1]
        data.dbx.files_download_to_file(local_path, remote_path)
        local_paths.append(local_path)

    pdfMerger = PyPDF2.PdfFileMerger()
    try:
        for local_path in local_paths:
            pdfMerger.append(local_path)

        name = simpledialog.askstring("New PDF",
                                      "What is the filename?",
                                      parent=data.root)
        if name is None:
            # Dialog cancelled: previously this crashed on `None + ".pdf"`.
            return

        file_name = name + ".pdf"
        # os.path.join instead of a hard-coded "\\" keeps this portable.
        existing = os.path.join(data.destinationPath, file_name)
        if os.path.exists(existing):
            os.remove(existing)

        pdfMerger.write(file_name)
    finally:
        pdfMerger.close()

    shutil.move(file_name, data.destinationPath)

    messagebox.showinfo("Process", "Done!")

    reset(data)
Example #5
0
def merger(argv):
    """Merge the PDF files named on the command line into ``<output>.pdf``.

    Args:
        argv: Full argument vector. ``argv[2]`` is the output name (without
            extension) and ``argv[3:]`` are the input files; at least two
            inputs are required, so ``len(argv)`` must be >= 5.

    Missing input files are reported and skipped. The output is only written
    when at least one file could be appended.
    """
    if len(argv) < 5:
        print(
            'Option \'converter\' requires at least two files to merge, please read documentation'
        )
        return

    output = argv[2]
    files = argv[3:]
    print('Merging...')
    pdf_merger = PyPDF2.PdfFileMerger()
    merged = 0
    try:
        for pdf in files:
            try:
                pdf_merger.append(pdf)
                # Was `merged = +1`, which assigned 1 instead of counting.
                merged += 1
            except FileNotFoundError:
                print(f'{pdf} does not exist')
        if merged:
            pdf_merger.write(f'{output}.pdf')
            print(f'All done! {output}.pdf was created')
        else:
            print('Could not merge any files')
    finally:
        # Release the input files the merger opened.
        pdf_merger.close()
Example #6
0
def combine_pdfs_py(input_paths, output_path):
    """Concatenate several PDFs into one file.

    Parameters
    ----------
    input_paths : iterable of str
        Paths of the PDFs to combine, in the order they should appear.
    output_path : str
        Path the combined PDF is written to.

    Returns
    -------
    None. The combined PDF is saved to `output_path`.
    """
    merger = PyPDF2.PdfFileMerger()

    for path in input_paths:
        with open(path, 'rb') as handle:
            merger.append(PyPDF2.PdfFileReader(handle))

    merger.write(output_path)
def downloadProblemTexts(conn, problems, output_dir):
    """Download the text of every problem and merge them into ``all.pdf``.

    Args:
        conn: Connection object forwarded to ``downloadProblemText``.
        problems: Iterable of dicts with ``'name'`` and ``'id'`` keys.
        output_dir: Directory the individual files and ``all.pdf`` go to.

    Merging is best-effort: a failure is reported on stdout and does not
    raise. There is a 2 second pause after each download.
    """
    text_paths = []

    for problem in problems:
        file_path = downloadProblemText(conn, problem['name'], problem['id'],
                                        output_dir)

        text_paths.append(file_path)

        print(f'downloaded {file_path}')

        time.sleep(2)

    # `except Exception` (not a bare except) so Ctrl-C / SystemExit still
    # propagate, and the reason for the failure is shown to the user.
    try:
        pdf_merger = PyPDF2.PdfFileMerger()
        try:
            for path in text_paths:
                pdf_merger.append(path, import_bookmarks=False)

            pdf_merger.write(os.path.join(output_dir, 'all.pdf'))
        finally:
            pdf_merger.close()
    except Exception as exc:
        print(f'failed to merge texts: {exc}')
Example #8
0
def merge_pdfs(input_folder, output_file):
    """
    Concatenate every ``*.pdf`` matched by ``input_folder + "*.pdf"``.

    Files are merged in sorted (alphabetical) path order. All inputs are kept
    open until the output has been written, which works around a PyPDF2
    issue that otherwise produces blank pages.

    Args:
        input_folder (str): prefix the ``*.pdf`` glob pattern is appended to
        output_file  (str): path to output file (including extension)
    """
    pdf_paths = sorted(glob.glob(input_folder + "*.pdf"))

    with contextlib.ExitStack() as open_inputs:
        merger = PyPDF2.PdfFileMerger()
        handles = [
            open_inputs.enter_context(open(pdf_path, 'rb'))
            for pdf_path in pdf_paths
        ]
        for handle in handles:
            merger.append(handle)
        with open(output_file, 'wb') as out_handle:
            merger.write(out_handle)
def merge_pdf_files(file_strings, output_path):
    """
    Merge the given PDF files, in order, into a single output file.

    :param file_strings: list of (absolute) paths, each ending in ``.pdf``
    :param output_path: target file path, or an existing directory; for a
        directory the output is named after the first input file with
        ``_MERGED.pdf`` in place of ``.pdf`` and placed in that directory
    :return: the path the merged PDF was written to
    :raises AssertionError: if ``file_strings`` is not a list of ``.pdf``
        path strings
    """
    assert isinstance(file_strings, list)
    for test_file in file_strings:
        assert isinstance(test_file, str)
        assert test_file.endswith('.pdf')

    if os.path.isdir(output_path):
        # Use only the file name: joining an absolute input path would make
        # os.path.join discard `output_path` entirely.
        first_name = os.path.basename(file_strings[0])
        output_path = os.path.join(
            output_path, first_name.replace('.pdf', '_MERGED.pdf'))

    merger = PyPDF2.PdfFileMerger(strict=False)
    try:
        for merge_file in file_strings:
            merger.append(merge_file)
        merger.write(output_path)
    finally:
        # Always release the input files the merger opened.
        merger.close()
    return output_path
Example #10
0
def coverUP(cover_path, book_path, dest):
    """Prepend a cover PDF to a book PDF and write ``<title>_Final.pdf``.

    Each input is first checked for ``EOF_MARKER``. When the marker is
    missing, a repaired copy (last 5 bytes replaced by the marker) is written
    next to the original with a ``_new`` suffix and used instead.

    Args:
        cover_path: Path of the cover PDF.
        book_path: Path of the book PDF.
        dest: Directory the merged file is written to.

    The output name is taken from the book's document title; when the book
    has no title metadata the book's file name (without extension) is used.
    """
    # Read both files fully, closing the handles right away.
    with open(cover_path, 'rb') as pdfCOVER:
        reader1 = pdfCOVER.read()
    with open(book_path, 'rb') as pdfBOOK:
        reader2 = pdfBOOK.read()

    # Check the files for an EOF marker.
    print("Checking the EOF MARKERS in the files:")
    if EOF_MARKER in reader1:
        print("-----> Book 1 has an EOF")
    else:
        print("-----> Book 1: no EOF found")
        newcover = reader1[:-5] + EOF_MARKER
        cover_path = cover_path + "_new"
        with open(cover_path, 'wb+') as f:
            f.write(newcover)
    if EOF_MARKER in reader2:
        print("-----> Book 2 has an EOF")
    else:
        print("-----> Book 2: no EOF found")
        newbook = reader2[:-5] + EOF_MARKER
        book_path = book_path + "_new"
        with open(book_path, 'wb+') as f:
            f.write(newbook)

    # Read the (possibly repaired) files through PyPDF; the handles stay
    # open until the merged document has been written.
    merger = pd.PdfFileMerger()
    try:
        with open(cover_path, 'rb') as cover_file, \
                open(book_path, 'rb') as book_file:
            pdfCoverReader = pd.PdfFileReader(cover_file)
            pdfBookReader = pd.PdfFileReader(book_file)
            print("The cover's path ", cover_path)
            merger.append(pdfCoverReader)
            print("The book's path ", book_path)
            merger.append(pdfBookReader)

            Info = pdfBookReader.getDocumentInfo()
            title = Info.title if Info is not None else None
            if not title:
                # No usable title metadata: fall back to the file name.
                title = os.path.splitext(os.path.basename(book_path))[0]
            merger.write(dest + os.sep + title + "_Final.pdf")
    finally:
        merger.close()
def mergePDFS():
    """Merge two hard-coded PDFs (report first, then proposal) into ``test_out2.pdf``.

    The previous version also wrote the result to an in-memory buffer and
    compressed it, but discarded the compressed bytes; that dead work and the
    unused ``PdfFileWriter`` were removed. All file handles are now closed.
    """
    import PyPDF2

    proposal_path = '/home/pranjal/Documents/Assignments/Deep Learning Project/Project Proposal/Project Proposal.pdf'
    report_path = '/home/pranjal/Documents/Assignments/Deep Learning Project/Report/Eindhoven Marathon Dataset Collection and Analysis Report.pdf'

    merger = PyPDF2.PdfFileMerger()
    try:
        # Inputs stay open until the output has been written.
        with open(report_path, 'rb') as report, \
                open(proposal_path, 'rb') as proposal:
            merger.append(fileobj=report)
            merger.append(fileobj=proposal)
            with open("test_out2.pdf", 'wb') as output:
                merger.write(output)
    finally:
        merger.close()
Example #12
0
def add_bookmarks(pdf_in_filename, bookmarks_tree, pdf_out_filename=None):
    """Add bookmarks to existing PDF files

    Args:
        pdf_in_filename: Path of the source PDF.
        bookmarks_tree: Iterable of ``(title, pagenum, subtree)`` tuples,
            where ``subtree`` is a nested iterable of the same shape (or
            falsy for a leaf).
        pdf_out_filename: Output path. Defaults to the input name with
            ``-new`` inserted before the extension.

    Existing bookmarks are not imported; existing document metadata is kept.

    Home:
        https://github.com/RussellLuo/pdfbookmarker
    Some useful references:
        [1] http://pybrary.net/pyPdf/
        [2] http://stackoverflow.com/questions/18855907/adding-bookmarks-using-pypdf2
        [3] http://stackoverflow.com/questions/3009935/looking-for-a-good-python-tree-data-structure
    """
    pdf_out = PyPDF2.PdfFileMerger()

    def crawl_tree(tree, parent):
        # Depth-first: add each bookmark under `parent`, then its children.
        for title, pagenum, subtree in tree:
            current = pdf_out.addBookmark(title, pagenum, parent)
            if subtree:
                crawl_tree(subtree, current)

    # Both input handles stay open until the output is written and are then
    # closed by the context manager (they used to leak).
    with open(pdf_in_filename, 'rb') as inputStream, \
            open(pdf_in_filename, 'rb') as metaStream:
        # read `pdf_in` into `pdf_out`, using PyPDF2.PdfFileMerger()
        pdf_out.append(inputStream, import_bookmarks=False)

        # copy/preserve existing metainfo
        pdf_in = PyPDF2.PdfFileReader(metaStream)
        metaInfo = pdf_in.getDocumentInfo()
        if metaInfo:
            pdf_out.addMetadata(metaInfo)

        # add bookmarks into `pdf_out` by crawling `bookmarks_tree`
        crawl_tree(bookmarks_tree, None)

        # get `pdf_out_filename` if it's not specified
        if not pdf_out_filename:
            name_parts = os.path.splitext(pdf_in_filename)
            pdf_out_filename = name_parts[0] + '-new' + name_parts[1]

        # write `pdf_out`
        with open(pdf_out_filename, 'wb') as outputStream:
            pdf_out.write(outputStream)
Example #13
0
def add_bookmarks(file, catalogue_num, first_page_num, first_num):
    """
    Write a bookmarked copy of a PDF.

    A 'Contents' bookmark is placed on the catalogue page, followed by one
    numbered bookmark per page from the first content page to the end of the
    document. The copy is written to the path returned by ``clone(file)``.

    :param file: the path of the PDF
    :param catalogue_num: 1-based page number of the catalogue in the PDF
    :param first_page_num: 1-based page number of the first content page
    :param first_num: label of the first numbered bookmark; an empty string
        means "use first_page_num"
    :return: None (exits with status 1 on a missing file or bad number)
    """
    try:
        print(file)
        with open(file, mode='r+b') as source:
            # The last content page is the last page of the original PDF.
            page_total = PyPDF2.PdfFileReader(source).getNumPages()

            # Copy the document and attach the bookmarks to the copy.
            merger = PyPDF2.PdfFileMerger()
            merger.append(source)
            merger.addBookmark('Contents', int(catalogue_num) - 1)
            label = int(first_page_num) if first_num == '' else int(first_num)
            first_index = int(first_page_num) - 1
            for offset, page_index in enumerate(
                    range(first_index, int(page_total))):
                merger.addBookmark(str(label + offset), page_index)
            output_pdf_path = clone(file)
            with open(output_pdf_path, mode='wb') as target:
                merger.write(target)
    except FileNotFoundError as missing:
        print('File not found: {}'.format(missing.filename))
        sys.exit(1)
    except ValueError:
        print('Please check your page number')
        sys.exit(1)
Example #14
0
def make_lstpdf(pnam, lst):
    """Build one merged PDF at ``pnam`` from the abstracts listed in ``lst``.

    ``lst`` yields ``(ag, k)`` pairs: ``k`` is passed to ``get_abst`` to
    locate the abstract PDF and ``ag`` is stamped onto its first page
    together with ``app.setting.orsj`` and ``app.setting.year`` /
    ``app.setting.ncon``. At most two pages of each abstract are used; an
    abstract with fewer than two pages is followed by the PDF returned by
    ``get_abst("empty")``.

    NOTE(review): a failure while writing ``pnam`` is silently ignored.
    """
    pdfmetrics.registerFont(
        TTFont("IPAexGothic", "/usr/share/fonts/ipaexg.ttf"))
    # sty1: label text; sty2: smaller, right-aligned header text.
    sty1 = ParagraphStyle("sty1", fontName="IPAexGothic", fontSize=16)
    sty2 = ParagraphStyle("sty2",
                          alignment=TA_RIGHT,
                          fontName="IPAexGothic",
                          fontSize=9)
    pdf = PyPDF2.PdfFileMerger()
    for ag, k in lst:
        ff = PdfFormFiller(get_abst(k))
        p = ff.pdf.getPage(0)
        # All three texts go onto page 0; p.mediaBox[2] is used to size the
        # right-aligned boxes relative to the page.
        ff.add_text(ag, 0, (40, 14), (100, 40), sty1)
        ff.add_text(app.setting.orsj, 0, (0, 14), (p.mediaBox[2] - 40, 40),
                    sty2)
        ff.add_text(
            app.setting.year + " " + app.setting.ncon,
            0,
            (0, 24),
            (p.mediaBox[2] - 40, 40),
            sty2,
        )
        # Use at most the first two pages of the abstract.
        n = min(2, ff.pdf.getNumPages())
        with TemporaryFile() as fp:
            # Round-trip the stamped PDF through a temp file so it can be
            # re-read and appended to the merger.
            ff.write(fp)
            fp.seek(0)
            fr = PyPDF2.PdfFileReader(fp)
            pdf.append(fr, pages=(0, n))
        if n < 2:
            # Fewer than two pages: append the "empty" PDF after it.
            with open(get_abst("empty"), "rb") as fp:
                pdf.append(fp)
    if os.path.exists(pnam):
        os.remove(pnam)
        # presumably gives the OS time to release the file -- TODO confirm
        time.sleep(0.05)
    try:
        pdf.write(pnam)
    except:
        # NOTE(review): bare except hides every write error (including
        # KeyboardInterrupt); the caller cannot tell whether pnam exists.
        pass
Example #15
0
def add_metadata(source_stream, output_stream, author_docs, tome_doc, tome_file):
    """Disabled metadata writer: always returns False without touching the streams.

    The code after the early return is kept for reference. It would copy
    ``source_stream`` to ``output_stream`` with author, title and (when set)
    subtitle metadata, returning True on success and False on a PyPDF2 error.
    """
    return False
    # Disabled because of pypdf2 problems, e.g. bookmarks get destroyed and
    # stripping is not idempotent.

    try:
        pdf_merger = PyPDF2.PdfFileMerger()
        pdf_merger.append(source_stream)

        metadata = {
            '/Author': ', '.join(doc['name'] for doc in author_docs),
            '/Title': tome_doc['title']
        }
        subtitle = tome_doc['subtitle']
        if subtitle:
            metadata['/Subtitle'] = subtitle

        pdf_merger.addMetadata(metadata)
        pdf_merger.write(output_stream)
    except (PyPDF2.utils.PdfReadError, TypeError, AssertionError, IOError, RuntimeError) as e:
        logger.error("Caught an pypdf error: {}, skipping metadata add".format(e.message))
        return False
    return True
Example #16
0
def incorp_som(f):
    """Append a ``*_SOM`` companion PDF to its main PDF, in place.

    ``f`` is expected to be the ``<base>_SOM<ext>`` file. When both it and
    ``<base><ext>`` exist, the main file is replaced by main + SOM pages and
    the SOM file is deleted.

    Args:
        f: Path of the ``_SOM`` file.

    Returns:
        int: 1 if a merge was performed, 0 otherwise.
    """
    tag = '_SOM'
    if not os.path.isfile(f):
        return 0

    corpo_som, ext = os.path.splitext(f)
    corpo = corpo_som[:-len(tag)]
    main_file = corpo + ext
    som_file = corpo + tag + ext
    if not (os.path.isfile(som_file) and os.path.isfile(main_file)):
        return 0

    tmp_file = corpo + '_tmp' + ext
    os.rename(main_file, tmp_file)
    merger = PyPDF2.PdfFileMerger()
    try:
        # Explicit handles (previously "rb" was passed as the reader's
        # `strict` argument and the files were never closed, which blocks
        # the os.remove calls below on Windows).
        with open(tmp_file, 'rb') as main_stream, \
                open(som_file, 'rb') as som_stream:
            merger.append(PyPDF2.PdfFileReader(main_stream))
            merger.append(PyPDF2.PdfFileReader(som_stream))
            merger.write(main_file)
    except Exception:
        # Put the original main file back so a failed merge loses nothing.
        if os.path.exists(main_file):
            os.remove(main_file)
        os.rename(tmp_file, main_file)
        raise
    finally:
        merger.close()

    os.remove(tmp_file)
    os.remove(som_file)
    print(main_file)
    return 1
Example #17
0
def mergePdfFiles(files):
    """Merge the given PDF files into ``merger_pdf.pdf`` in the working directory.

    Args:
        files: Non-empty list of input paths. Inputs that cannot be opened
            or parsed are reported on stdout and skipped.

    Returns:
        dict: ``{'status': 'success', 'msg': ..., 'filename': 'merger_pdf.pdf'}``
        after writing the output, or ``{'status': 'error', 'msg': ...}`` when
        ``files`` is not a non-empty list.
    """
    import contextlib

    if not (isinstance(files, list) and files):
        return {'status': 'error', 'msg': 'No files to merge'}

    pdf_merger = PyPDF2.PdfFileMerger(strict=False)
    # The ExitStack keeps every input open until the output is written and
    # then closes them all; the output handle is closed even on error.
    with open('merger_pdf.pdf', 'wb') as new_file, \
            contextlib.ExitStack() as inputs:
        try:
            for filename in files:
                try:
                    read_file = inputs.enter_context(open(filename, 'rb'))
                    pdf_merger.append(read_file)
                except OSError:
                    print(f'file not found {filename}')
                except Exception as exc:
                    # Still best-effort, but no longer mislabelled as
                    # "file not found" and no longer hiding Ctrl-C.
                    print(f'could not merge {filename}: {exc}')
            pdf_merger.write(new_file)
        finally:
            pdf_merger.close()

    return {
        'status': 'success',
        'msg': 'Pdf merged successful',
        'filename': 'merger_pdf.pdf'
    }
Example #18
0
 def merge_pdf(self, datas, inv):
     """
     Return the PDF content of the given attachments as raw bytes.

     With more than one attachment the PDFs are concatenated (bookmarks are
     not imported); otherwise the single attachment is read the standard way.

     :param datas: attachment record(s) providing ``_file_read`` and
         ``store_fname``
     :param inv: invoice whose ``number`` is shown in the error message
     :return: tuple ``(content, filetype)`` with ``filetype == 'pdf'``
     :raises UserError: if one of the PDFs cannot be appended
     """
     if len(datas) > 1:
         merger = PyPDF2.PdfFileMerger(strict=False)
         myio = io.BytesIO()
         try:
             for pdf in datas:
                 attach = pdf._file_read(pdf.store_fname)
                 content = io.BytesIO(base64.b64decode(attach))
                 try:
                     merger.append(content, import_bookmarks=False)
                 except Exception:
                     # Interpolate AFTER translating: formatting inside _()
                     # changes the msgid so no translation is ever found.
                     raise UserError(_('Export stopped! \n Invoice %s can not exported, because the PDF has no EOF-Marker. \n Please repair it and start the export again.') % inv.number)
             merger.write(myio)
         finally:
             merger.close()
         content, filetype = myio.getvalue(), 'pdf'
     else:
         attach = datas._file_read(datas.store_fname)
         content, filetype = base64.b64decode(attach), 'pdf'
     return content, filetype
Example #19
0
def batchBeamerConversion(SourceFolder, DestinationFolder):
    """Convert every Markdown file of a folder to Beamer and combine the PDFs.

    ``DestinationFolder`` is deleted and recreated. Each ``*.md`` file in
    ``SourceFolder`` (except ``index.md``) is passed to
    ``convertMdToBeamer``; all PDFs then found in ``DestinationFolder`` are
    merged, in sorted file-name order, into ``combined.pdf`` there.

    Args:
        SourceFolder: Folder containing the Markdown sources.
        DestinationFolder: Folder receiving the generated PDFs.
    """
    if os.path.exists(DestinationFolder):
        shutil.rmtree(DestinationFolder)

    os.makedirs(DestinationFolder)

    # Sorted so conversion and merge order do not depend on the arbitrary
    # order os.listdir happens to return.
    md_files = sorted(
        name for name in os.listdir(SourceFolder)
        if (os.path.isfile(os.path.join(SourceFolder, name))
            and os.path.splitext(name)[1] == ".md"
            and os.path.splitext(name)[0] != "index")
    )

    for name in md_files:
        convertMdToBeamer(os.path.join(SourceFolder, name), DestinationFolder)

    files = sorted(
        name for name in os.listdir(DestinationFolder)
        if os.path.splitext(name)[1] == ".pdf"
    )
    print("files to be combined: " + str(files))

    if files:

        merger = PyPDF2.PdfFileMerger()
        try:
            for filename in files:
                print("combining " + filename)
                # Appending a PdfFileReader copies its content, so each
                # input can be closed right away instead of leaking.
                with open(os.path.join(DestinationFolder, filename),
                          'rb') as pdf_file:
                    merger.append(PyPDF2.PdfFileReader(pdf_file))
                print("combined " + filename)

            merger.write(os.path.join(DestinationFolder, "combined.pdf"))
        finally:
            merger.close()

    print("========CONVERTED TO BEAMER================")
Example #20
0
def merge_pdf(name):
    '''
    Merge the numbered PDFs of the current directory into ``<name>.pdf``.

    Every ``*.pdf`` file in the working directory whose name contains
    ``<digits>.pdf`` is merged in ascending page-number order; the output
    file itself is excluded. PDFs without a page number are skipped. When
    two files carry the same number, the one listed last wins.
    '''
    import contextlib

    print('正在合并最终 pdf')
    # find all the pdf files in current directory.
    mypath = os.getcwd()
    # Escape `name` so characters such as '+' or '(' are matched literally.
    output_pattern = re.escape(name) + r'\.pdf'

    # Map page number -> file path.
    pages = {}
    for f in os.listdir(mypath):
        if not re.search(r"\.pdf$", f, re.IGNORECASE):
            continue
        if re.search(output_pattern, f):
            continue
        numbers = re.findall(r'(\d+)\.pdf', f, re.IGNORECASE)
        if not numbers:
            # No page number in the name: cannot be ordered, so skip it
            # (this used to abort the whole merge with an IndexError).
            continue
        # os.path.join instead of a hard-coded "\\" keeps this portable.
        pages[int(numbers[0])] = os.path.join(mypath, f)
    file_names_lst = [pages[page] for page in sorted(pages)]

    # merge the file; inputs stay open until the output has been written
    # and are closed afterwards even if merging fails.
    with contextlib.ExitStack() as stack:
        pdfFM = PyPDF2.PdfFileMerger()
        for file_name in file_names_lst:
            pdfFM.append(stack.enter_context(open(file_name, 'rb')))

        # output the file.
        with open(os.path.join(mypath, name + ".pdf"), 'wb') as write_out_file:
            pdfFM.write(write_out_file)

    print('合并完成 %s' % name)
Example #21
0
def main(target_path, output_fname=None):
    """Merge every ``*.pdf`` file of a directory into one PDF.

    Args:
        target_path: Existing directory containing the PDFs to merge.
        output_fname: Output path. Defaults to ``<directory name>_merged.pdf``
            in the current working directory.

    The files are listed and the user is asked to confirm; answering ``n``
    exits with status 1. Files that cannot be read are reported and skipped.
    """
    if not os.path.isdir(target_path):
        sys.exit("Check the target path.\nIt must be an existing directory.\n")

    # Sorted for a deterministic merge order.
    files_to_merge = sorted(
        x for x in os.listdir(target_path) if x.endswith(".pdf"))

    print("The following files will be merged:")
    for f in files_to_merge:
        print(" - {} ".format(f))

    if input("\nContinue? ").lower() == 'n':
        sys.exit(1)

    merger = PyPDF2.PdfFileMerger()
    reader = PyPDF2.PdfFileReader

    if output_fname is None:
        # Previously this joined a list with a string and always raised
        # TypeError; name the output after the target directory instead.
        folder_name = os.path.basename(os.path.abspath(target_path))
        output_fname = folder_name + "_merged.pdf"

    try:
        for file_name in files_to_merge:
            target_file = os.path.join(target_path, file_name)
            try:
                # `with` closes the handle even when parsing fails.
                with open(target_file, 'rb') as pdf_obj:
                    merger.append(reader(pdf_obj))
            except IOError as io_err:
                print("IOError: {}".format(io_err))
                print("** STACK **\n{}".format(sys.exc_info()[0]))

        merger.write(output_fname)
    finally:
        merger.close()
Example #22
0
def pdfAppend(yol):
    """Collect the expected per-page PDF names for documents found in ``yol``.

    File names are treated as ``<document>_<rest>.pdf``: the part before the
    first underscore identifies the document. The function counts files per
    document and builds names of the form
    ``<document>_<index>-<total>.pdf`` (prefixed with ``yol``), printing the
    growing list. Nothing is returned and nothing is merged.

    NOTE(review): `merger` is created but never used, and the commented-out
    calls suggest this function is unfinished. `sayfa`, `toplamSayfa`, `ss`
    and `dosya` are only assigned on some paths, so a NameError looks
    possible when the first directory entry is not a PDF -- verify.
    """
    docAppendname = []
    dosyalar = os.listdir(yol)
    merger = PyPDF2.PdfFileMerger()
    for dosya in dosyalar:
        if dosya.endswith('.pdf'):
            print("DOSYA :" + dosya)
            dosya = dosya.split("_")  # split off the page part
            dosya = dosya[0]
            toplamSayfa = 1
            sayfa = 0
        # Count how many PDFs share this document prefix.
        for pagees in dosyalar:
            if pagees.endswith('.pdf'):
                pagees = pagees.split("_")  # split off the page part
                pagees = pagees[0]
                if pagees == dosya:
                    sayfa += 1
                    #print("KONTROL :"+yol+"{}_{}-{}.pdf".format(pagees,sayfa,toplamSayfa))
                    # pdf_merger.append(yol+"{}_{}-{}.pdf".format(pagees,sayfa,toplamSayfa))
                    # pdf_merger.write("C:/Users/Oğuz KABA/Desktop/_QMS Dosyalar/pdf deneme/sonuc.pdf")
                    # pdf_merger.close()
                    #print("Sayfa :"+str(sayfa))
            # Running total so far; also resets the index used below.
            toplamSayfa = sayfa
            ss = 0
    # After the loops above, `dosya` holds the prefix of the last entry
    # processed; only files matching it advance the index `ss`.
    for pageess in dosyalar:
        if pageess.endswith('.pdf'):
            pageess = pageess.split("_")  # split off the page part
            pageess = pageess[0]
            if pageess == dosya:
                ss += 1
            #print("KONTROL :"+yol+"{}_{}-{}.pdf".format(pageess,ss,toplamSayfa))
            # A name is appended for every PDF, matching prefix or not.
            docAppendname.append(
                yol + "{}_{}-{}.pdf".format(pageess, ss, toplamSayfa))
            # pdf_merger.append(yol+"{}_{}-{}.pdf".format(pageess,ss,toplamSayfa))
            # pdf_merger.write("C:/Users/Oğuz KABA/Desktop/_QMS Dosyalar/pdf deneme/sonuc.pdf")
            print("SON :" + str(docAppendname))
    def merge(self):
        """Merge the two selected PDFs into the output file named in the UI.

        Works inside ``self.rootFolder``. When a page number is entered in
        ``textEdit_4`` the second PDF is inserted at that page of the first;
        otherwise it is appended. On success the two result labels are shown.

        Returns:
            0 after showing an error for an existing output file or an
            invalid page number; None otherwise.
        """
        os.chdir(self.rootFolder)
        output_name = self.textEdit_5.toPlainText()
        if not (self.pdfPath1.endswith('.pdf')
                and self.pdfPath2.endswith('.pdf')
                and output_name.endswith('.pdf')):
            self.showError(
                'Please make sure you have 2 PDF files selected and an output in PDF format.'
            )
            return

        if output_name in os.listdir():
            self.showError('File already exists')
            return 0

        # Validate the optional insert position before building anything.
        pageNum = None
        page_text = self.textEdit_4.text()
        if page_text:
            try:
                pageNum = int(page_text)
            except ValueError:
                # Non-numeric input used to escape as an unhandled error.
                self.showError('Invalid page number')
                return 0
            # `with` closes the handle on every path (it leaked on the
            # invalid-page early return before).
            with open(self.pdfPath1, 'rb') as readerPdf:
                page_count = PyPDF2.PdfFileReader(readerPdf).getNumPages()
            if not -1 < pageNum < page_count:
                self.showError('Invalid page number')
                return 0

        merger = PyPDF2.PdfFileMerger()
        try:
            merger.append(self.pdfPath1)
            if pageNum is None:
                merger.append(self.pdfPath2)
            else:
                merger.merge(pageNum, self.pdfPath2)
            merger.write(output_name)
        finally:
            merger.close()
        self.label_22.show()
        self.label_23.show()
Example #24
0
def merge_and_watermark(pdf_list, wtr_pdf):
    """Merge PDFs and stamp a watermark on every page of the result.

    Writes two files into the working directory: ``merged.pdf`` (the plain
    concatenation) and ``merged_watermarked.pdf`` (each page overlaid with
    the first page of the watermark PDF).

    Args:
        pdf_list: Iterable of input PDF paths, in merge order.
        wtr_pdf: Path of the PDF whose first page is the watermark.
    """
    # Merge given pdfs
    merger = PyPDF2.PdfFileMerger()
    try:
        for pdf in pdf_list:
            merger.append(pdf)
        merger.write("merged.pdf")
    finally:
        # Release the input files the merger opened.
        merger.close()

    # Both readers need their files open until the writer has finished, so
    # everything below happens inside the `with` (the handles used to leak).
    with open("merged.pdf", "rb") as merged_file, \
            open(wtr_pdf, "rb") as watermark_file:
        # Read merged pdf
        raw_pdf = PyPDF2.PdfFileReader(merged_file)
        total_pages = raw_pdf.getNumPages()

        # Get watermark pdf
        watermark = PyPDF2.PdfFileReader(watermark_file)
        wtr_page = watermark.getPage(0)

        # Watermark and write pages one by one
        writer = PyPDF2.PdfFileWriter()
        for page_num in range(total_pages):
            raw_page = raw_pdf.getPage(page_num)
            raw_page.mergePage(wtr_page)
            writer.addPage(raw_page)

        with open('merged_watermarked.pdf', 'wb') as output_file:
            writer.write(output_file)
def main(argv=None):
    """Command-line entry point: combine several PDF files into one.

    Args:
        argv: Optional argument list (without the program name). Defaults to
            ``sys.argv[1:]`` via argparse, so ``main()`` behaves as before.

    Positional arguments are the input files; ``-o/--output`` names the
    output file and defaults to ``combined.pdf``.
    """
    ap = ArgumentParser(
        description='Combines several PDF files into one file.')

    # The files to combine
    ap.add_argument('files', metavar='file1 file2 ...',
                    help='Files to combine', nargs='+')

    # The output file (defaults to combined.pdf if not specified)
    ap.add_argument('-o', '--output', nargs='?',
                    const='combined.pdf', default='combined.pdf',
                    help='Output file name (combined.pdf, if not specified)')

    args = ap.parse_args(argv)

    # Workaround for PyPDF2 empty output file: keep input files open
    # See https://stackoverflow.com/a/49927541/336802
    with contextlib.ExitStack() as stack:
        pdfMerger = PyPDF2.PdfFileMerger()
        # Registered first so the merger is closed last, after the inputs.
        stack.callback(pdfMerger.close)
        files = [stack.enter_context(open(pdf, 'rb')) for pdf in args.files]
        for f in files:
            pdfMerger.append(f)
        with open(args.output, 'wb') as f:
            pdfMerger.write(f)
Example #26
0
def merge(files: Sequence[str], output_name: str = None) -> None:
    """
    Combine separate PDF files into a single document.

    Parameters
    ----------
    files : Sequence[str]
        File paths to merge, in order. An empty string is treated as
        "cancelled by the user" and nothing is done.
    output_name : str, optional
        Path (including file name) to where the merged file will be saved.
        Default is None, which means ``'merged.pdf'``.
    """
    if files == '':
        print('Program stopped by user.')
        return

    files = validate_files_and_handle_error(files)
    output_name = validate_output_name_and_handle_error(
        'merged.pdf' if output_name is None else output_name)

    merger = PyPDF2.PdfFileMerger(strict=False)
    try:
        for file in files:
            merger.append(file)
        merger.write(output_name)
    finally:
        # Release the input files the merger opened (they used to leak).
        merger.close()
Example #27
0
def download(_date):
    """Download the newspaper pages for ``_date`` and merge them into one PDF.

    The index page at ``index + _date`` is scraped for per-page PDF links.
    Each page is saved as ``<workdir>data/<_date>/<n>.pdf`` and appended to
    the output until a page whose document title contains ``'C0'`` is
    reached. The result is written to ``<workdir><_date>.pdf``.

    NOTE(review): Python 2 code (``urllib2``, ``print`` statement). The
    directory is removed with a string-built ``rm -rf`` shell command, so
    ``_date`` must be trusted.
    """
    d_request = urllib2.Request(index + _date)
    d_request.add_header('User-Agent', header)
    d_html = urllib2.urlopen(d_request).read()
    # Captures "<8 chars>/<up to 7 chars>.pdf" after the /qlwb/pdf/ prefix.
    d_pattern = re.compile(r'/qlwb/pdf/(......../.?.?.?.?.?.?.?\.pdf)')
    d_match = d_pattern.findall(d_html)
    d_pages = len(d_match)

    i = 0
    out = PyPDF2.PdfFileMerger()  # out is the pdf object we process
    out.strict = False
    # Remove the temp folder that stores the single-page PDFs, then
    # recreate it.
    os.system('rm -rf ' + workdir + 'data/' + _date)  # shell command built from _date
    os.mkdir(workdir + 'data/' + _date)
    for s in d_match:
        i += 1
        pdfpath = workdir + 'data/' + _date + '/' + '%d' % i + '.pdf'
        urllib.urlretrieve(index + s, pdfpath)
        # download a page

        # NOTE(review): this file handle is never closed explicitly.
        tmppdf = PyPDF2.PdfFileReader(open(pdfpath, 'rb'))
        pdfinfo = tmppdf.getDocumentInfo()
        if ('C0' in pdfinfo.title):
            break
        # Only the A and B sections are wanted, so stop at the first page
        # whose title is "greater than or equal to" C; later pages are
        # discarded.

        out.append(pdfpath, False)
        if (i % 10 == 0):
            print 'Downloaded ' + '%d' % i + ' of all ' + '%d' % d_pages + ' pages......'
        # simply shows the progress of downloading

    print 'Download complete.Merging,this needs much time......'
    out.write(workdir + _date + '.pdf')
    # finally output the pdf
    print 'Done.'
Example #28
0
def render_wrap_paper_and_net_real_size(vertical, horizon, high, theta,
                                        save_path, g_path, w, h, pw, ph,
                                        scaled):
    """Render the box net and the wrapping paper and save both as one PDF.

    The net is rendered to a temporary SVG and converted to PDF, the
    wrapping-paper PNG is converted to PDF, both are merged into
    ``save_path`` (net first), and the temporary files under ``.tmp/`` are
    deleted.
    """
    box_svg = ".tmp/save_box.svg"
    box_pdf = ".tmp/save_box.pdf"
    wrap_pdf = ".tmp/save_wrap.pdf"
    wrap_png = ".tmp/save_wrap_paper.png"

    net = render_net_real_size(vertical, horizon, high, theta, box_svg)
    save_wrap_paper(w, h, pw, ph, scaled, g_path, net[0], net[1])
    renderPDF.drawToFile(svg2rlg(box_svg), box_pdf)

    try:
        with open(wrap_pdf, "wb") as wrap_file:
            wrap_file.write(img2pdf.convert(wrap_png))
    except OSError:
        print("file is Opening")

    pdf_merger = PyPDF2.PdfFileMerger()
    for part in (box_pdf, wrap_pdf):
        pdf_merger.append(part)
    pdf_merger.write(save_path)
    pdf_merger.close()

    # Clean up the intermediates in the same order as they were created.
    for leftover in (box_svg, box_pdf, wrap_pdf, wrap_png):
        os.remove(leftover)
Example #29
0
    pdfs = []
    titles = []
    for pdf, title in pdf_files:
        if ('layer2' in pdf or 'layer3' in pdf) and layer_count == 2:
            continue
        pdfname = '{}_{}.pdf'.format(sch_name, pdf)
        if os.path.exists(pdfname):
            reader = PyPDF2.PdfFileReader(pdfname)
            numpages = reader.getNumPages()
            titles += [title] * numpages
            pdfs.append(pdfname)
    if include_bom:
        pdfs.append('{}_bom.pdf'.format(sch_name))

    print("  Merging PDFs...")
    merger = PyPDF2.PdfFileMerger()
    for pdf in pdfs:
        merger.append(pdf)
    merger.write('{}.pdf'.format(sch_name))

    # Delete the generated pdfs if they exist
    print("  Cleaning up temporary PDF files")
    for pdf, title in pdf_files + [('layer_test', '~')]:
        try:
            rm('{}_{}.pdf'.format(sch_name, pdf))
        except sh.ErrorReturnCode_1:
            pass

    try:
        rm('-f', '{}_bom.pdf'.format(sch_name))
    except sh.ErrorReturnCode_1:
Example #30
0
def engine():
    """Download every page of the book with the entered ISBN and merge them into ``<isbn>.pdf``.

    Reads the ISBN from the ``isbn_value`` widget, asks for confirmation,
    logs in, fetches the page count, downloads and decodes each page in
    worker threads, converts each page image to a one-page PDF and merges
    the pages in order. Progress is written to ``logbox``.

    NOTE(review): Python 2 code (``Tkinter``, ``tkMessageBox``,
    ``str.decode('string_escape')``). Several string literals were redacted
    to ``"******"`` in this copy, so the URL-building lines do not parse as
    written.
    """
    isbn = isbn_value.get()
    if len(isbn) != 12:
        tkMessageBox.showerror(
            "Errore",
            "La lunghezza del codice ISBN deve essere pari a 12!",
            icon='error')
        isbn_entry.configure(state='normal')
        start_button.configure(state='normal')
        # NOTE(review): no return here -- execution continues after the
        # length error.
    result = tkMessageBox.askquestion(
        "Conferma",
        "Sicuro di voler scaricare questo libro?\n\nISBN: " + isbn,
        icon='question')
    if result != 'yes':
        isbn_entry.configure(state='normal')
        start_button.configure(state='normal')
        # NOTE(review): no return here either -- the download proceeds even
        # when the user did not answer 'yes'.

    def decrypt(data, page):
        """Strip the JS wrapper for ``page`` and undo the shuffle of the first 96 characters."""
        data = data.replace('viewer._imgl(' + str(page) + ',"', '').replace(
            '");\nviewer._imgl(' + str(page) + ');', '')
        data = data.decode('string_escape')
        m = "fb69218f41737d7da84b1e39a949dbc2"
        arr = list(data)
        # Three passes; each swaps position i (95 down to 0) with the
        # position derived from the key character m[i % 32].
        for j in range(3):
            for i in range(95, -1, -1):
                newpos = ord(m[i % 32]) % 96
                f = arr[i]
                s = arr[newpos]
                arr[i] = s
                arr[newpos] = f
        data = ''.join(arr)
        return data

    def download(username, isbn, pagen):
        """Fetch page ``pagen``, decode it and store it as PDF bytes in ``pdf_data[pagen]``."""
        # Zero-pad the page number to three digits.
        pageid = "0" * (3 - len(str(pagen))) + str(pagen)
        try:
            content = session.get(
                "http://iflipit.mondadorieducation.it/desktop/index.php?usr="******"&iss=" + isbn + "&fld=sdf&id=" + pageid +
                "&ext=js").content
        except:
            # NOTE(review): retries by unbounded recursion on any error.
            download(username, isbn, pagen)
            return
        data = decrypt(content, pagen)
        pdf = io.BytesIO()
        # NOTE(review): keyword is spelled `resoultion` -- likely meant
        # `resolution`; confirm before changing.
        PIL.Image.Image.save(PIL.Image.open(io.BytesIO(data)),
                             pdf,
                             "PDF",
                             resoultion=100.0)
        pdf_data[pagen] = pdf.getvalue()

    logbox.insert(Tkinter.END, "Inizializzazione")
    session = requests.Session()

    logbox.insert(Tkinter.END, "Login")
    email = "*****@*****.**"
    password = "******"
    html = session.get(
        "https://www.mondadorieducation.it/app/mondadorieducation/login/loginJsonp?username="******"&password="******"&format=json&jsoncallback=jsonp11").text

    if not '"result":"OK"' in html:
        logbox.insert(Tkinter.END, "Login fallito")
        isbn_entry.configure(state='normal')
        start_button.configure(state='normal')
        return

    logbox.insert(Tkinter.END, "Recupero informazioni")
    session.get(
        "http://libropiuweb.mondadorieducation.it/mod_connect/login?urlRitorno=http%3A%2F%2Flibropiuweb.mondadorieducation.it%2F"
    )
    username = re.search('"username":"******"', html).group(1)
    html = session.get(
        "http://iflipit.mondadorieducation.it/desktop/index.php?accesslevel=st-pl&usr="******"&iss=" + isbn + "&fil=iss").text
    try:
        npages = int(re.search('"pagesCount":(.*?),', html).group(1))
    except:
        tkMessageBox.showerror("Errore",
                               "ISBN non valido o non disponibile",
                               icon='error')
        logbox.insert(Tkinter.END, "ISBN non valido o non disponibile")
        isbn_entry.configure(state='normal')
        start_button.configure(state='normal')
        return

    logbox.insert(Tkinter.END, "Inizio scaricamento delle pagine")

    # Filled by the download threads: page number -> PDF bytes.
    pdf_data = {}

    pagen = 1
    signal = 1
    # Start download threads, never letting the active thread count
    # exceed 10. NOTE(review): this loop spins without sleeping while the
    # limit is reached.
    while signal:
        for i in range(10 - threading.activeCount()):
            if pagen < npages + 1:
                # Replace the previous progress line instead of adding one.
                if pagen != 1: logbox.delete(Tkinter.END)
                logbox.insert(
                    Tkinter.END, "Stato download: " + str(pagen) + "/" +
                    str(npages) + " (" + str(((pagen * 100) / npages)) + "%)")
                threading.Thread(target=download,
                                 args=(
                                     username,
                                     isbn,
                                     pagen,
                                 )).start()
                pagen += 1
            else:
                signal = 0
                break

    # Busy-wait until only two threads remain -- presumably the main thread
    # plus the one running engine(); TODO confirm.
    while 1:
        if threading.activeCount() == 2:
            break

    logbox.insert(Tkinter.END, "Unione PDF")
    merger = PyPDF2.PdfFileMerger()
    # Append the pages in order; raises KeyError if a page never arrived.
    for i in range(1, npages + 1):
        merger.append(PyPDF2.PdfFileReader(io.BytesIO(pdf_data[i])))
    merger.write(isbn + ".pdf")
    logbox.insert(Tkinter.END, "Libro scaricato con successo!")
    isbn_entry.configure(state='normal')
    start_button.configure(state='normal')