Python get_num_pages Examples, pypdftk.get_num_pages Python Examples

Example #1

0

Show file

File: test.py Project: revolunet/pypdftk

 def test_split(self):
     total_pages = pypdftk.get_num_pages(TEST_PDF_PATH)
     paths = pypdftk.split(TEST_PDF_PATH)
     self.assertEqual(len(paths) - 1, total_pages)
     self.assertTrue('doc_data.txt' in paths[0])
     for p in paths:
         self.assertTrue(os.path.exists(p))

Example #2

0

Show file

 def test_split(self):
     total_pages = pypdftk.get_num_pages(TEST_PDF_PATH)
     paths = pypdftk.split(TEST_PDF_PATH)
     self.assertEqual(len(paths) - 1, total_pages)
     self.assertTrue('doc_data.txt' in paths[0])
     for p in paths:
         self.assertTrue(os.path.exists(p))

Example #3

0

Show file

 def test_split_output_dir(self):
     output_dir = mkdtemp()
     total_pages = pypdftk.get_num_pages(TEST_PDF_PATH)
     paths = pypdftk.split(TEST_PDF_PATH, out_dir=output_dir)
     self.assertEqual(len(paths) - 1, total_pages)
     for p in paths:
         out_path = os.path.join(output_dir, os.path.basename(p))
         self.assertTrue(out_path)

Example #4

0

Show file

File: test.py Project: revolunet/pypdftk

 def test_split_output_dir(self):
     output_dir = mkdtemp()
     total_pages = pypdftk.get_num_pages(TEST_PDF_PATH)
     paths = pypdftk.split(TEST_PDF_PATH, out_dir=output_dir)
     self.assertEqual(len(paths) - 1, total_pages)
     for p in paths:
         out_path = os.path.join(output_dir, os.path.basename(p))
         self.assertTrue(out_path)

Example #5

0

Show file

def add_custom(original_pdf, custom_pdf, output_pdf):
    output_pdf_aux = output_pdf + '.aux'
    pypdftk.stamp(original_pdf, custom_pdf, output_pdf_aux)
    num_pages = pypdftk.get_num_pages(original_pdf)
    if num_pages > 1:
        pypdftk.add_custom(output_pdf_aux, original_pdf, output_pdf)
        os.unlink(output_pdf_aux)
    else:
        os.rename(output_pdf_aux, output_pdf)

Example #6

0

Show file

File: views.py Project: fmfi-svt/eprihlaska

def admin_print(id):
    import pypdftk  # noqa
    import tempfile

    app = ApplicationForm.query.get(id)

    rendered = render_app(app, print=True)
    pdf = generate_pdf(rendered, options={'quiet': ''})

    with tempfile.NamedTemporaryFile(delete=False) as fp:
        fp.write(pdf)

    n_pages = pypdftk.get_num_pages(fp.name)
    n_blank_pages = 3 - n_pages
    blank_path = consts.DIR + '/data/blank_A4.pdf'

    paths_to_concat = [fp.name]
    paths_to_concat += [blank_path] * n_blank_pages
    paths_to_concat += [consts.DIR + '/data/protokol.pdf']

    concated_file = pypdftk.concat(paths_to_concat)
    with open(concated_file, 'rb') as out_f:
        output = out_f.read()

    # remove the temp file
    os.unlink(fp.name)

    # mark the application as 'printed' in the DB
    app.state = ApplicationStates.printed
    app.printed_at = datetime.datetime.now()
    db.session.commit()

    response = make_response(output)
    response.headers['Content-Type'] = 'application/pdf'
    disposition = 'inline; filename=application_form_{}.pdf'.format(id)
    response.headers['Content-Disposition'] = disposition
    return response

Example #7

0

Show file

 def test_get_num_pages(self):
     num = pypdftk.get_num_pages(TEST_PDF_PATH)
     self.assertEqual(num, 129)

Example #8

0

Show file

 def test_replace_page_at_end(self):
     total_pages = pypdftk.get_num_pages(TEST_PDF_PATH)
     last_page = pypdftk.get_num_pages(TEST_PDF_PATH)
     pdf_to_insert = 'test_files/page_01.pdf'
     pypdftk.replace_page(TEST_PDF_PATH, last_page, pdf_to_insert)
     self.assertEqual(total_pages, pypdftk.get_num_pages(TEST_PDF_PATH))

Example #9

0

Show file

File: test.py Project: revolunet/pypdftk

 def test_get_num_pages(self):
     num = pypdftk.get_num_pages(TEST_PDF_PATH)
     self.assertEqual(num, 129)

Example #10

0

Show file

def print_f99_pdftk_html(
    stamp_print="",
    paginate=False,
    begin_image_num=None,
    page_count=False,
    file_content=None,
    silent_print=False,
    filing_timestamp=None,
    rep_id=None,
    attachment_file_content=None,
):
    # check if json_file is in the request
    # HTML("templates/forms/test.html").write_pdf("output/pdf/test/test.pdf")
    # HTML(string='''<br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><div><b>This is bold text</b></div><div><u>This is underline text</u></div><div><i>This is italics text</i><u><br></u></div><div align='center'><u>Title</u></div><div align='left'><u><br></u></div><ol><li>one</li><li>two</li><li>three</li></ol>''').write_pdf("output/pdf/test/test.pdf")
    # pdfkit.from_file("templates/forms/test.html", "output/pdf/test/test.pdf")
    # pypdftk.stamp(current_app.config['FORM_TEMPLATES_LOCATION'].format('F99'), "output/pdf/test/test.pdf", "output/pdf/test/output.pdf")
    try:
        silent_print = silent_print
        txn_img_num = begin_image_num
        filing_timestamp = filing_timestamp

        if ((page_count and file_content) or
            ((paginate or silent_print) and file_content and begin_image_num)
                or (not paginate and "json_file" in request.files)):

            if page_count and file_content:
                json_file_md5 = md5_for_text(file_content)
                json_data = json.loads(file_content)

            elif (paginate
                  or silent_print) and file_content and begin_image_num:
                # generate md5 for file_content
                json_file_md5 = md5_for_text(file_content)
                json_data = json.loads(file_content)

            elif not paginate and "json_file" in request.files:
                json_file = request.files.get("json_file")
                silent_print = (True if request.form.get("silent_print")
                                and request.form.get("silent_print").lower()
                                in ["true", "1"] else False)
                page_count = (True if request.form.get("page_count")
                              and request.form.get("page_count").lower()
                              in ["true", "1"] else False)

                if silent_print:
                    txn_img_num = request.form.get("begin_image_num", None)

                    if not txn_img_num:
                        if flask.request.method == "POST":
                            envelope = common.get_return_envelope(
                                "false",
                                "begin_image_num is missing from your request")
                            status_code = status.HTTP_400_BAD_REQUEST
                            return flask.jsonify(**envelope), status_code
                    txn_img_num = int(txn_img_num)

                    filing_timestamp = request.form.get(
                        "filing_timestamp", None)

                json_file_md5 = md5_for_file(json_file)
                json_file.stream.seek(0)

                # save json file as md5 file name
                json_file.save(
                    current_app.config["REQUEST_FILE_LOCATION"].format(
                        json_file_md5))

                # load json file
                json_data = json.load(
                    open(current_app.config["REQUEST_FILE_LOCATION"].format(
                        json_file_md5)))

            md5_directory = current_app.config["OUTPUT_DIR_LOCATION"].format(
                json_file_md5)

            # if paginate or page_count is True and directory exist then don't remove it
            is_dir_exist = False
            if os.path.isdir(md5_directory):
                is_dir_exist = True

            os.makedirs(md5_directory, exist_ok=True)
            # os.makedirs(md5_directory + "images", exist_ok=True)
            if not os.path.exists(md5_directory + "images"):
                shutil.copytree("templates/forms/F99/images",
                                md5_directory + "images")
            shutil.copyfile("templates/forms/F99/form-text.css",
                            md5_directory + "form-text.css")
            infile = current_app.config["HTML_FORM_TEMPLATES_LOCATION"].format(
                "template")
            outfile = md5_directory + json_file_md5 + ".html"

            form99_json_data = json_data["data"]

            with open(infile) as inf:
                txt = inf.read()
                soup = bs4.BeautifulSoup(txt, features="html5lib")
                soup.find("label", attrs={
                    "id": "committeeName"
                }).string = form99_json_data["committeeName"]
                soup.find("label", attrs={
                    "id": "street1"
                }).string = form99_json_data["street1"]
                soup.find("label", attrs={
                    "id": "street2"
                }).string = form99_json_data["street2"]
                soup.find("label", attrs={
                    "id": "city"
                }).string = form99_json_data["city"]
                soup.find("label", attrs={
                    "id": "state"
                }).string = form99_json_data["state"]
                soup.find("label", attrs={
                    "id": "zipCode"
                }).string = form99_json_data["zipCode"]
                soup.find("span", attrs={
                    "id": "committeeId"
                }).string = form99_json_data["committeeId"]

                name_list = [
                    "LastName", "FirstName", "MiddleName", "Prefix", "Suffix"
                ]

                treasurerFullName = ""
                for item in name_list:
                    item = "treasurer" + item
                    if form99_json_data.get(item):
                        treasurerFullName += form99_json_data.get(item) + ", "
                soup.find("label", attrs={
                    "id": "treasurerFullName"
                }).string = treasurerFullName[:-2]

                soup.find("label", attrs={
                    "id": "treasurerName"
                }).string = ((form99_json_data.get("treasurerLastName", "") +
                              ", " +
                              form99_json_data.get("treasurerFirstName", "")
                              ).strip().rstrip(",").strip())

                f99_html_data = form99_json_data["text"]
                soup.find("label", attrs={"id": "text"}).string = f99_html_data
                soup.find("label", attrs={
                    "id": form99_json_data["reason"]
                }).string = "X"

                date_array = form99_json_data["dateSigned"].split("/")
                soup.find("span", attrs={
                    "id": "dateSignedMonth"
                }).string = str(date_array[0])
                soup.find("span", attrs={
                    "id": "dateSignedDate"
                }).string = str(date_array[1])
                soup.find("span", attrs={
                    "id": "dateSignedYear"
                }).string = str(date_array[2])

                with open(outfile, "w") as output_file:
                    output_file.write(
                        str(soup).replace("&lt;", "<").replace("&gt;", ">"))

                # F99 PDF page padding options
                options = {
                    "margin-top": "0.40in",
                    "margin-right": "0.20in",
                    "margin-bottom": "0.40in",
                    "margin-left": "0.20in",
                }

                # HTML(outfile).write_pdf(md5_directory + json_file_md5 + '.pdf', stylesheets=[CSS(current_app.config['FORMS_LOCATION'].format('F99.css'))])
                pdfkit.from_file(outfile,
                                 md5_directory + json_file_md5 + ".pdf",
                                 options=options)
                # pdfkit.from_file(outfile, md5_directory + json_file_md5 + '.pdf')

                total_no_of_pages = pypdftk.get_num_pages(md5_directory +
                                                          json_file_md5 +
                                                          ".pdf")

            # checking if attachment_file exist
            if ((paginate or page_count) and attachment_file_content) or (
                    not paginate and "attachment_file" in request.files):
                # reading Attachment title file
                attachment_title_file = current_app.config[
                    "FORM_TEMPLATES_LOCATION"].format("Attachment_Title")

                if (paginate or page_count) and attachment_file_content:
                    attachment_file = json.loads(attachment_file_content)
                else:
                    attachment_file = request.files.get("attachment_file")

                attachment_file.save(
                    os.path.join(md5_directory + "attachment_temp.pdf"))
                os.makedirs(md5_directory + "attachment", exist_ok=True)
                os.makedirs(md5_directory + "final_attachment", exist_ok=True)
                pypdftk.split(md5_directory + "attachment_temp.pdf",
                              md5_directory + "attachment")
                os.remove(md5_directory + "attachment/doc_data.txt")
                attachment_no_of_pages = pypdftk.get_num_pages(
                    os.path.join(md5_directory + "attachment_temp.pdf"))
                attachment_page_no = total_no_of_pages
                total_no_of_pages += attachment_no_of_pages

                # we are doing this to assign page numbers to attachment file
                for filename in os.listdir(md5_directory + "attachment"):
                    attachment_page_no += 1
                    page_dict = {}
                    page_dict["PAGESTR"] = ("PAGE " + str(attachment_page_no) +
                                            " / " + str(total_no_of_pages))

                    if silent_print:
                        page_dict["IMGNO"] = txn_img_num + attachment_page_no

                    pypdftk.fill_form(
                        attachment_title_file,
                        md5_directory + "attachment/attachment_page_" +
                        str(attachment_page_no) + ".pdf",
                    )
                    pypdftk.stamp(
                        md5_directory + "attachment/" + filename,
                        md5_directory + "attachment/attachment_page_" +
                        str(attachment_page_no) + ".pdf",
                        md5_directory + "final_attachment/attachment_" +
                        str(attachment_page_no) + ".pdf",
                    )
                pypdftk.concat(
                    directory_files(md5_directory + "final_attachment/"),
                    md5_directory + "attachment.pdf",
                )
                os.remove(md5_directory + "attachment_temp.pdf")

            os.makedirs(md5_directory + "pages", exist_ok=True)
            os.makedirs(md5_directory + "final_pages", exist_ok=True)
            pypdftk.split(md5_directory + json_file_md5 + ".pdf",
                          md5_directory + "pages")
            os.remove(md5_directory + "pages/doc_data.txt")

            f99_page_no = 1
            for filename in os.listdir(md5_directory + "pages"):
                page_dict = {}
                page_dict["PAGESTR"] = ("PAGE " + str(f99_page_no) + " / " +
                                        str(total_no_of_pages))

                if silent_print:
                    page_dict["IMGNO"] = txn_img_num
                    txn_img_num += 1
                    # need to print timestamp on first page only
                    if filing_timestamp and f99_page_no == 1:
                        page_dict["FILING_TIMESTAMP"] = filing_timestamp

                page_number_file = current_app.config[
                    "FORM_TEMPLATES_LOCATION"].format("Page_Number")
                pypdftk.fill_form(
                    page_number_file,
                    page_dict,
                    md5_directory + "pages/page_number_" +
                    str(f99_page_no).zfill(6) + ".pdf",
                )
                pypdftk.stamp(
                    md5_directory + "pages/page_number_" +
                    str(f99_page_no).zfill(6) + ".pdf",
                    md5_directory + "pages/" + filename,
                    md5_directory + "final_pages/page_" +
                    str(f99_page_no).zfill(6) + ".pdf",
                )
                f99_page_no += 1

            pypdftk.concat(
                directory_files(md5_directory + "final_pages/"),
                json_file_md5 + "_temp.pdf",
            )

            if ((paginate or page_count) and attachment_file_content) or (
                    not paginate and "attachment_file" in request.files):
                pypdftk.concat(
                    [
                        json_file_md5 + "_temp.pdf",
                        md5_directory + "attachment.pdf"
                    ],
                    md5_directory + "all_pages.pdf",
                )
                shutil.rmtree(md5_directory + "attachment")
                shutil.rmtree(md5_directory + "final_attachment")
                os.remove(md5_directory + "attachment.pdf")
            else:
                shutil.move(json_file_md5 + "_temp.pdf",
                            md5_directory + "all_pages.pdf")

            # clean up task
            shutil.rmtree(md5_directory + "pages")
            shutil.rmtree(md5_directory + "final_pages")
            os.remove(md5_directory + json_file_md5 + ".pdf")

            # if flask.request.method == "POST":

            response = {
                # 'file_name': ent_app.conf'{}.pdf'.format(json_file_md5),
                "total_pages": total_no_of_pages,
            }

            if not page_count and not paginate:
                s3 = boto3.client("s3")
                extraArgs = {
                    "ContentType": "application/pdf",
                    "ACL": "public-read"
                }

                if silent_print:
                    response["pdf_url"] = current_app.config[
                        'S3_FILE_URL'] + rep_id + '.pdf'
                    s3.upload_file(
                        md5_directory + 'all_pages.pdf',
                        current_app.
                        config['AWS_FECFILE_COMPONENTS_BUCKET_NAME'],
                        current_app.config['AWS_FECFILE_OUTPUT_DIRECTORY'] +
                        '/' + str(rep_id) + '.pdf',
                        ExtraArgs=extraArgs)
                else:
                    response["pdf_url"] = (
                        current_app.config["PRINT_OUTPUT_FILE_URL"].format(
                            json_file_md5) + "all_pages.pdf", )

                    s3.upload_file(
                        md5_directory + "all_pages.pdf",
                        current_app.
                        config["AWS_FECFILE_COMPONENTS_BUCKET_NAME"],
                        md5_directory + "all_pages.pdf",
                        ExtraArgs=extraArgs,
                    )
            else:
                if not is_dir_exist:
                    shutil.rmtree(md5_directory)
                if paginate:
                    txn_img_json = {
                        "summary": {
                            "committeeId":
                            form99_json_data.get("committeeId", None),
                            "begin_image_num":
                            begin_image_num,
                            "end_image_num":
                            txn_img_num
                        }
                    }
                    response["txn_img_json"] = txn_img_json

            envelope = common.get_return_envelope(data=response)
            status_code = (status.HTTP_200_OK if page_count or paginate else
                           status.HTTP_201_CREATED)
            return flask.jsonify(**envelope), status_code

            # elif page_count or paginate:
            #     if not is_dir_exist:
            #         shutil.rmtree(md5_directory)
            #         response = {
            #             "total_pages": total_no_of_pages,
            #         }
            #     elif paginate:
            #         txn_img_json = {
            #             'summary' : {
            #                 'committeeId': form99_json_data.get('committeeId', None)
            #             }
            #         }
            #         response['txn_img_json'] = txn_img_json
            #     return True, response
            # elif silent_print and not flask.request.method == "POST":
            #     return True, {}
        else:
            if paginate or page_count or silent_print:
                envelope = common.get_return_envelope(False, "")
            else:
                # elif flask.request.method == "POST":
                envelope = common.get_return_envelope(
                    False, "json_file is missing from your request")
            return flask.jsonify(**envelope), status.HTTP_400_BAD_REQUEST
    except Exception as e:
        traceback.print_exception(*sys.exc_info())
        return error("Error generating print preview, error message: " +
                     str(e))

Example #11

0

Show file

 def test_get_pages_single_range(self):
     pageRanges = [[1], [2, 5]]
     output_file = pypdftk.get_pages(TEST_PDF_PATH, pageRanges)
     concat_total_pages = pypdftk.get_num_pages(output_file)
     self.assertEqual(rangeCount(pageRanges), concat_total_pages)

Example #12

0

Show file

#files = []
#for filename in glob.glob('*.pdf'):
#                files.append(filename)

myDir = "."
files = []
for root, dirnames, filenames in os.walk(myDir):
    files.extend(glob.glob(root + "/*.pdf"))

#dirList=os.listdir(file_path) #list all the files in the directories
file_write = open('pdf_stats_' + timestamp + '.csv',
                  'w')  #writing no.of pages into a csv file
file_write.write("No~FileName~PageCount~Size~")
file_write.write("\n")
file_write.write("\n")
print "\n\n"
counter = 1
for fname in files:
    data_find = str(counter) + '~' + fname + '~' + str(
        pypdftk.get_num_pages(fname)) + '~' + humanize.naturalsize(
            os.path.getsize(
                fname)) + '~'  # giving file path with the name of the file
    print data_find  # test with printing the data
    file_write.write(str(data_find))
    file_write.write("\n")
    counter = counter + 1
file_write.close()

print "\nWrote the PDF stats to the file " + 'pdf_stats_' + timestamp + '.csv' + '\n\n'

Example #13

0

Show file

File: pdf_stats.py Project: Dineshkumar-Ponnusamy/tools-for-wiki

#files = []
#for filename in glob.glob('*.pdf'):
#                files.append(filename)



myDir = "."
files = []
for root, dirnames, filenames in os.walk(myDir):
    files.extend(glob.glob(root + "/*.pdf"))



#dirList=os.listdir(file_path) #list all the files in the directories
file_write = open('pdf_stats_' + timestamp + '.csv', 'w') #writing no.of pages into a csv file
file_write.write("No~FileName~PageCount~Size~")
file_write.write("\n")
file_write.write("\n")
print "\n\n"
counter = 1
for fname in files:
	data_find = str(counter) + '~' + fname +'~'+ str(pypdftk.get_num_pages(fname)) + '~' + humanize.naturalsize(os.path.getsize(fname)) + '~'  # giving file path with the name of the file
	print data_find # test with printing the data
	file_write.write(str(data_find)) 
	file_write.write("\n")	
	counter = counter + 1
file_write.close()

print "\nWrote the PDF stats to the file " + 'pdf_stats_' + timestamp + '.csv' + '\n\n'

Example #14

0

Show file

File: test.py Project: revolunet/pypdftk

 def test_replace_page_at_middle(self):
     total_pages = pypdftk.get_num_pages(TEST_PDF_PATH)
     pdf_to_insert = 'test_files/page_01.pdf'
     pypdftk.replace_page(TEST_PDF_PATH, 3, pdf_to_insert)
     self.assertEqual(total_pages, pypdftk.get_num_pages(TEST_PDF_PATH))

Example #15

0

Show file

File: test.py Project: revolunet/pypdftk

 def test_concat(self):
     total_pages = pypdftk.get_num_pages(TEST_PDF_PATH)
     output_file = pypdftk.concat([TEST_PDF_PATH, TEST_PDF_PATH, TEST_PDF_PATH])
     concat_total_pages = pypdftk.get_num_pages(output_file)
     self.assertEqual(total_pages * 3, concat_total_pages)

Example #16

0

Show file

 def test_concat(self):
     total_pages = pypdftk.get_num_pages(TEST_PDF_PATH)
     output_file = pypdftk.concat(
         [TEST_PDF_PATH, TEST_PDF_PATH, TEST_PDF_PATH])
     concat_total_pages = pypdftk.get_num_pages(output_file)
     self.assertEqual(total_pages * 3, concat_total_pages)

Example #17

0

Show file

 def test_get_pages_clone(self):
     total_pages = pypdftk.get_num_pages(TEST_PDF_PATH)
     output_file = pypdftk.get_pages(TEST_PDF_PATH, [])
     concat_total_pages = pypdftk.get_num_pages(output_file)
     self.assertEqual(total_pages, concat_total_pages)

Example #18

0

Show file

def print_f99_pdftk(stamp_print):
    # check if json_file is in the request

    if 'json_file' in request.files:
        total_no_of_pages = 1
        page_no = 1
        json_file = request.files.get('json_file')
        # generate md5 for json file
        json_file_md5 = utils.md5_for_file(json_file)
        json_file.stream.seek(0)
        md5_directory = current_app.config['OUTPUT_DIR_LOCATION'].format(
            json_file_md5)
        os.makedirs(md5_directory, exist_ok=True)
        infile = current_app.config['FORM_TEMPLATES_LOCATION'].format('F99')
        # save json file as md5 file name
        json_file.save(
            current_app.config['REQUEST_FILE_LOCATION'].format(json_file_md5))
        outfile = md5_directory + json_file_md5 + '_temp.pdf'
        json_data = json.load(
            open(current_app.config['REQUEST_FILE_LOCATION'].format(
                json_file_md5)))
        # setting timestamp and imgno to empty as these needs to show up after submission
        if stamp_print != 'stamp':
            json_data['FILING_TIMESTAMP'] = ''
            json_data['IMGNO'] = ''

        f99_pages_text_json = json.loads(split_f99_text_pages(json_data))
        json_data['MISCELLANEOUS_TEXT'] = f99_pages_text_json['main_page']
        total_no_of_pages += len(f99_pages_text_json['additional_pages'])
        # checking if attachment_file exist
        if 'attachment_file' in request.files:
            # reading Attachment title file
            attachment_title_file = current_app.config[
                'FORM_TEMPLATES_LOCATION'].format('Attachment_Title')
            attachment_file = request.files.get('attachment_file')
            attachment_file.save(
                os.path.join(md5_directory + 'attachment_temp.pdf'))
            os.makedirs(md5_directory + 'attachment', exist_ok=True)
            os.makedirs(md5_directory + 'final_attachment', exist_ok=True)
            pypdftk.split(md5_directory + 'attachment_temp.pdf',
                          md5_directory + 'attachment')
            os.remove(md5_directory + 'attachment/doc_data.txt')
            attachment_no_of_pages = pypdftk.get_num_pages(
                os.path.join(md5_directory + 'attachment_temp.pdf'))
            attachment_page_no = total_no_of_pages
            total_no_of_pages += attachment_no_of_pages

            # we are doing this to assign page numbers to attachment file
            for filename in os.listdir(md5_directory + 'attachment'):
                attachment_page_no += 1
                pypdftk.fill_form(
                    attachment_title_file, {
                        "PAGESTR":
                        "PAGE " + str(attachment_page_no) + " / " +
                        str(total_no_of_pages)
                    }, md5_directory + 'attachment/attachment_page_' +
                    str(attachment_page_no) + '.pdf')
                pypdftk.stamp(
                    md5_directory + 'attachment/' + filename,
                    md5_directory + 'attachment/attachment_page_' +
                    str(attachment_page_no) + '.pdf',
                    md5_directory + 'final_attachment/attachment_' +
                    str(attachment_page_no) + '.pdf')
            pypdftk.concat(
                directory_files(md5_directory + 'final_attachment/'),
                md5_directory + 'attachment.pdf')
            os.remove(md5_directory + 'attachment_temp.pdf')
            shutil.rmtree(md5_directory + 'attachment')
            shutil.rmtree(md5_directory + 'final_attachment')

        json_data['PAGESTR'] = "PAGE " + str(page_no) + " / " + str(
            total_no_of_pages)

        pypdftk.fill_form(infile, json_data, outfile, flatten=False)
        additional_page_counter = 0
        if len(f99_pages_text_json['additional_pages']) > 0:
            continuation_file = current_app.config[
                'FORM_TEMPLATES_LOCATION'].format('F99_CONT')
            os.makedirs(md5_directory + 'merge', exist_ok=True)
            for additional_page in f99_pages_text_json['additional_pages']:
                page_no += 1
                continuation_outfile = md5_directory + 'merge/' + str(
                    additional_page_counter) + '.pdf'
                pypdftk.fill_form(
                    continuation_file, {
                        "PAGESTR":
                        "PAGE " + str(page_no) + " / " +
                        str(total_no_of_pages),
                        "CONTINOUS_TEXT":
                        additional_page[str(additional_page_counter)]
                    }, continuation_outfile)
                pypdftk.concat([outfile, continuation_outfile], md5_directory +
                               json_file_md5 + '_all_pages_temp.pdf')
                shutil.copy(
                    md5_directory + json_file_md5 + '_all_pages_temp.pdf',
                    outfile)
                additional_page_counter += 1
                os.remove(md5_directory + json_file_md5 +
                          '_all_pages_temp.pdf')

        # Add the F99 attachment
        if 'attachment_file' in request.files:
            pypdftk.concat([outfile, md5_directory + 'attachment.pdf'],
                           md5_directory + 'all_pages.pdf')
            os.remove(md5_directory + 'attachment.pdf')
        else:
            shutil.copy(outfile, md5_directory + 'all_pages.pdf')
        os.remove(md5_directory + json_file_md5 + '_temp.pdf')
        # push output file to AWS
        s3 = boto3.client('s3')
        s3.upload_file(
            md5_directory + 'all_pages.pdf',
            current_app.config['AWS_FECFILE_COMPONENTS_BUCKET_NAME'],
            md5_directory + 'all_pages.pdf',
            ExtraArgs={
                'ContentType': "application/pdf",
                'ACL': "public-read"
            })
        response = {
            # 'file_name': '{}.pdf'.format(json_file_md5),
            'pdf_url':
            current_app.config['PRINT_OUTPUT_FILE_URL'].format(json_file_md5) +
            'all_pages.pdf'
        }

        if flask.request.method == "POST":
            envelope = common.get_return_envelope(data=response)
            status_code = status.HTTP_201_CREATED
            return flask.jsonify(**envelope), status_code

    else:

        if flask.request.method == "POST":
            envelope = common.get_return_envelope(
                'false', 'JSON file is missing from your request')
            status_code = status.HTTP_400_BAD_REQUEST
            return flask.jsonify(**envelope), status_code

Example #19

0

Show file

File: test_file.py Project: pmartynov/pypdftk

def test_num_pages():
    input_file = "./Out/some_file.pdf"
    num_pgs = pypdftk.get_num_pages(input_file)
    print num_pgs

Example #20

0

Show file

def print_f99_pdftk_html(stamp_print):
    # check if json_file is in the request
    # HTML("templates/forms/test.html").write_pdf("output/pdf/test/test.pdf")
    # HTML(string='''<br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><div><b>This is bold text</b></div><div><u>This is underline text</u></div><div><i>This is italics text</i><u><br></u></div><div align='center'><u>Title</u></div><div align='left'><u><br></u></div><ol><li>one</li><li>two</li><li>three</li></ol>''').write_pdf("output/pdf/test/test.pdf")
    # pdfkit.from_file("templates/forms/test.html", "output/pdf/test/test.pdf")
    # pypdftk.stamp(current_app.config['FORM_TEMPLATES_LOCATION'].format('F99'), "output/pdf/test/test.pdf", "output/pdf/test/output.pdf")

    if 'json_file' in request.files:
        total_no_of_pages = 1
        page_no = 1
        json_file = request.files.get('json_file')
        # generate md5 for json file
        json_file_md5 = utils.md5_for_file(json_file)
        json_file.stream.seek(0)
        md5_directory = current_app.config['OUTPUT_DIR_LOCATION'].format(
            json_file_md5)
        os.makedirs(md5_directory, exist_ok=True)
        # os.makedirs(md5_directory + "images", exist_ok=True)
        if not os.path.exists(md5_directory + "images"):
            shutil.copytree("templates/forms/F99/images",
                            md5_directory + "images")
        shutil.copyfile("templates/forms/F99/form-text.css",
                        md5_directory + "form-text.css")
        infile = current_app.config['HTML_FORM_TEMPLATES_LOCATION'].format(
            'template')
        json_file.save(
            current_app.config['REQUEST_FILE_LOCATION'].format(json_file_md5))
        outfile = md5_directory + json_file_md5 + '.html'
        json_data = json.load(
            open(current_app.config['REQUEST_FILE_LOCATION'].format(
                json_file_md5)))
        form99_json_data = json_data['data']
        # load the file
        with open(infile) as inf:
            txt = inf.read()
            soup = bs4.BeautifulSoup(txt)
            soup.find('label', attrs={
                'id': 'committeeName'
            }).string = form99_json_data['committeeName']
            soup.find('label', attrs={
                'id': 'street1'
            }).string = form99_json_data['street1']
            soup.find('label', attrs={
                'id': 'street2'
            }).string = form99_json_data['street2']
            soup.find('label', attrs={
                'id': 'city'
            }).string = form99_json_data['city']
            soup.find('label', attrs={
                'id': 'state'
            }).string = form99_json_data['state']
            soup.find('label', attrs={
                'id': 'zipCode'
            }).string = form99_json_data['zipCode']
            soup.find('span', attrs={
                'id': 'committeeId'
            }).string = form99_json_data['committeeId']
            soup.find('label', attrs={'id': 'treasurerFullName'}).string = form99_json_data['treasurerLastName'] + \
                                                                           ', ' + form99_json_data['treasurerFirstName'] \
                                                                           + ', ' + form99_json_data['treasurerMiddleName'] \
                                                                           + ', ' + form99_json_data['treasurerPrefix'] \
                                                                           + ', ' + form99_json_data['treasurerSuffix']
            soup.find('label', attrs={'id': 'treasurerName'}).string = form99_json_data['treasurerLastName'] + \
                                                                       ', ' + form99_json_data['treasurerFirstName']
            f99_html_data = form99_json_data['text']
            soup.find('label', attrs={'id': 'text'}).string = f99_html_data
            soup.find('label', attrs={
                'id': form99_json_data['reason']
            }).string = 'X'

            date_array = form99_json_data['dateSigned'].split("/")
            soup.find('span', attrs={
                'id': 'dateSignedMonth'
            }).string = str(date_array[0])
            soup.find('span', attrs={
                'id': 'dateSignedDate'
            }).string = str(date_array[1])
            soup.find('span', attrs={
                'id': 'dateSignedYear'
            }).string = str(date_array[2])

            with open(outfile, "w") as output_file:
                output_file.write(
                    str(soup).replace("&lt;", "<").replace("&gt;", ">"))

            # F99 PDF page padding options
            options = {
                'margin-top': '0.36in',
                'margin-right': '0.25in',
                'margin-bottom': '0.39in',
                'margin-left': '0.25in'
            }
            # HTML(outfile).write_pdf(md5_directory + json_file_md5 + '.pdf', stylesheets=[CSS(current_app.config['FORMS_LOCATION'].format('F99.css'))])
            pdfkit.from_file(outfile,
                             md5_directory + json_file_md5 + '.pdf',
                             options=options)

            total_no_of_pages = pypdftk.get_num_pages(md5_directory +
                                                      json_file_md5 + '.pdf')
            page_number_file = current_app.config[
                'FORM_TEMPLATES_LOCATION'].format('Page_Number')

        # checking if attachment_file exist
        if 'attachment_file' in request.files:
            # reading Attachment title file
            attachment_title_file = current_app.config[
                'FORM_TEMPLATES_LOCATION'].format('Attachment_Title')
            attachment_file = request.files.get('attachment_file')
            attachment_file.save(
                os.path.join(md5_directory + 'attachment_temp.pdf'))
            os.makedirs(md5_directory + 'attachment', exist_ok=True)
            os.makedirs(md5_directory + 'final_attachment', exist_ok=True)
            pypdftk.split(md5_directory + 'attachment_temp.pdf',
                          md5_directory + 'attachment')
            os.remove(md5_directory + 'attachment/doc_data.txt')
            attachment_no_of_pages = pypdftk.get_num_pages(
                os.path.join(md5_directory + 'attachment_temp.pdf'))
            attachment_page_no = total_no_of_pages
            total_no_of_pages += attachment_no_of_pages

            # we are doing this to assign page numbers to attachment file
            for filename in os.listdir(md5_directory + 'attachment'):
                attachment_page_no += 1
                pypdftk.fill_form(
                    attachment_title_file, {
                        "PAGESTR":
                        "PAGE " + str(attachment_page_no) + " / " +
                        str(total_no_of_pages)
                    }, md5_directory + 'attachment/attachment_page_' +
                    str(attachment_page_no) + '.pdf')
                pypdftk.stamp(
                    md5_directory + 'attachment/' + filename,
                    md5_directory + 'attachment/attachment_page_' +
                    str(attachment_page_no) + '.pdf',
                    md5_directory + 'final_attachment/attachment_' +
                    str(attachment_page_no) + '.pdf')
            pypdftk.concat(
                directory_files(md5_directory + 'final_attachment/'),
                md5_directory + 'attachment.pdf')
            os.remove(md5_directory + 'attachment_temp.pdf')
            # shutil.rmtree(md5_directory + 'attachment')
            # shutil.rmtree(md5_directory + 'final_attachment')
            # pypdftk.concat([md5_directory + json_file_md5 + '.pdf', md5_directory + 'attachment.pdf'], md5_directory + 'all_pages_temp.pdf')
        # else:
        #     shutil.move(md5_directory + json_file_md5 + '.pdf', md5_directory + 'all_pages_temp.pdf')
        os.makedirs(md5_directory + 'pages', exist_ok=True)
        os.makedirs(md5_directory + 'final_pages', exist_ok=True)
        pypdftk.split(md5_directory + json_file_md5 + '.pdf',
                      md5_directory + 'pages')
        os.remove(md5_directory + 'pages/doc_data.txt')
        f99_page_no = 1
        for filename in os.listdir(md5_directory + 'pages'):
            pypdftk.fill_form(
                page_number_file, {
                    "PAGESTR":
                    "PAGE " + str(f99_page_no) + " / " + str(total_no_of_pages)
                }, md5_directory + 'pages/page_number_' + str(f99_page_no) +
                '.pdf')
            pypdftk.stamp(
                md5_directory + 'pages/page_number_' + str(f99_page_no) +
                '.pdf', md5_directory + 'pages/' + filename, md5_directory +
                'final_pages/page_' + str(f99_page_no) + '.pdf')
            f99_page_no += 1

        pypdftk.concat(directory_files(md5_directory + 'final_pages/'),
                       json_file_md5 + '_temp.pdf')

        if 'attachment_file' in request.files:
            pypdftk.concat([
                json_file_md5 + '_temp.pdf', md5_directory + 'attachment.pdf'
            ], md5_directory + 'all_pages.pdf')
            shutil.rmtree(md5_directory + 'attachment')
            shutil.rmtree(md5_directory + 'final_attachment')
            os.remove(md5_directory + 'attachment.pdf')
        else:
            shutil.move(json_file_md5 + '_temp.pdf',
                        md5_directory + 'all_pages.pdf')

        # clean up task
        shutil.rmtree(md5_directory + 'pages')
        shutil.rmtree(md5_directory + 'final_pages')
        # os.remove(md5_directory + json_file_md5 + '.html')
        # shutil.rmtree(md5_directory + 'images')
        # os.remove(md5_directory + 'form-text.css')
        os.remove(md5_directory + json_file_md5 + '.pdf')

        # for f99_page_no in range(f99_no_of_pages):
        #     pypdftk.fill_form(page_number_file,
        #                   {"PAGESTR": "PAGE " + str(f99_page_no+1) + " / " + str(total_no_of_pages)},
        #                   md5_directory + 'pages/page_' + str(f99_page_no+1) + '.pdf')
        #     pypdftk.stamp(md5_directory + json_file_md5 + '.pdf', md5_directory +
        #                   'pages/page_' + str(f99_page_no+1) + '.pdf', md5_directory + json_file_md5 + '_temp.pdf')

        # json_data['PAGESTR'] = "PAGE " + str(page_no) + " / " + str(total_no_of_pages)

        # json_data['MISCELLANEOUS_TEXT'] = ''
        # xfdf_path = pypdftk.gen_xfdf(json_data)
        # pypdftk.fill_form(infile, json_data, outfile)

        # HTML(string='''<br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><font face='Helvetica' size=10 ''' + f99_full_text).\
        #     write_pdf("output/pdf/test/test.pdf")
        # pypdftk.stamp(outfile, "output/pdf/test/test.pdf", "output/pdf/test/output.pdf")

        # additional_page_counter = 0
        # if len(f99_pages_text_json['additional_pages']) > 0:
        #     continuation_file = current_app.config['FORM_TEMPLATES_LOCATION'].format('F99_CONT')
        #     os.makedirs(md5_directory + 'merge', exist_ok=True)
        #     for additional_page in f99_pages_text_json['additional_pages']:
        #         page_no += 1
        #         continuation_outfile = md5_directory + 'merge/' + str(additional_page_counter)+'.pdf'
        #         pypdftk.fill_form(continuation_file, {"PAGESTR": "PAGE "+str(page_no)+" / " + str(total_no_of_pages),
        #                                               "CONTINOUS_TEXT": additional_page[str(additional_page_counter)]}, continuation_outfile)
        #         pypdftk.concat([outfile, continuation_outfile], md5_directory + json_file_md5 + '_all_pages_temp.pdf')
        #         shutil.copy(md5_directory + json_file_md5 + '_all_pages_temp.pdf', outfile)
        #         additional_page_counter += 1
        #         os.remove(md5_directory + json_file_md5 + '_all_pages_temp.pdf')
        #
        # # Add the F99 attachment
        # if 'attachment_file' in request.files:
        #     pypdftk.concat([outfile, md5_directory + 'attachment.pdf'], md5_directory + 'all_pages.pdf')
        #     os.remove(md5_directory + 'attachment.pdf')
        # else:
        #     shutil.copy(outfile, md5_directory + 'all_pages.pdf')
        # os.remove(md5_directory + json_file_md5 +'_temp.pdf')
        # push output file to AWS
        s3 = boto3.client('s3')
        s3.upload_file(
            md5_directory + 'all_pages.pdf',
            current_app.config['AWS_FECFILE_COMPONENTS_BUCKET_NAME'],
            md5_directory + 'all_pages.pdf',
            ExtraArgs={
                'ContentType': "application/pdf",
                'ACL': "public-read"
            })
        response = {
            # 'file_name': '{}.pdf'.format(json_file_md5),
            'pdf_url':
            current_app.config['PRINT_OUTPUT_FILE_URL'].format(json_file_md5) +
            'all_pages.pdf'
        }

        if flask.request.method == "POST":
            envelope = common.get_return_envelope(data=response)
            status_code = status.HTTP_201_CREATED
            return flask.jsonify(**envelope), status_code

    else:

        if flask.request.method == "POST":
            envelope = common.get_return_envelope(
                'false', 'JSON file is missing from your request')
            status_code = status.HTTP_400_BAD_REQUEST
            return flask.jsonify(**envelope), status_code

Example #21

0

Show file

File: test_file.py Project: cfourrou/pypdftk

def test_num_pages():
    input_file = "./Out/some_file.pdf"
    num_pgs = pypdftk.get_num_pages(input_file)
    print num_pgs

Example #22

0

Show file

def main():
  jobfile = "applicants.%s.pkl"%jobid
  try:
    os.mkdir(jobid)
  except OSError:
    print "jobid dir already exists"
  chdir(jobid)
  try:
    with open(jobfile,"rb") as f:
      app_record = pickle.load(f)
  except IOError:
    app_record = {}

  updates = {}
  # data is persistent list of apps already created,
  # attempted is ones we've done this iteration

  # cookie = extract_cookie_from_curl(input_url())
  cookie = 'Cookie: ' + curlline
  getthisurl = list_url % jobid
  curlargs = ["curl","-s",getthisurl,"-H",cookie]
  h =  check_output(curlargs)
  for name,url,pdfurl in completed_applicants(h):
    if url in broken:
      print "SKIPING BROKE APP"
      continue

    qs = urlparse.urlparse(url).query
    uid = urlparse.parse_qs(qs)['userID'][0]
    fname = "%s.%s.%s.pdf" % (jobid,uid,ts)

    # need to submit this form to receive the concatenated PDFs
    br = mechanize.Browser()
    br.addheaders = [('Cookie',curlline)]
    br.set_handle_robots(False)
    br.open(pdfurl,timeout=120.0)
    br.select_form(nr=0)
    response=br.submit()
    file_content = response.read()
    with open(fname,'wb') as f:
      f.write(file_content)

    

    # system("curl -s '%s' -H '%s' > %s" % (pdfurl,cookie,fname))
    
    # accumulate updates for full pass on applicants
    
    try:
      num_pages = pypdftk.get_num_pages(fname)
    except:
      num_pages = -1
    if num_pages > 0 and app_record.get(uid,0) == num_pages:
      print "%s (%s) unchanged number of pages, skipping" % (uid, name)
      os.unlink(fname)
    else:
      print "%s (%s) new/changed" % (uid, name)
      print "oldnum: %d\nnewnum: %d" % (app_record.get(uid,0),num_pages)
      updates[url] = (name,uid,fname)
      app_record[uid] = num_pages
  
  update_applicants(updates)
  with open(jobfile,"wb") as f:
    pickle.dump(app_record,f)