def docx_to_pdf(infilename, outfilename):
    """Extract the text of a DOCX file and write it to a PDF file.

    Args:
        infilename: Path of the DOCX file to read.
        outfilename: Path of the PDF file to create.

    Exits the process with status 1 if the DOCX file cannot be opened.
    """
    try:
        infil = opendocx(infilename)
    except Exception as e:
        # Catch Exception rather than everything, so Ctrl-C and SystemExit
        # still propagate, and report which file failed and why.
        print("Error opening %s: %r" % (infilename, e))
        sys.exit(1)

    paragraphs = getdocumenttext(infil)

    pw = PDFWriter(outfilename)
    pw.setFont("Courier", 12)
    pw.setHeader("DOCXtoPDF - convert text in DOCX file to PDF")
    pw.setFooter("Generated by xtopdf and python-docx")
    wrapper = TextWrapper(width=70, drop_whitespace=False)

    # For Unicode handling: every paragraph is encoded up front, before
    # anything is written to the PDF.
    # NOTE(review): wrapping runs on the encoded value, not on the original
    # text - confirm that is still what is wanted.
    encoded_paragraphs = [
        paragraph.encode("utf-8") for paragraph in paragraphs
    ]

    for paragraph in encoded_paragraphs:
        for line in wrapper.wrap(paragraph):
            pw.writeLine(line)
        # Blank line between paragraphs.
        pw.writeLine("")

    pw.savePage()
    pw.close()
def text_to_pdf(txt_filename):
    """Convert a text file to a PDF file named ``<txt_filename>.pdf``.

    Args:
        txt_filename: Path of the text file to convert. The name is also
            shown in the page header of the generated PDF.
    """
    pw = PDFWriter(txt_filename + '.pdf')
    pw.setFont('Courier', 12)
    pw.setHeader('{} converted to PDF'.format(txt_filename))
    pw.setFooter('PDF conversion by xtopdf: https://google.com/search?q=xtopdf')

    with open(txt_filename) as txt_fil:
        for line in txt_fil:
            # Remove only newline characters; other whitespace is kept.
            pw.writeLine(line.strip('\n'))

    pw.savePage()
    # Close the writer once the last page has been saved.
    # NOTE(review): assumes PDFWriter.close() is what finalizes the output
    # file - confirm against the PDFWriter implementation.
    pw.close()
def text_to_pdf(txt_filename):
    """Write the lines of a text file to ``<txt_filename>.pdf``.

    Args:
        txt_filename: Path of the input text file; it also appears in the
            header of each PDF page.
    """
    pw = PDFWriter(txt_filename + '.pdf')
    pw.setFont('Courier', 12)
    pw.setHeader('{} converted to PDF'.format(txt_filename))
    pw.setFooter(
        'PDF conversion by xtopdf: https://google.com/search?q=xtopdf')

    with open(txt_filename) as txt_fil:
        for line in txt_fil:
            # Only newline characters are stripped, so indentation and
            # trailing spaces in the input are preserved.
            pw.writeLine(line.strip('\n'))

    pw.savePage()
    # The writer was previously left open; close it after the final page.
    # NOTE(review): assumes PDFWriter.close() is what finalizes the output
    # file - confirm against the PDFWriter implementation.
    pw.close()
def pdf_book():
    """Show the PDF book form, or create a PDF book from the posted form.

    For a GET request the HTML form is returned. For any other request the
    posted fields ``pdf_file_name``, ``header``, ``footer`` and ``content``
    are used to write a PDF file.

    Returns:
        The HTML form, or a short status message saying whether the PDF
        book was created.
    """
    if request.method == 'GET':
        # Display the PDF book creation form.
        return '''
        <form action="/pdf_book" method="post">
            PDF file name: <input type="text" name="pdf_file_name" />
            Header: <input type="text" name="header" />
            Footer: <input type="text" name="footer" />
            Content:
            <textarea name="content" rows="15" cols="50"></textarea>
            <input type="submit" value="Submit" />
        </form>
        '''

    # Create the PDF book from the posted form content.
    try:
        # Get the needed fields from the form.
        # NOTE(review): pdf_file_name is client-supplied, used unchecked as
        # a filesystem path and echoed in the response - consider
        # validating and escaping it.
        pdf_file_name = request.form['pdf_file_name']
        header = request.form['header']
        footer = request.form['footer']
        content = request.form['content']

        # Create a PDFWriter instance and set some of its fields.
        pw = PDFWriter(pdf_file_name)
        pw.setFont("Courier", 12)
        pw.setHeader(header)
        pw.setFooter(footer)

        # Split the content into paragraphs delimited by newlines, wrap
        # each paragraph to lines of at most 70 characters, and write
        # each line to the PDF file.
        wrapper = TextWrapper(width=70, drop_whitespace=False)
        for paragraph in content.split('\n'):
            for line in wrapper.wrap(paragraph):
                pw.writeLine(line)

        pw.savePage()
        pw.close()
        return "OK. PDF book created in file " + pdf_file_name + ".\n"
    except Exception as e:
        # print_exc() logs the traceback of the exception being handled;
        # print_stack() would only show the stack of this handler.
        traceback.print_exc()
        return "Error: PDF book not created.\n" + repr(e) + ".\n"
def pdf_book():
    """Serve the PDF book creation form (GET) or build the PDF (otherwise).

    The form posts four fields: ``pdf_file_name``, ``header``, ``footer``
    and ``content``. The content is wrapped to 70 columns and written to
    the named PDF file.

    Returns:
        The HTML form for a GET request; otherwise a status message that
        reports success or the exception that prevented creation.
    """
    if request.method == 'GET':
        # Display the PDF book creation form.
        return '''
        <form action="/pdf_book" method="post">
            PDF file name: <input type="text" name="pdf_file_name" />
            Header: <input type="text" name="header" />
            Footer: <input type="text" name="footer" />
            Content:
            <textarea name="content" rows="15" cols="50"></textarea>
            <input type="submit" value="Submit" />
        </form>
        '''

    # Create the PDF book from the posted form content.
    try:
        # Get the needed fields from the form.
        # NOTE(review): these values come straight from the client;
        # pdf_file_name is used as a filesystem path and echoed back
        # unescaped - consider validating and escaping it.
        form = request.form
        pdf_file_name = form['pdf_file_name']
        header = form['header']
        footer = form['footer']
        content = form['content']

        # Create a PDFWriter instance and set some of its fields.
        pw = PDFWriter(pdf_file_name)
        pw.setFont("Courier", 12)
        pw.setHeader(header)
        pw.setFooter(footer)

        # Each newline-delimited paragraph is wrapped into lines of at
        # most 70 characters, and each line is written to the PDF file.
        wrapper = TextWrapper(width=70, drop_whitespace=False)
        for paragraph in content.split('\n'):
            for line in wrapper.wrap(paragraph):
                pw.writeLine(line)

        pw.savePage()
        pw.close()
        return "OK. PDF book created in file " + pdf_file_name + ".\n"
    except Exception as e:
        # Log the traceback of the caught exception (print_stack() would
        # print the handler's own call stack instead).
        traceback.print_exc()
        return "Error: PDF book not created.\n" + repr(e) + ".\n"
def main():
    """Write the lines of one or more input files to a single PDF file.

    ``sys.argv[1]`` names the PDF file to create and ``sys.argv[2:]`` name
    the input files. The page header is set to the name of the file whose
    lines are being written, and the page is saved between files.
    """
    if len(sys.argv) < 3:
        usage(sys.argv[0])
        sys.exit(0)

    try:
        pw = PDFWriter(sys.argv[1])
        pw.setFont('Courier', 12)
        pw.setFooter('xtopdf: https://google.com/search?q=xtopdf')

        for text in fileinput.input(sys.argv[2:]):
            if fileinput.filelineno() == 1:
                # First line of an input file: switch the header to it.
                pw.setHeader(fileinput.filename())
                # Any file after the first one ends the previous page.
                if fileinput.lineno() != 1:
                    pw.savePage()
            pw.writeLine(text.strip('\n'))

        pw.savePage()
        pw.close()
    except Exception as exc:
        template = "Caught Exception: type: {}, message: {}"
        print(template.format(exc.__class__, str(exc)))
def main():
    """Convert the text of a sample HTML document to ``HTMLTextTo.pdf``."""
    # Create some HTML for testing conversion of its text to PDF.
    html_doc = """
<html>
<head>
<title>
Test file for HTMLTextToPDF
</title>
</head>
<body>
This is text within the body element but outside any paragraph.
<p>
This is a paragraph of text. Hey there, how do you do?
The quick red fox jumped over the slow blue cow.
</p>
<p>
This is another paragraph of text. Don't mind what it contains.
What is mind? Not matter. What is matter? Never mind.
</p>
This is also text within the body element but not within any paragraph.
</body>
</html>
"""

    pw = PDFWriter("HTMLTextTo.pdf")
    pw.setFont("Courier", 10)
    pw.setHeader("Conversion of HTML text to PDF")
    pw.setFooter("Generated by xtopdf: http://slid.es/vasudevram/xtopdf")

    # Use method chaining this time. The parser is named explicitly so the
    # extracted text does not depend on which parsers are installed.
    for line in BeautifulSoup(html_doc, "html.parser").get_text().split("\n"):
        pw.writeLine(line)

    pw.savePage()
    pw.close()
def main():
    """Write the text content of a built-in HTML sample to ``HTMLTextTo.pdf``."""
    # Create some HTML for testing conversion of its text to PDF.
    html_doc = """
<html>
<head>
<title>
Test file for HTMLTextToPDF
</title>
</head>
<body>
This is text within the body element but outside any paragraph.
<p>
This is a paragraph of text. Hey there, how do you do?
The quick red fox jumped over the slow blue cow.
</p>
<p>
This is another paragraph of text. Don't mind what it contains.
What is mind? Not matter. What is matter? Never mind.
</p>
This is also text within the body element but not within any paragraph.
</body>
</html>
"""

    pw = PDFWriter("HTMLTextTo.pdf")
    pw.setFont("Courier", 10)
    pw.setHeader("Conversion of HTML text to PDF")
    pw.setFooter("Generated by xtopdf: http://slid.es/vasudevram/xtopdf")

    # Use method chaining this time. Passing "html.parser" pins the parser
    # instead of letting BeautifulSoup pick whichever one is installed.
    for line in BeautifulSoup(html_doc, "html.parser").get_text().split("\n"):
        pw.writeLine(line)

    pw.savePage()
    pw.close()
pw.setHeader("DOCXtoPDF - convert text in DOCX file to PDF") pw.setFooter("Generated by xtopdf and python-docx") wrapper = TextWrapper(width=70, drop_whitespace=False) # For Unicode handling. new_paragraphs = [] for paragraph in paragraphs: new_paragraphs.append(paragraph.encode("utf-8")) for paragraph in new_paragraphs: lines = wrapper.wrap(paragraph) for line in lines: pw.writeLine(line) pw.writeLine("") pw.savePage() pw.close() def usage(): return "Usage: python DOCXtoPDF.py infile.docx outfile.txt\n" def main(): try: # Check for correct number of command-line arguments. if len(sys.argv) != 3: print "Wrong number of arguments" print usage() sys.exit(1) infilename = sys.argv[1]
def _write_file_header_section(pw, dbf_fn, file_header, sep):
    """Write the report title and the DBF file header metadata, then end the page."""
    pw.writeLine(sep)
    pw.writeLine("Information for DBF file: %s" % (dbf_fn))
    pw.writeLine(sep)

    pw.writeLine("")
    pw.writeLine("File Header Information:")
    pw.writeLine(sep)

    # Labelled file header metadata. 'last_update' is concatenated without
    # str(), as before.
    pw.writeLine("DBF version (signature) : " + str(file_header['ver']))
    pw.writeLine("Date of last update (YY/MM/DD): " +
                 file_header['last_update'])
    pw.writeLine("Number of data records : " + str(file_header['num_recs']))
    pw.writeLine("DBF header length in bytes : " +
                 str(file_header['hdr_len']))
    pw.writeLine("DBF record length in bytes : " +
                 str(file_header['rec_len']))
    pw.writeLine("Number of fields in DBF file : " +
                 str(file_header['num_flds']))

    pw.writeLine(sep)
    pw.savePage()


def _write_field_headers_section(pw, dr, num_flds, sep):
    """Write one line per DBF field (number, name, type, length, decimals), then end the page."""
    pw.writeLine("Field Header Information:")
    pw.writeLine(sep)
    pw.writeLine("%3s%13s%7s%8s%10s" %
                 ("#", "Field name", "Type", "Length", "Decimals"))
    pw.writeLine(sep)

    # The field headers are indexed as four parallel lists.
    field_headers = dr.read_dbf_field_headers()
    fld_nam, fld_typ, fld_len, fld_dec = (
        field_headers[0], field_headers[1],
        field_headers[2], field_headers[3]
    )

    for fld_num in range(num_flds):
        columns = (
            "%3s" % (fld_num + 1),
            "%13s" % (fld_nam[fld_num]),
            "%4s" % (fld_typ[fld_num]),
            "%5s" % (fld_len[fld_num]),
            "%7s" % (fld_dec[fld_num]),
        )
        pw.writeLine(" ".join(columns))

    pw.writeLine(sep)
    pw.savePage()


def _write_data_records_section(pw, dr, sep):
    """Write every DBF data record prefixed with its serial number, then end the page."""
    # Position the DBFReader instance to start reading data records.
    dr.reset()

    pw.writeLine("DBF Data Records:")
    pw.writeLine(sep)

    rec_num = 0
    while dr.has_next_record():
        rec_num += 1
        # Convert the record to a human-friendly string.
        fr = dbf_record_to_string(dr.next_record())
        pw.writeLine("%7d:" % (rec_num) + " " + "%s" % (fr))

    pw.writeLine(sep)
    pw.savePage()


def main():
    """Write the metadata and data records of a DBF file to a PDF file.

    Expects exactly two command-line arguments: the DBF file to read
    (``sys.argv[1]``) and the PDF file to create (``sys.argv[2]``).
    Otherwise ``usage()`` is called and the process exits with status 1.

    The PDF gets three sections, each ended with its own ``savePage()``:
    the file header, the field headers and the data records.
    """
    # Check for the right number of arguments.
    if len(sys.argv) != 3:
        usage()
        sys.exit(1)

    dbf_fn = sys.argv[1]
    pdf_fn = sys.argv[2]

    # Create and open the DBFReader instance.
    dr = DBFReader(dbf_fn)
    dr.open()

    # Create the PDFWriter instance and set some of its fields.
    pw = PDFWriter(pdf_fn)
    pw.setFont("Courier", 10)

    # The page header and footer carry the same text.
    gen_datetime = time.asctime()
    banner = "Generated by DBFtoPDF: Input: " + dbf_fn + \
        " At: " + gen_datetime
    pw.setHeader(banner)
    pw.setFooter(banner)

    # Separator line used for logical grouping of the output.
    sep = "=" * 60

    file_header = dr.read_dbf_file_header()

    _write_file_header_section(pw, dbf_fn, file_header, sep)
    _write_field_headers_section(pw, dr, file_header["num_flds"], sep)
    _write_data_records_section(pw, dr, sep)

    dr.close()
    pw.close()
def XMLtoPDFBook():
    """Convert an XML book file into a PDF book with a table of contents.

    The XML and PDF file names are obtained from the global ``sysargv``.
    The XML root element must be ``book`` and each of its children must be
    a ``chapter``. A chapter's ``name`` attribute (empty if absent) is its
    table-of-contents entry and its text is the chapter content.

    The PDF starts with a table-of-contents page, followed by the chapters,
    each ended with its own ``savePage()``.

    Exits the process with status 1 if the XML file cannot be parsed or
    does not have the structure described above.
    """
    debug("Entered XMLtoPDFBook()")
    global sysargv

    # Get command-line arguments.
    xml_filename = get_xml_filename(sysargv)
    debug("xml_filename: " + xml_filename)
    pdf_filename = get_pdf_filename(sysargv)
    debug("pdf_filename: " + pdf_filename)

    # Parse the XML file.
    try:
        tree = ET.ElementTree(file=xml_filename)
        debug("tree = " + repr(tree))
    except Exception:
        sys.stderr.write("Error: caught exception in ET.ElementTree(file)")
        sys.exit(1)

    # Get the tree root.
    root = tree.getroot()
    debug("root.tag = " + root.tag)
    if root.tag != "book":
        debug("Error: Root tag is not 'book'")
        sys.exit(1)

    # Chapter names (table of contents) and chapter texts, in document
    # order; the two lists stay the same length.
    toc = []
    chapters = []

    # Traverse the tree, extracting needed data into variables.
    debug("-" * 60)
    for chapter in root:
        if chapter.tag != "chapter":
            debug("Error: root_child tag is not 'chapter'")
            sys.exit(1)
        # An element without text has text None; store "" instead so the
        # split() below does not fail on an empty chapter.
        chapters.append(chapter.text or "")
        toc.append(chapter.attrib.get('name', ""))
    debug("-" * 60)

    # Create and set some fields of a PDFWriter.
    pw = PDFWriter(pdf_filename)
    pw.setFont("Courier", 12)
    pw.setFooter("Generated by XMLtoPDFBook. Copyright 2013 Vasudev Ram")

    # Write the TOC.
    pw.setHeader("Table of Contents")
    debug("Chapter names")
    for chapter_num, chapter_name in enumerate(toc, 1):
        debug(chapter_name)
        pw.writeLine(str(chapter_num) + ": " + chapter_name)
    pw.savePage()

    # Write the chapters, each under its own header.
    for chapter_num, (chapter_name, chapter_text) in enumerate(
            zip(toc, chapters), 1):
        pw.setHeader("Chapter " + str(chapter_num) + ": " + chapter_name)
        for line in chapter_text.split("\n"):
            pw.writeLine(line)
        pw.savePage()

    pw.close()

    debug("Exiting XMLtoPDFBook()")
def _write_file_header_section(pw, dbf_fn, file_header, sep):
    """Write the report title and the DBF file header metadata, then end the page."""
    pw.writeLine(sep)
    pw.writeLine("Information for DBF file: %s" % (dbf_fn))
    pw.writeLine(sep)

    pw.writeLine("")
    pw.writeLine("File Header Information:")
    pw.writeLine(sep)

    # Labelled file header metadata. 'last_update' is concatenated without
    # str(), as before.
    pw.writeLine("DBF version (signature) : " + str(file_header['ver']))
    pw.writeLine("Date of last update (YY/MM/DD): " +
                 file_header['last_update'])
    pw.writeLine("Number of data records : " + str(file_header['num_recs']))
    pw.writeLine("DBF header length in bytes : " +
                 str(file_header['hdr_len']))
    pw.writeLine("DBF record length in bytes : " +
                 str(file_header['rec_len']))
    pw.writeLine("Number of fields in DBF file : " +
                 str(file_header['num_flds']))

    pw.writeLine(sep)
    pw.savePage()


def _write_field_headers_section(pw, dr, num_flds, sep):
    """Write one line per DBF field (number, name, type, length, decimals), then end the page."""
    pw.writeLine("Field Header Information:")
    pw.writeLine(sep)
    pw.writeLine("%3s%13s%7s%8s%10s" %
                 ("#", "Field name", "Type", "Length", "Decimals"))
    pw.writeLine(sep)

    # The field headers are indexed as four parallel lists.
    field_headers = dr.read_dbf_field_headers()
    fld_nam, fld_typ, fld_len, fld_dec = (
        field_headers[0], field_headers[1],
        field_headers[2], field_headers[3]
    )

    for fld_num in range(num_flds):
        columns = (
            "%3s" % (fld_num + 1),
            "%13s" % (fld_nam[fld_num]),
            "%4s" % (fld_typ[fld_num]),
            "%5s" % (fld_len[fld_num]),
            "%7s" % (fld_dec[fld_num]),
        )
        pw.writeLine(" ".join(columns))

    pw.writeLine(sep)
    pw.savePage()


def _write_data_records_section(pw, dr, sep):
    """Write every DBF data record prefixed with its serial number, then end the page."""
    # Position the DBFReader instance to start reading data records.
    dr.reset()

    pw.writeLine("DBF Data Records:")
    pw.writeLine(sep)

    rec_num = 0
    while dr.has_next_record():
        rec_num += 1
        # Convert the record to a human-friendly string.
        fr = dbf_record_to_string(dr.next_record())
        pw.writeLine("%7d:" % (rec_num) + " " + "%s" % (fr))

    pw.writeLine(sep)
    pw.savePage()


def main():
    """Dump a DBF file's header, field definitions and records to a PDF file.

    Requires exactly two command-line arguments: ``sys.argv[1]`` is the DBF
    file to read and ``sys.argv[2]`` is the PDF file to create. With any
    other argument count ``usage()`` is called and the process exits with
    status 1.

    Each of the three report sections is written by its own helper and is
    ended with ``savePage()``.
    """
    # Check for the right number of arguments.
    if len(sys.argv) != 3:
        usage()
        sys.exit(1)

    dbf_fn = sys.argv[1]
    pdf_fn = sys.argv[2]

    # Create and open the DBFReader instance.
    dr = DBFReader(dbf_fn)
    dr.open()

    # Create the PDFWriter instance and set some of its fields.
    pw = PDFWriter(pdf_fn)
    pw.setFont("Courier", 10)

    # The page header and footer carry the same text.
    gen_datetime = time.asctime()
    banner = "Generated by DBFtoPDF: Input: " + dbf_fn + \
        " At: " + gen_datetime
    pw.setHeader(banner)
    pw.setFooter(banner)

    # Separator line used for logical grouping of the output.
    sep = "=" * 60

    file_header = dr.read_dbf_file_header()

    _write_file_header_section(pw, dbf_fn, file_header, sep)
    _write_field_headers_section(pw, dr, file_header["num_flds"], sep)
    _write_data_records_section(pw, dr, sep)

    dr.close()
    pw.close()