"""Extract the text of a simple docx file into a UTF-8 text file."""

import io
import os
import sys

# Add the parent directory to the module search path so that the ``docx``
# package imported below can be found.
path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
sys.path.append(path)

from docx.document import DocxDocument


def main():
    """Write the paragraphs of the docx named by argv[1] to the file argv[2].

    Paragraphs are separated by a blank line in the output.  When either
    command-line argument is missing, a usage message is printed and the
    script exits with a non-zero status.  Any other failure (unreadable
    input, unwritable output) propagates with its original traceback
    instead of being reported as a usage error.
    """
    if len(sys.argv) < 3:
        print('Please supply an input and output file. For example:')
        print(''' extract_text.py 'My Office 2007 extract.docx' 'outputfile.txt' ''')
        sys.exit(1)

    doc = DocxDocument(sys.argv[1])

    # Fetch all the text out of the document we just opened.
    paragraphs = doc.get_text()

    # Encode once at the I/O boundary: io.open handles the UTF-8 encoding,
    # and the ``with`` block guarantees the file is flushed and closed.
    # The output file is only created after the text was extracted, so a
    # failed extraction does not leave an empty file behind.
    with io.open(sys.argv[2], 'w', encoding='utf-8') as newfile:
        # Write the document's text with a blank line between paragraphs.
        newfile.write(u'\n\n'.join(paragraphs))


if __name__ == '__main__':
    main()
"""Create a docx document from a template and append elements to it."""

import os
import sys

# Put the parent directory on the module search path so that the ``docx``
# package imported below can be found.
path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
sys.path.append(path)

from docx.document import DocxDocument
from docx.elements import *
from docx.meta import CoreProperties, WordRelationships

if __name__ == '__main__':
    # The new document is built from the 'template' directory that sits
    # next to this script.
    template_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), 'template')
    )
    doc = DocxDocument(template_dir=template_path)

    # Two headings followed by the introductory paragraph.
    doc.add(heading("Welcome to Python's docx module", 1))
    doc.add(heading('Make and edit docx in 200 lines of pure Python', 2))
    intro_text = (
        'The module was created when I was looking for a Python support '
        'for MS Word .doc files on PyPI and Stackoverflow. Unfortunately, '
        'the only solutions I could find used:'
    )
    doc.add(paragraph(intro_text))

    # One 'ListNumber'-styled paragraph per alternative.
    alternatives = (
        'COM automation',
        '.net or Java',
        'Automating OpenOffice or MS Office',
    )
    for entry in alternatives:
        doc.add(paragraph(entry, style='ListNumber'))

    doc.add(paragraph('For those of us who prefer something simpler, I made docx.'))

    # Section introducing the feature list.
    doc.add(heading('Making documents', 2))
    doc.add(paragraph('The docx module has the following features:'))

    # Add some bullets
"""Modify an existing docx document and save it under a new name.

The script opens 'modify.docx', replaces a sentence, replaces the 'IMAGE'
placeholder with a picture, appends a heading and saves the result as
'modified_document.docx'.
"""

import os
import sys
import re

# Put the parent directory on the module search path so that the ``docx``
# package imported below can be found.
path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
sys.path.append(path)

from docx.document import DocxDocument
from docx.elements import *
from docx.meta import CoreProperties, WordRelationships

# Open the document that is going to be edited.
doc = DocxDocument('modify.docx')

# Replace one string of text with another.
doc.replace(
    'This is a sample document',
    'This is a modified document',
)

# Replace the 'IMAGE' placeholder with a picture element.
pic_paragraph = picture(
    doc,
    'python_logo.png',
    'This is a test description',
)
doc.replace('IMAGE', pic_paragraph)

# Append a heading to the end of the document.
closing_heading = heading(
    'Adding another element to the end of this document.',
    1,
)
doc.add(closing_heading)

# Save the edited document under a new file name.
doc.save('modified_document.docx')