Ejemplo n.º 1
0
"""
Extracting the text of a simple docx file. 
"""
import os
import sys

# Put this script's parent directory on the module search path (sys.path),
# presumably so the ``docx`` package imported below is found there.
_script_dir = os.path.dirname(__file__)
path = os.path.abspath(os.path.join(_script_dir, os.pardir))
sys.path.append(path)

from docx.document import DocxDocument


if __name__ == '__main__':
    # Command-line entry point: read the text of the docx file named by the
    # first argument and write it, as UTF-8, to the file named by the second.
    import io

    # Check the argument count explicitly.  The previous bare ``except``
    # reported *every* failure (unreadable file, Ctrl-C, ...) as a usage
    # error; now only missing arguments print the usage text and any real
    # error surfaces with its own message.
    if len(sys.argv) < 3:
        print('Please supply an input and output file. For example:')
        print('''  extract_text.py 'My Office 2007 extract.docx' 'outputfile.txt' ''')
        # Non-zero status so shells and callers can detect the misuse.
        sys.exit(1)

    doc = DocxDocument(sys.argv[1])

    # Fetch all the text out of the document we just opened.
    # NOTE(review): get_text() is assumed to return an iterable of text
    # strings, one per paragraph -- confirm against DocxDocument.
    paragraphs = doc.get_text()

    # Write the document's text with a blank line between paragraphs.
    # Encoding happens once, at the file boundary, instead of per paragraph;
    # joining pre-encoded bytes with a str separator fails on Python 3.
    # The ``with`` block guarantees the output file is flushed and closed.
    with io.open(sys.argv[2], 'w', encoding='utf-8') as newfile:
        newfile.write(u'\n\n'.join(paragraphs))
Ejemplo n.º 2
0
"""
Creating a new docx document from a template directory and adding some elements to it.
"""
import os
import sys

# Put this script's parent directory on the module search path (sys.path),
# presumably so the ``docx`` package imported below is found there.
_script_dir = os.path.dirname(__file__)
path = os.path.abspath(os.path.join(_script_dir, os.pardir))
sys.path.append(path)

from docx.document import DocxDocument
from docx.elements import *
from docx.meta import CoreProperties, WordRelationships

if __name__ == '__main__':
    # Create a new document, pointing it at the 'template' directory that
    # sits next to this script.
    script_dir = os.path.dirname(__file__)
    template_path = os.path.abspath(os.path.join(script_dir, 'template'))
    doc = DocxDocument(template_dir=template_path)

    # Title and subtitle headings, each built and appended in turn.
    for heading_arg, title in (
        (1, "Welcome to Python's docx module"),
        (2, 'Make and edit docx in 200 lines of pure Python'),
    ):
        doc.add(heading(title, heading_arg))

    # Introductory paragraph leading into the numbered list.
    intro_text = 'The module was created when I was looking for a Python support for MS Word .doc files on PyPI and Stackoverflow. Unfortunately, the only solutions I could find used:'
    doc.add(paragraph(intro_text))

    # Numbered list: one 'ListNumber'-styled paragraph per entry.
    numbered_items = (
        'COM automation',
        '.net or Java',
        'Automating OpenOffice or MS Office',
    )
    for item in numbered_items:
        doc.add(paragraph(item, style='ListNumber'))
    doc.add(paragraph('For those of us who prefer something simpler, I made docx.'))

    # Next section: heading plus its lead-in sentence.
    doc.add(heading('Making documents', 2))
    doc.add(paragraph('The docx module has the following features:'))

    # Add some bullets
Ejemplo n.º 3
0
"""
Modifying an existing docx document: replacing text, swapping a placeholder
for a picture and adding an element to the end of it.
"""
import os
import sys
import re

# Put this script's parent directory on the module search path (sys.path),
# presumably so the ``docx`` package imported below is found there.
_script_dir = os.path.dirname(__file__)
path = os.path.abspath(os.path.join(_script_dir, os.pardir))
sys.path.append(path)

from docx.document import DocxDocument
from docx.elements import *
from docx.meta import CoreProperties, WordRelationships


# Open the existing document that the rest of this script edits.
doc = DocxDocument('modify.docx')

# Swap the sample sentence for its modified counterpart.
original_text = 'This is a sample document'
modified_text = 'This is a modified document'
doc.replace(original_text, modified_text)

# Build a picture element and put it where the 'IMAGE' placeholder sits.
pic_paragraph = picture(
    doc,
    'python_logo.png',
    'This is a test description',
)
doc.replace('IMAGE', pic_paragraph)

# Add one more heading at the end of the document.
closing_heading = heading('Adding another element to the end of this document.', 1)
doc.add(closing_heading)

# Save the result under a new file name.
doc.save('modified_document.docx')