def __init__(self):
    """Split input1.pdf into separate PDF files, one per page.

    Every page of the source document is copied into a fresh ``Document``
    which is then saved in the same data directory as ``page_<n>.pdf``,
    where ``<n>`` is the page number passed to ``get_Item``.
    """
    dataDir = Settings.dataDir + 'WorkingWithPages/SplitAllPages/'

    # Open the document whose pages are to be split out.
    pdf = Document(dataDir + 'input1.pdf')
    source_pages = pdf.getPages()

    # Page numbers run from 1 up to and including size().
    for page_number in range(1, source_pages.size() + 1):
        # Build a new document that holds only the current page.
        new_document = Document()
        new_document.getPages().add(source_pages.get_Item(page_number))

        # The file name must contain the real page number.  The previous
        # Ruby-style "#{...}" placeholder is plain text in Python, so every
        # page was written to the same file and overwrote the one before.
        new_document.save(dataDir + ("page_%d.pdf" % page_number))

    print("Split process completed successfully!")
def __init__(self):
    """Extract the text of all pages of input1.pdf into a text file.

    A ``TextAbsorber`` is accepted by the document's page collection and
    the text it returns is written to ``extracted_text.out.txt`` in the
    same data directory.
    """
    dataDir = Settings.dataDir + "WorkingWithText/ExtractTextFromAllPages/"

    # Open the target document.
    pdf = Document(dataDir + "input1.pdf")

    # Run the absorber over the whole page collection.  To extract text from
    # a single page only, accept the absorber on that page instead, e.g.
    # pdf.getPages().get_Item(1).accept(text_absorber).
    text_absorber = TextAbsorber()
    pdf.getPages().accept(text_absorber)
    extracted_text = text_absorber.getText()

    # Close the writer even when the write fails so that the underlying
    # file handle is never leaked.
    writer = FileWriter(File(dataDir + "extracted_text.out.txt"))
    try:
        writer.write(extracted_text)
    finally:
        writer.close()

    print("Text extracted successfully. Check output file.")
def __init__(self):
    """Extract the text of all pages of input1.pdf into a text file.

    A ``TextAbsorber`` is accepted by the document's page collection and
    the text it returns is written to ``extracted_text.out.txt`` in the
    same data directory.
    """
    dataDir = Settings.dataDir + 'WorkingWithText/ExtractTextFromAllPages/'

    # Open the target document.
    pdf = Document(dataDir + 'input1.pdf')

    # Run the absorber over the whole page collection.  To extract text from
    # a single page only, accept the absorber on that page instead, e.g.
    # pdf.getPages().get_Item(1).accept(text_absorber).
    text_absorber = TextAbsorber()
    pdf.getPages().accept(text_absorber)
    extracted_text = text_absorber.getText()

    # Close the writer even when the write fails so that the underlying
    # file handle is never leaked.
    writer = FileWriter(File(dataDir + "extracted_text.out.txt"))
    try:
        writer.write(extracted_text)
    finally:
        writer.close()

    print("Text extracted successfully. Check output file.")
def __init__(self):
    """Split input1.pdf into separate PDF files, one per page.

    Every page of the source document is copied into a fresh ``Document``
    which is then saved in the same data directory as ``page_<n>.pdf``,
    where ``<n>`` is the page number passed to ``get_Item``.
    """
    dataDir = Settings.dataDir + 'WorkingWithPages/SplitAllPages/'

    # Open the document whose pages are to be split out.
    pdf = Document(dataDir + 'input1.pdf')
    source_pages = pdf.getPages()

    # Page numbers run from 1 up to and including size().
    for page_number in range(1, source_pages.size() + 1):
        # Build a new document that holds only the current page.
        new_document = Document()
        new_document.getPages().add(source_pages.get_Item(page_number))

        # The file name must contain the real page number.  The previous
        # Ruby-style "#{...}" placeholder is plain text in Python, so every
        # page was written to the same file and overwrote the one before.
        new_document.save(dataDir + ("page_%d.pdf" % page_number))

    print("Split process completed successfully!")
def __init__(self):
    """Delete page 2 of input1.pdf's page collection and save as output.pdf."""
    data_dir = Settings.dataDir + 'WorkingWithPages/DeletePage/'

    # Load the document and drop the page through its page collection.
    document = Document(data_dir + 'input1.pdf')
    pages = document.getPages()
    pages.delete(2)

    # Write the shortened document next to the input file.
    document.save(data_dir + "output.pdf")

    print("Page deleted successfully!")
def __init__(self):
    """Add an empty page to input1.pdf's page collection and save as output.pdf."""
    data_dir = Settings.dataDir + 'WorkingWithPages/InsertEmptyPageAtEndOfFile/'

    # Load the document to be extended.
    document = Document(data_dir + 'input1.pdf')

    # add() without arguments inserts a new empty page.
    pages = document.getPages()
    pages.add()

    # Write the extended document next to the input file.
    document.save(data_dir + "output.pdf")

    print("Empty page added successfully!")
def __init__(self):
    """Create a one-page PDF holding an HTML fragment and save it.

    A new ``Document`` receives one page; an ``HtmlFragment`` with a bottom
    margin of 10 and a top margin of 200 is added to that page's paragraphs,
    and the document is saved as ``html.output.pdf``.
    """
    data_dir = Settings.dataDir + "WorkingWithText/AddHtml/"

    # Start from a blank document and give it a page to draw on.
    document = Document()
    first_page = document.getPages().add()

    # Margin details applied to the fragment below.
    fragment_margin = MarginInfo()
    fragment_margin.setBottom(10)
    fragment_margin.setTop(200)

    # NOTE(review): "<fontsize=10>" is not a standard HTML tag; possibly
    # "<font size=10>" was intended - confirm before changing the markup.
    html_title = HtmlFragment("<fontsize=10><b><i>Table</i></b></fontsize>")
    html_title.setMargin(fragment_margin)

    # Place the fragment on the page and write the PDF.
    first_page.getParagraphs().add(html_title)
    document.save(data_dir + "html.output.pdf")

    print("HTML added successfully")
def __init__(self):
    """Add the text "main text" to page 1 of input1.pdf.

    The fragment is positioned at (100, 600), set in the "Verdana" font at
    size 14, appended to the page through a ``TextBuilder``, and the
    document is saved as ``Text_Added.pdf``.
    """
    data_dir = Settings.dataDir + 'WorkingWithText/AddText/'

    # Load the document and pick the page that receives the text.
    document = Document(data_dir + 'input1.pdf')
    target_page = document.getPages().get_Item(1)

    # Build the fragment and place it on the page coordinates.
    fragment = TextFragment("main text")
    fragment.setPosition(Position(100, 600))

    # Look the font up first, then apply font and size to the fragment.
    fonts = FontRepository()
    verdana = fonts.findFont("Verdana")
    fragment.getTextState().setFont(verdana)
    fragment.getTextState().setFontSize(14)

    # Append the fragment to the page and write the PDF.
    TextBuilder(target_page).appendText(fragment)
    document.save(data_dir + "Text_Added.pdf")

    print("Text added successfully")
def __init__(self):
    """Create a one-page PDF holding an HTML fragment and save it.

    A new ``Document`` receives one page; an ``HtmlFragment`` with a bottom
    margin of 10 and a top margin of 200 is added to that page's paragraphs,
    and the document is saved as ``html.output.pdf``.
    """
    data_dir = Settings.dataDir + 'WorkingWithText/AddHtml/'

    # Start from a blank document and give it a page to draw on.
    document = Document()
    first_page = document.getPages().add()

    # Margin details applied to the fragment below.
    fragment_margin = MarginInfo()
    fragment_margin.setBottom(10)
    fragment_margin.setTop(200)

    # NOTE(review): "<fontsize=10>" is not a standard HTML tag; possibly
    # "<font size=10>" was intended - confirm before changing the markup.
    html_title = HtmlFragment("<fontsize=10><b><i>Table</i></b></fontsize>")
    html_title.setMargin(fragment_margin)

    # Place the fragment on the page and write the PDF.
    first_page.getParagraphs().add(html_title)
    document.save(data_dir + "html.output.pdf")

    print("HTML added successfully")
def __init__(self):
    """Add the text "main text" to page 1 of input1.pdf.

    The fragment is positioned at (100, 600), set in the "Verdana" font at
    size 14, appended to the page through a ``TextBuilder``, and the
    document is saved as ``Text_Added.pdf``.
    """
    data_dir = Settings.dataDir + 'WorkingWithText/AddText/'

    # Load the document and pick the page that receives the text.
    document = Document(data_dir + 'input1.pdf')
    target_page = document.getPages().get_Item(1)

    # Build the fragment and place it on the page coordinates.
    fragment = TextFragment("main text")
    fragment.setPosition(Position(100, 600))

    # Look the font up first, then apply font and size to the fragment.
    fonts = FontRepository()
    verdana = fonts.findFont("Verdana")
    fragment.getTextState().setFont(verdana)
    fragment.getTextState().setFontSize(14)

    # Append the fragment to the page and write the PDF.
    TextBuilder(target_page).appendText(fragment)
    document.save(data_dir + "Text_Added.pdf")

    print("Text added successfully")
def __init__(self):
    """Add the pages of input2.pdf to input1.pdf and save the result.

    The combined document is written as ``Concatenate_output.pdf`` in the
    same data directory.
    """
    data_dir = Settings.dataDir + 'WorkingWithPages/ConcatenatePdfFiles/'

    # input1.pdf is the target that receives pages; input2.pdf supplies them.
    target = Document(data_dir + 'input1.pdf')
    source = Document(data_dir + 'input2.pdf')

    # Hand the source page collection to the target page collection.
    target_pages = target.getPages()
    target_pages.add(source.getPages())

    # Only the target document is saved; it now carries both page sets.
    target.save(data_dir + "Concatenate_output.pdf")

    print("New document has been saved, please check the output file")
def __init__(self):
    """Copy page 1 of input1.pdf into a new document saved as output.pdf."""
    data_dir = Settings.dataDir + 'WorkingWithPages/GetPage/'

    # Load the source document and fetch the page from its page collection.
    source = Document(data_dir + 'input1.pdf')
    first_page = source.getPages().get_Item(1)

    # A fresh document receives the fetched page as its content.
    single_page_doc = Document()
    single_page_doc.getPages().add(first_page)

    # Write the new document next to the input file.
    single_page_doc.save(data_dir + "output.pdf")

    print("Process completed successfully!")
def __init__(self):
    """Print the size of input1.pdf's page collection.

    The label "Page Count:" and the count are printed on separate lines.
    """
    data_dir = Settings.dataDir + 'WorkingWithPages/GetNumberOfPages/'

    # Load the document whose pages are to be counted.
    document = Document(data_dir + 'input1.pdf')

    print("Page Count:")
    print(document.getPages().size())
def __init__(self):
    """Attach JavaScript actions to input1.pdf and save the result.

    A print-dialog script is set as the document's open action, and alert
    scripts are set as the open and close actions of page 2.  The document
    is saved as ``JavaScript-Added.pdf``.
    """
    data_dir = Settings.dataDir + "WorkingWithDocumentObject/AddJavascript/"

    # Load the document that receives the scripts.
    document = Document(data_dir + "input1.pdf")

    # Document level: script assigned as the document's open action.
    document.setOpenAction(
        JavascriptAction("this.print({bUI:true,bSilent:false,bShrinkToFit:true})"))

    # Page level: alerts assigned to the open and close actions of page 2.
    second_page = document.getPages().get_Item(2)
    on_open = JavascriptAction("app.alert('page 2 is opened')")
    second_page.getActions().setOnOpen(on_open)
    on_close = JavascriptAction("app.alert('page 2 is closed')")
    second_page.getActions().setOnClose(on_close)

    # Write the scripted document next to the input file.
    document.save(data_dir + "JavaScript-Added.pdf")

    print("Added JavaScript Successfully, please check the output file.")
def __init__(self):
    """Attach JavaScript actions to input1.pdf and save the result.

    A print-dialog script is set as the document's open action, and alert
    scripts are set as the open and close actions of page 2.  The document
    is saved as ``JavaScript-Added.pdf``.
    """
    data_dir = Settings.dataDir + 'WorkingWithDocumentObject/AddJavascript/'

    # Load the document that receives the scripts.
    document = Document(data_dir + "input1.pdf")

    # Document level: script assigned as the document's open action.
    document.setOpenAction(
        JavascriptAction("this.print({bUI:true,bSilent:false,bShrinkToFit:true})"))

    # Page level: alerts assigned to the open and close actions of page 2.
    second_page = document.getPages().get_Item(2)
    on_open = JavascriptAction("app.alert('page 2 is opened')")
    second_page.getActions().setOnOpen(on_open)
    on_close = JavascriptAction("app.alert('page 2 is closed')")
    second_page.getActions().setOnClose(on_close)

    # Write the scripted document next to the input file.
    document.save(data_dir + "JavaScript-Added.pdf")

    print("Added JavaScript Successfully, please check the output file.")
def __init__(self):
    """Set the page size of page 1 of input1.pdf and save as output.pdf.

    The size passed to ``setPageSize`` is (597.6, 842.4).  These are the A4
    dimensions of 8.3 x 11.7 inches expressed in points, at 72 points per
    inch.
    """
    data_dir = Settings.dataDir + 'WorkingWithPages/UpdatePageDimensions/'

    # A4 in points: 8.3 in and 11.7 in at 72 points per inch.
    # NOTE(review): argument order is presumably (width, height) - confirm.
    a4_short_side = 597.6
    a4_long_side = 842.4

    # Load the document and fetch the page to resize.
    document = Document(data_dir + 'input1.pdf')
    first_page = document.getPages().get_Item(1)
    first_page.setPageSize(a4_short_side, a4_long_side)

    # Write the resized document next to the input file.
    document.save(data_dir + "output.pdf")

    print("Dimensions updated successfully!")
def __init__(self):
    """Print the box geometry, number and rotation of page 1 of input1.pdf.

    For each of ArtBox, BleedBox, CropBox, MediaBox, TrimBox and Rect the
    height, width, LLX, LLY, URX and URY values are printed, followed by
    the page number and the page rotation.  Every label and every value is
    printed on its own line.
    """
    dataDir = Settings.dataDir + 'WorkingWithPages/GetPageProperties/'

    # Load the document and fetch the page to inspect.
    pdf_document = Document(dataDir + 'input1.pdf')
    page_collection = pdf_document.getPages()
    pdf_page = page_collection.get_Item(1)

    # Each entry pairs the printed label with the page getter that returns
    # the matching rectangle.  The getter is only called when its turn
    # comes, so output order matches the lookup order.
    box_getters = (
        ("ArtBox", pdf_page.getArtBox),
        ("BleedBox", pdf_page.getBleedBox),
        ("CropBox", pdf_page.getCropBox),
        ("MediaBox", pdf_page.getMediaBox),
        ("TrimBox", pdf_page.getTrimBox),
        ("Rect", pdf_page.getRect),
    )

    # One shared loop replaces six copy-pasted sections.  This also fixes
    # the TrimBox LLY lookup, which called a bare, undefined getLLY(), and
    # the Rect URY line, which concatenated a string with a number.
    for box_name, get_box in box_getters:
        box = get_box()
        print(box_name + " : Height = ")
        print(box.getHeight())
        print(", Width = ")
        print(box.getWidth())
        print(", LLX = ")
        print(box.getLLX())
        print(", LLY = ")
        print(box.getLLY())
        print(", URX = ")
        print(box.getURX())
        print(", URY = ")
        print(box.getURY())

    print("Page Number :")
    print(pdf_page.getNumber())
    print("Rotate :")
    print(pdf_page.getRotate())
def __init__(self):
    """Print the box geometry, number and rotation of page 1 of input1.pdf.

    For each of ArtBox, BleedBox, CropBox, MediaBox, TrimBox and Rect the
    height, width, LLX, LLY, URX and URY values are printed, followed by
    the page number and the page rotation.  Every label and every value is
    printed on its own line.
    """
    dataDir = Settings.dataDir + 'WorkingWithPages/GetPageProperties/'

    # Load the document and fetch the page to inspect.
    pdf_document = Document(dataDir + 'input1.pdf')
    page_collection = pdf_document.getPages()
    pdf_page = page_collection.get_Item(1)

    # Each entry pairs the printed label with the page getter that returns
    # the matching rectangle.  The getter is only called when its turn
    # comes, so output order matches the lookup order.
    box_getters = (
        ("ArtBox", pdf_page.getArtBox),
        ("BleedBox", pdf_page.getBleedBox),
        ("CropBox", pdf_page.getCropBox),
        ("MediaBox", pdf_page.getMediaBox),
        ("TrimBox", pdf_page.getTrimBox),
        ("Rect", pdf_page.getRect),
    )

    # One shared loop replaces six copy-pasted sections.  This also fixes
    # the TrimBox LLY lookup, which called a bare, undefined getLLY(), and
    # the Rect URY line, which concatenated a string with a number.
    for box_name, get_box in box_getters:
        box = get_box()
        print(box_name + " : Height = ")
        print(box.getHeight())
        print(", Width = ")
        print(box.getWidth())
        print(", LLX = ")
        print(box.getLLX())
        print(", LLY = ")
        print(box.getLLY())
        print(", URX = ")
        print(box.getURX())
        print(", URY = ")
        print(box.getURY())

    print("Page Number :")
    print(pdf_page.getNumber())
    print("Rotate :")
    print(pdf_page.getRotate())