def __init__(self):
    """Extract the text of all pages of input1.pdf into a text file.

    The document is read from the ``WorkingWithText/ExtractTextFromAllPages/``
    folder under ``Settings.dataDir``; the extracted text is written to
    ``extracted_text.out.txt`` in the same folder.
    """
    dataDir = Settings.dataDir + "WorkingWithText/ExtractTextFromAllPages/"

    # Open the target document
    pdf = Document(dataDir + "input1.pdf")

    # Hand a TextAbsorber to the whole page collection.  To extract from a
    # single page only, call accept() on that page instead, e.g.
    # pdf.getPages().get_Item(1).accept(text_absorber).
    text_absorber = TextAbsorber()
    pdf.getPages().accept(text_absorber)

    # Get the extracted text
    extracted_text = text_absorber.getText()

    # Create a writer for the output file and write the text
    writer = FileWriter(File(dataDir + "extracted_text.out.txt"))
    try:
        writer.write(extracted_text)
    finally:
        # Close the stream even when write() fails so the handle is not leaked
        writer.close()

    print("Text extracted successfully. Check output file.")
def __init__(self):
    """Append a text fragment to a page of input1.pdf.

    The fragment "main text" is placed at Position(100, 600) on the page at
    index 1, using the "Verdana" font at size 14, and the document is saved
    as ``Text_Added.pdf`` in the ``WorkingWithText/AddText/`` folder.
    """
    folder = Settings.dataDir + "WorkingWithText/AddText/"

    # Load the source document and pick the page that receives the text
    document = Document(folder + "input1.pdf")
    target_page = document.getPages().get_Item(1)

    # Build the fragment and place it on the page
    fragment = TextFragment("main text")
    fragment.setPosition(Position(100, 600))

    # Configure font and size through the fragment's text state
    fonts = FontRepository()
    verdana = fonts.findFont("Verdana")
    fragment.getTextState().setFont(verdana)
    fragment.getTextState().setFontSize(14)

    # Append the fragment to the page through a TextBuilder
    TextBuilder(target_page).appendText(fragment)

    # Persist the modified document
    document.save(folder + "Text_Added.pdf")

    print("Text added successfully")
def __init__(self):
    """Create a new PDF containing one HTML fragment.

    A page is added to an empty document, an HtmlFragment with a bottom
    margin of 10 and a top margin of 200 is added to the page's paragraphs,
    and the result is saved as ``html.output.pdf`` in the
    ``WorkingWithText/AddHtml/`` folder.
    """
    out_dir = Settings.dataDir + "WorkingWithText/AddHtml/"

    # Start from an empty document and give it one page
    document = Document()
    new_page = document.getPages().add()

    # HTML contents to place on the page
    html_title = HtmlFragment("<fontsize=10><b><i>Table</i></b></fontsize>")

    # Margin details for the fragment
    title_margin = MarginInfo()
    title_margin.setBottom(10)
    title_margin.setTop(200)
    html_title.setMargin(title_margin)

    # Add the fragment to the page's paragraphs collection
    paragraphs = new_page.getParagraphs()
    paragraphs.add(html_title)

    # Save the PDF file
    document.save(out_dir + "html.output.pdf")

    print("HTML added successfully")
def __init__(self):
    """Extract the text of all pages of input1.pdf into a text file.

    Input and output both live in the
    ``WorkingWithText/ExtractTextFromAllPages/`` folder under
    ``Settings.dataDir``; the output file is ``extracted_text.out.txt``.
    """
    dataDir = Settings.dataDir + 'WorkingWithText/ExtractTextFromAllPages/'

    # Open the target document
    pdf = Document(dataDir + 'input1.pdf')

    # Create a TextAbsorber and let every page accept it.  For a single
    # page, call accept() on that page instead, e.g.
    # pdf.getPages().get_Item(1).accept(text_absorber).
    text_absorber = TextAbsorber()
    pdf.getPages().accept(text_absorber)

    # Get the extracted text
    extracted_text = text_absorber.getText()

    # Create a writer and open the output file
    writer = FileWriter(File(dataDir + "extracted_text.out.txt"))
    try:
        writer.write(extracted_text)
    finally:
        # Always close the stream; without this a failed write leaks it
        writer.close()

    print("Text extracted successfully. Check output file.")
def __init__(self):
    """Add the text "main text" to the page at index 1 of input1.pdf.

    The text is positioned at (100, 600), set in "Verdana" at size 14, and
    the updated document is written to ``Text_Added.pdf``.
    """
    base_path = Settings.dataDir + "WorkingWithText/AddText/"

    # Instantiate the Document object and get the particular page
    pdf_doc = Document(base_path + "input1.pdf")
    page_one = pdf_doc.getPages().get_Item(1)

    # Create the text fragment and set its position
    main_text = TextFragment("main text")
    position = Position(100, 600)
    main_text.setPosition(position)

    # Set text properties
    repository = FontRepository()
    main_text.getTextState().setFont(repository.findFont("Verdana"))
    main_text.getTextState().setFontSize(14)

    # Append the fragment to the PDF page
    builder = TextBuilder(page_one)
    builder.appendText(main_text)

    # Save the PDF file
    pdf_doc.save(base_path + "Text_Added.pdf")

    print("Text added successfully")
def __init__(self):
    """Split input1.pdf into one PDF file per page.

    Each page of the source document is copied into a fresh Document that is
    saved as ``page_<index>.pdf`` in the ``WorkingWithPages/SplitAllPages/``
    folder under ``Settings.dataDir``.
    """
    dataDir = Settings.dataDir + 'WorkingWithPages/SplitAllPages/'

    # Open the target document
    pdf = Document(dataDir + 'input1.pdf')
    pages = pdf.getPages()
    total_size = pages.size()

    # Page indices run from 1 to size() inclusive
    for pdf_page in range(1, total_size + 1):
        # Create a new Document holding just this page
        new_document = Document()
        new_document.getPages().add(pages.get_Item(pdf_page))

        # "#{...}" is Ruby interpolation and stays a literal in Python, so the
        # old code wrote every page to the same file; format the index in.
        new_document.save(dataDir + ("page_%d.pdf" % pdf_page))

    print("Split process completed successfully!")
def __init__(self):
    """Build a one-page PDF whose content is an HTML fragment.

    The fragment gets a MarginInfo with bottom 10 and top 200 before being
    added to the page; the document is saved as ``html.output.pdf``.
    """
    target_dir = Settings.dataDir + 'WorkingWithText/AddHtml/'

    # New, empty document with a single added page
    pdf_doc = Document()
    first_page = pdf_doc.getPages().add()

    # Instantiate the HtmlFragment with its HTML contents
    fragment = HtmlFragment("<fontsize=10><b><i>Table</i></b></fontsize>")

    # Describe and apply the margins
    margins = MarginInfo()
    margins.setBottom(10)
    margins.setTop(200)
    fragment.setMargin(margins)

    # Put the fragment into the page's paragraphs collection
    first_page.getParagraphs().add(fragment)

    # Write the PDF file
    pdf_doc.save(target_dir + 'html.output.pdf')

    print("HTML added successfully")
def __init__(self):
    """Open input1.pdf and save it under the name ``output.doc``.

    Both files live in the ``WorkingWithDocumentConversion/PdfToDoc/``
    folder under ``Settings.dataDir``.
    """
    folder = Settings.dataDir + "WorkingWithDocumentConversion/PdfToDoc/"

    # Open the source document
    source = Document(folder + "input1.pdf")

    # NOTE(review): save() receives only a path here; confirm against the
    # Aspose.Pdf documentation that the ".doc" extension alone selects DOC
    # output.
    source.save(folder + "output.doc")

    print("Document has been converted successfully")
def __init__(self):
    """Print the size of the page collection of input1.pdf.

    The document is read from the ``WorkingWithPages/GetNumberOfPages/``
    folder under ``Settings.dataDir``.
    """
    folder = Settings.dataDir + "WorkingWithPages/GetNumberOfPages/"

    # Open the PDF document and ask its page collection for its size
    document = Document(folder + "input1.pdf")
    number_of_pages = document.getPages().size()

    # Label and value are printed on separate lines
    print("Page Count:")
    print(number_of_pages)
def __init__(self):
    """Save input1.pdf as ``Converted_Excel.xls`` using ExcelSaveOptions.

    Both files live in the ``WorkingWithDocumentConversion/PdfToExcel/``
    folder under ``Settings.dataDir``.
    """
    folder = Settings.dataDir + "WorkingWithDocumentConversion/PdfToExcel/"

    # Open the source document
    source = Document(folder + "input1.pdf")

    # Default Excel save options select the output format for save()
    excel_options = ExcelSaveOptions()
    output_path = folder + "Converted_Excel.xls"
    source.save(output_path, excel_options)

    print("Document has been converted successfully")
def __init__(self):
    """Load Example.svg with SvgLoadOptions and save it as ``SVG.pdf``.

    Both files live in the ``WorkingWithDocumentConversion/SvgToPdf/``
    folder under ``Settings.dataDir``.
    """
    folder = Settings.dataDir + "WorkingWithDocumentConversion/SvgToPdf/"

    # Load options telling Document that the input is SVG
    svg_options = SvgLoadOptions()

    # Create the document object from the SVG file
    svg_path = folder + "Example.svg"
    converted = Document(svg_path, svg_options)

    # Save the loaded document as PDF
    converted.save(folder + "SVG.pdf")

    print("Document has been converted successfully")
def optimize_web(dataDir):
    """Call optimize() on input1.pdf and save it as ``Optimized_Web.pdf``.

    Note: ``dataDir`` is reassigned on the first line, so the value passed
    by the caller is not used; the folder is always
    ``WorkingWithDocumentObject/Optimize/`` under ``Settings.dataDir``.
    """
    dataDir = Settings.dataDir + "WorkingWithDocumentObject/Optimize/"

    # Open the PDF document
    source = Document(dataDir + "input1.pdf")

    # Optimize for web
    source.optimize()

    # Save the output document
    optimized_path = dataDir + "Optimized_Web.pdf"
    source.save(optimized_path)

    print("Optimized PDF for the Web, please check output file.")
def __init__(self):
    """Delete the page at index 2 of input1.pdf and save as ``output.pdf``.

    Both files live in the ``WorkingWithPages/DeletePage/`` folder under
    ``Settings.dataDir``.
    """
    folder = Settings.dataDir + "WorkingWithPages/DeletePage/"

    # Open the target document
    document = Document(folder + "input1.pdf")

    # Remove the page through the page collection
    page_collection = document.getPages()
    page_collection.delete(2)

    # Save the newly generated PDF file
    document.save(folder + "output.pdf")

    print("Page deleted successfully!")
def __init__(self):
    """Write every page of input1.pdf to its own PDF file.

    For each page index from 1 to the size of the page collection, the page
    is added to a new Document which is saved as ``page_<index>.pdf`` in the
    ``WorkingWithPages/SplitAllPages/`` folder under ``Settings.dataDir``.
    """
    dataDir = Settings.dataDir + 'WorkingWithPages/SplitAllPages/'

    # Open the target document
    pdf = Document(dataDir + 'input1.pdf')

    # Loop through all the pages
    pdf_page = 1
    total_size = pdf.getPages().size()
    while pdf_page <= total_size:
        # Create a new Document object
        new_document = Document()

        # Copy the page at this index of the page collection into it
        new_document.getPages().add(pdf.getPages().get_Item(pdf_page))

        # Build a distinct name per page.  The previous "page_#{pdf_page}.pdf"
        # was Ruby-style interpolation, which Python treats as a literal, so
        # all pages were saved over one another.
        output_name = "page_" + str(pdf_page) + ".pdf"
        new_document.save(dataDir + output_name)

        pdf_page += 1

    print("Split process completed successfully!")
def __init__(self):
    """Save input1.pdf as ``Output.svg`` using SvgSaveOptions.

    ``CompressOutputToZipArchive`` is set to False on the options before
    saving.  Both files live in the
    ``WorkingWithDocumentConversion/PdfToSvg/`` folder.
    """
    folder = Settings.dataDir + "WorkingWithDocumentConversion/PdfToSvg/"

    # Open the source document
    source = Document(folder + "input1.pdf")

    # Do not compress the SVG image into a Zip archive
    svg_options = SvgSaveOptions()
    svg_options.CompressOutputToZipArchive = False

    # Save the output as SVG
    svg_path = folder + "Output.svg"
    source.save(svg_path, svg_options)

    print("Document has been converted successfully")
def __init__(self):
    """Convert input1.pdf to ``Output.svg`` with SvgSaveOptions.

    The options object has ``CompressOutputToZipArchive`` set to False and
    is passed to save() together with the output path.
    """
    base_dir = Settings.dataDir + "WorkingWithDocumentConversion/PdfToSvg/"

    # Open the target document
    pdf_doc = Document(base_dir + "input1.pdf")

    # Instantiate SvgSaveOptions and switch off Zip compression
    options = SvgSaveOptions()
    options.CompressOutputToZipArchive = False

    # Write the SVG output
    pdf_doc.save(base_dir + "Output.svg", options)

    print("Document has been converted successfully")
def __init__(self):
    """Print three XMP metadata entries of input1.pdf.

    For each of ``xmp:CreateDate``, ``xmp:Nickname`` and
    ``xmp:CustomProperty`` the key (followed by ": ") is printed on one line
    and the value returned by the document's metadata on the next.
    """
    folder = Settings.dataDir + "WorkingWithDocumentObject/GetXMPMetadata/"

    # Open the PDF document
    document = Document(folder + "input1.pdf")

    # Look each property up in the metadata and print label, then value
    for key in ("xmp:CreateDate", "xmp:Nickname", "xmp:CustomProperty"):
        print(key + ": ")
        print(document.getMetadata().get_Item(key))
def __init__(self):
    """Append the pages of input2.pdf to input1.pdf and save the result.

    The combined document is written to ``Concatenate_output.pdf`` in the
    ``WorkingWithPages/ConcatenatePdfFiles/`` folder.
    """
    folder = Settings.dataDir + "WorkingWithPages/ConcatenatePdfFiles/"

    # Open the target document, then the source document
    target = Document(folder + "input1.pdf")
    source = Document(folder + "input2.pdf")

    # Add the source's page collection to the target's page collection
    target_pages = target.getPages()
    target_pages.add(source.getPages())

    # Save the concatenated output file (the target document)
    target.save(folder + "Concatenate_output.pdf")

    print("New document has been saved, please check the output file")
def __init__(self):
    """Set the page size of the page at index 1 of input1.pdf.

    The page size is set to (597.6, 842.4) and the document is saved as
    ``output.pdf`` in the ``WorkingWithPages/UpdatePageDimensions/`` folder.
    """
    folder = Settings.dataDir + "WorkingWithPages/UpdatePageDimensions/"

    # Open the target document
    document = Document(folder + "input1.pdf")

    # Get the particular page from the page collection
    target_page = document.getPages().get_Item(1)

    # A4 is 8.3 x 11.7 in; at 72 points per inch that is 597.6 x 842.4 points
    short_side = 597.6
    long_side = 842.4
    target_page.setPageSize(short_side, long_side)

    # Save the newly generated PDF file
    document.save(folder + "output.pdf")

    print("Dimensions updated successfully!")
def __init__(self):
    """Set the document information fields of input1.pdf and save a copy.

    Author, creation date, keywords, modification date, subject and title
    are set on the object returned by ``getInfo()``; the document is then
    saved as ``Updated_Information.pdf``.
    """
    folder = Settings.dataDir + "WorkingWithDocumentObject/SetPdfFileInfo/"

    # Open the PDF document
    document = Document(folder + "input1.pdf")

    # Fill in the document information; each date gets its own fresh Date()
    info = document.getInfo()
    info.setAuthor("Aspose.Pdf for java")
    created = Date()
    info.setCreationDate(created)
    info.setKeywords("Aspose.Pdf, DOM, API")
    modified = Date()
    info.setModDate(modified)
    info.setSubject("PDF Information")
    info.setTitle("Setting PDF Document Information")

    # Save the updated document
    document.save(folder + "Updated_Information.pdf")

    print("Update document information, please check output file.")
def __init__(self):
    """Print the document information fields of input1.pdf.

    Author, creation date, keywords, modification date, subject and title
    are read from the object returned by ``getInfo()``; each label is printed
    on one line and its value on the next.
    """
    folder = Settings.dataDir + "WorkingWithDocumentObject/GetPdfFileInfo/"

    # Open the PDF document and get its information object
    document = Document(folder + "input1.pdf")
    info = document.getInfo()

    # (label, getter name) pairs in output order; each getter is looked up
    # and called only when its turn comes
    fields = (
        ("Author:", "getAuthor"),
        ("Creation Date:", "getCreationDate"),
        ("Keywords:", "getKeywords"),
        ("Modify Date:", "getModDate"),
        ("Subject:", "getSubject"),
        ("Title:", "getTitle"),
    )
    for label, getter_name in fields:
        print(label)
        print(getattr(info, getter_name)())
def __init__(self):
    """Show the document information of input1.pdf on standard output.

    Each field label is printed on its own line, followed by the value
    returned by the matching getter of the ``getInfo()`` object.
    """
    base_dir = Settings.dataDir + 'WorkingWithDocumentObject/GetPdfFileInfo/'

    # Open the PDF document
    pdf_doc = Document(base_dir + 'input1.pdf')

    # Get the document information
    details = pdf_doc.getInfo()

    # Show the document information
    print("Author:")
    author = details.getAuthor()
    print(author)

    print("Creation Date:")
    creation_date = details.getCreationDate()
    print(creation_date)

    print("Keywords:")
    keywords = details.getKeywords()
    print(keywords)

    print("Modify Date:")
    modified_date = details.getModDate()
    print(modified_date)

    print("Subject:")
    subject = details.getSubject()
    print(subject)

    print("Title:")
    title = details.getTitle()
    print(title)
def __init__(self):
    """Print the xmp:CreateDate, xmp:Nickname and xmp:CustomProperty entries.

    The values are read from the metadata of input1.pdf; each label is
    printed on one line and the corresponding value on the next.
    """
    base_dir = Settings.dataDir + 'WorkingWithDocumentObject/GetXMPMetadata/'

    # Open the PDF document
    pdf_doc = Document(base_dir + 'input1.pdf')

    # Get the properties one by one
    print("xmp:CreateDate: ")
    create_date = pdf_doc.getMetadata().get_Item("xmp:CreateDate")
    print(create_date)

    print("xmp:Nickname: ")
    nickname = pdf_doc.getMetadata().get_Item("xmp:Nickname")
    print(nickname)

    print("xmp:CustomProperty: ")
    custom_property = pdf_doc.getMetadata().get_Item("xmp:CustomProperty")
    print(custom_property)
def __init__(self):
    """Copy the page at index 1 of input1.pdf into a new document.

    The new document is saved as ``output.pdf`` in the
    ``WorkingWithPages/GetPage/`` folder under ``Settings.dataDir``.
    """
    folder = Settings.dataDir + "WorkingWithPages/GetPage/"

    # Open the source document and fetch the page from its page collection
    source = Document(folder + "input1.pdf")
    selected_page = source.getPages().get_Item(1)

    # Create an empty document and add the page to its pages collection
    single_page_doc = Document()
    single_page_doc.getPages().add(selected_page)

    # Save the newly generated PDF file
    single_page_doc.save(folder + "output.pdf")

    print("Process completed successfully!")
def __init__(self):
    """Attach JavaScript actions to input1.pdf and save a copy.

    A JavascriptAction is set as the document's open action, and two more
    are set as the on-open and on-close actions of the page at index 2.  The
    result is saved as ``JavaScript-Added.pdf``.
    """
    folder = Settings.dataDir + "WorkingWithDocumentObject/AddJavascript/"

    # Open the PDF document
    document = Document(folder + "input1.pdf")

    # Document level: assign the action to the document's open action
    print_script = "this.print({bUI:true,bSilent:false,bShrinkToFit:true})"
    document.setOpenAction(JavascriptAction(print_script))

    # Page level: actions for opening and closing the page at index 2
    opened_alert = JavascriptAction("app.alert('page 2 is opened')")
    document.getPages().get_Item(2).getActions().setOnOpen(opened_alert)

    closed_alert = JavascriptAction("app.alert('page 2 is closed')")
    document.getPages().get_Item(2).getActions().setOnClose(closed_alert)

    # Save the PDF document
    document.save(folder + "JavaScript-Added.pdf")

    print("Added JavaScript Successfully, please check the output file.")
def __init__(self):
    """Add document-level and page-level JavaScript to input1.pdf.

    The document open action and the on-open / on-close actions of the page
    at index 2 are each set to a JavascriptAction; the document is then
    saved as ``JavaScript-Added.pdf``.
    """
    base_dir = Settings.dataDir + 'WorkingWithDocumentObject/AddJavascript/'

    # Open the PDF document
    pdf_doc = Document(base_dir + 'input1.pdf')

    # Adding JavaScript at document level: instantiate the action with the
    # desired statement and assign it to the document's open action
    open_action = JavascriptAction(
        "this.print({bUI:true,bSilent:false,bShrinkToFit:true})"
    )
    pdf_doc.setOpenAction(open_action)

    # Adding JavaScript at page level
    on_open_actions = pdf_doc.getPages().get_Item(2).getActions()
    on_open_actions.setOnOpen(
        JavascriptAction("app.alert('page 2 is opened')")
    )
    on_close_actions = pdf_doc.getPages().get_Item(2).getActions()
    on_close_actions.setOnClose(
        JavascriptAction("app.alert('page 2 is closed')")
    )

    # Save the PDF document
    pdf_doc.save(base_dir + 'JavaScript-Added.pdf')

    print("Added JavaScript Successfully, please check the output file.")
def __init__(self):
    """Add an empty page to input1.pdf and save the result as ``output.pdf``.

    Both files live in the ``WorkingWithPages/InsertEmptyPageAtEndOfFile/``
    folder under ``Settings.dataDir``.
    """
    folder = Settings.dataDir + "WorkingWithPages/InsertEmptyPageAtEndOfFile/"

    # Open the target document
    document = Document(folder + "input1.pdf")

    # add() without arguments inserts an empty page into the page collection
    page_collection = document.getPages()
    page_collection.add()

    # Save the modified document
    document.save(folder + "output.pdf")

    print("Empty page added successfully!")
def optimize_web(dataDir):
    """Give input1.pdf an expiry-check open action and save a copy.

    The open action is a JavascriptAction that compares the current
    year/month with a hard-coded year (2014) and month (4) and shows an alert
    when the current date is later.  The document is saved as
    ``set_expiration.pdf``.

    Note: ``dataDir`` is reassigned on the first line, so the value passed
    by the caller is not used; the folder is always
    ``WorkingWithDocumentObject/SetExpiration/`` under ``Settings.dataDir``.
    """
    dataDir = Settings.dataDir + "WorkingWithDocumentObject/SetExpiration/"

    # Open the PDF document
    document = Document(dataDir + "input1.pdf")

    # JavaScript source, assembled from adjacent string literals
    expiry_script = (
        "var year=2014;"
        "var month=4;"
        "today = new Date();"
        "today = new Date(today.getFullYear(), today.getMonth());"
        "expiry = new Date(year, month);"
        "if (today.getTime() > expiry.getTime())"
        "app.alert('The file is expired. You need a new one.');"
    )
    expiry_action = JavascriptAction(expiry_script)
    document.setOpenAction(expiry_action)

    # Save the updated document
    document.save(dataDir + "set_expiration.pdf")

    print("Update document information, please check output file.")
def __init__(self):
    """Print the box geometry, number and rotation of a page of input1.pdf.

    For the page at index 1, the ArtBox, BleedBox, CropBox, MediaBox, TrimBox
    and Rect are each reported as height, width, LLX, LLY, URX and URY, with
    every label printed on one line and its value on the next.  The page
    number and rotation follow.
    """
    dataDir = Settings.dataDir + 'WorkingWithPages/GetPageProperties/'

    # Open the PDF document and get the particular page
    pdf_document = Document(dataDir + 'input1.pdf')
    page_collection = pdf_document.getPages()
    pdf_page = page_collection.get_Item(1)

    # Every box is reported the same way, so drive the output from the box
    # names.  The old hand-written sequence called a bare getLLY() for the
    # TrimBox (NameError) and added the Rect URY number to a string
    # (TypeError); going through one code path removes both mistakes.
    box_names = ("ArtBox", "BleedBox", "CropBox", "MediaBox", "TrimBox",
                 "Rect")
    for box_name in box_names:
        box = getattr(pdf_page, "get" + box_name)()
        print(box_name + " : Height = ")
        print(box.getHeight())
        print(", Width = ")
        print(box.getWidth())
        print(", LLX = ")
        print(box.getLLX())
        print(", LLY = ")
        print(box.getLLY())
        print(", URX = ")
        print(box.getURX())
        print(", URY = ")
        print(box.getURY())

    print("Page Number :")
    print(pdf_page.getNumber())
    print("Rotate :")
    print(pdf_page.getRotate())
def __init__(self):
    """Print the window and display properties of input1.pdf.

    Each property label is printed on one line and the value returned by the
    matching Document getter on the next.
    """
    folder = Settings.dataDir + "WorkingWithDocumentObject/GetDocumentWindow/"

    # Open the PDF document
    document = Document(folder + "input1.pdf")

    # (label, getter name) pairs in output order; each getter is resolved
    # and called only when its turn comes
    properties = (
        # Position of document's window - Default: false
        ("CenterWindow :- ", "getCenterWindow"),
        # Predominant reading order; determines the position of pages when
        # displayed side by side - Default: L2R
        ("Direction :- ", "getDirection"),
        # Whether the window's title bar should display the document title;
        # if false, the title bar displays the PDF file name - Default: false
        ("DisplayDocTitle :- ", "getDisplayDocTitle"),
        # Whether to resize the document's window to fit the size of the
        # first displayed page - Default: false
        ("FitWindow :- ", "getFitWindow"),
        # Whether to hide the viewer's menu bar - Default: false
        ("HideMenuBar :-", "getHideMenubar"),
        # Whether to hide the viewer's tool bar - Default: false
        ("HideToolBar :-", "getHideToolBar"),
        # Whether to hide UI elements like scroll bars, leaving only the
        # page contents displayed - Default: false
        ("HideWindowUI :-", "getHideWindowUI"),
        # The document's page mode: how to display the document on exiting
        # full-screen mode
        ("NonFullScreenPageMode :-", "getNonFullScreenPageMode"),
        # The page layout, i.e. single page, one column
        ("PageLayout :-", "getPageLayout"),
        # How the document should display when opened
        ("pageMode :-", "getPageMode"),
    )
    for label, getter_name in properties:
        print(label)
        print(getattr(document, getter_name)())
def __init__(self):
    """Report the page boxes, page number and rotation of input1.pdf.

    The page at index 1 is inspected.  For each of ArtBox, BleedBox, CropBox,
    MediaBox, TrimBox and Rect the height, width, LLX, LLY, URX and URY are
    printed, label on one line and value on the next, followed by the page
    number and rotation.
    """
    dataDir = Settings.dataDir + 'WorkingWithPages/GetPageProperties/'

    # Create the PDF document, get the page collection and the page
    pdf_document = Document(dataDir + 'input1.pdf')
    page_collection = pdf_document.getPages()
    pdf_page = page_collection.get_Item(1)

    # Label / getter pairs shared by all boxes.  The first label is prefixed
    # with the box name, e.g. "ArtBox : Height = ".
    measurements = (
        (" : Height = ", "getHeight"),
        (", Width = ", "getWidth"),
        (", LLX = ", "getLLX"),
        (", LLY = ", "getLLY"),
        (", URX = ", "getURX"),
        (", URY = ", "getURY"),
    )
    box_getters = (
        ("ArtBox", "getArtBox"),
        ("BleedBox", "getBleedBox"),
        ("CropBox", "getCropBox"),
        ("MediaBox", "getMediaBox"),
        ("TrimBox", "getTrimBox"),
        ("Rect", "getRect"),
    )

    # One loop replaces the copy-pasted print sequence, which contained a
    # bare getLLY() call for the TrimBox (NameError) and a string + number
    # concatenation for the Rect URY (TypeError).
    for box_name, box_getter in box_getters:
        prefix = box_name
        for label, value_getter in measurements:
            box = getattr(pdf_page, box_getter)()
            print(prefix + label)
            print(getattr(box, value_getter)())
            prefix = ""  # only the first label carries the box name

    print("Page Number :")
    print(pdf_page.getNumber())
    print("Rotate :")
    print(pdf_page.getRotate())