Example #1
0
  def get(self):
    """Fetch a PDF by URL and respond with its extracted text as XML.

    Request parameters:
        url: percent-encoded location of the PDF to download.
        pg:  optional 1-based page number; when absent or empty, the text
             of every page is returned.

    The response has content type ``application/xml`` and the shape
    ``<document url=... title=...><page number=N><![CDATA[...]]></page>...``.

    Raises:
        ValueError: if ``pg`` is not an integer (from ``int``).
        Errors from ``urllib2.urlopen`` and the PDF reader propagate
        unchanged.
    """
    # Imported locally so this method does not depend on a file-level import.
    from xml.sax.saxutils import escape

    def attr(value):
        # Escape &, <, > and both quote characters so arbitrary text is
        # safe inside the single-quoted attributes emitted below.
        return escape(value, {"'": "&apos;", '"': "&quot;"})

    def page_xml(number, text):
        # A literal "]]>" in the text would terminate the CDATA section
        # early; split it across two adjacent CDATA sections instead.
        safe_text = text.replace("]]>", "]]]]><![CDATA[>")
        return "<page number='%d'><![CDATA[%s]]></page>\n" % (number, safe_text)

    url = self.request.get('url') or ""

    page = self.request.get('pg')
    if not page:
        page = 'all'
    else:
        # NOTE(review): values below 1 are not validated here; pg=0 becomes
        # index -1 -- confirm how the PDF reader treats that.
        page = int(page) - 1

    # SECURITY NOTE(review): the URL comes straight from the request, so this
    # handler will fetch whatever the caller names. Consider restricting the
    # allowed schemes/hosts before exposing it publicly.
    u = urllib2.urlopen(urllib2.unquote(url))
    try:
        data = u.read()
    finally:
        # Release the connection even if the read fails.
        u.close()

    output = StringIO.StringIO()
    try:
        output.write(data)

        p = PdfFileReader(output)

        pages = p.getNumPages()
        info = p.getDocumentInfo()
        # Guard against a missing info object or title so the attribute is
        # rendered empty instead of failing or containing "None".
        title = info.title if info is not None else None
        if page == "all":
            # Join once instead of repeatedly growing a string in the loop.
            content = "".join(
                [page_xml(i + 1, p.getPage(i).extractText())
                 for i in range(0, pages)]
            )
        else:
            content = page_xml(page + 1, p.getPage(page).extractText())
    finally:
        # The buffer is kept open until extraction is done (as in the
        # original flow) and is closed on error paths too.
        output.close()

    result = "<?xml version='1.0' encoding='UTF-8'?>\n<document url='%s' title='%s'>%s</document>" % (attr(url), attr(title or ""), content)

    self.response.headers['Content-type'] = 'application/xml'
    self.response.out.write(result)
Example #2
0
def printMeta(filename):
    """Print each document-information entry of the PDF at *filename*.

    Writes a header line naming the file, then one ``[+] key : value`` line
    per entry returned by ``PdfFileReader.getDocumentInfo()``.

    Args:
        filename: path of the PDF file to inspect.

    Raises:
        Errors from ``open`` and from the PDF reader propagate unchanged.
    """
    # The context manager closes the file handle; the original left it open.
    # The loop stays inside the block in case the reader still needs the
    # stream while values are looked up (presumably lazy -- not verified).
    with open(filename, "rb") as pdf_file:
        f = PdfFileReader(pdf_file)
        info = f.getDocumentInfo()
        print("[*] PDF Metadata for:" + filename)
        # getDocumentInfo() can return None when the PDF has no info
        # dictionary; treat that as "no entries" rather than raising.
        for item in info or {}:
            print("[+]", item, ":", info[item])