Python htmlescapeの例、uplib.webutils.htmlescape Pythonの例

コード例 #1

0

ファイルを表示

ファイル: angelHandler.py プロジェクト: project-renard-survey/xerox-parc-uplib-mirror

def python_exception_html(excn, extra = None):
    """
    Render an exception as a minimal standalone HTML error page.

    :param excn: a ``(type, value, traceback)`` triple, as returned by
        ``sys.exc_info()``
    :param extra: optional text shown, HTML-escaped, right after "Error:"
    :return: the HTML page, with the formatted traceback escaped inside
        a ``<pre>`` element
    """
    exc_type, exc_value, exc_tb = excn
    trace_text = ''.join(traceback.format_exception(exc_type, exc_value, exc_tb))
    pieces = ['<html><body><p>Error:']
    if extra:
        pieces.append('  ' + htmlescape(extra))
    pieces.append('<br>\n<p><pre>' + htmlescape(trace_text) + '</pre></body></html>')
    return ''.join(pieces)

コード例 #2

0

ファイルを表示

ファイル: UploadDocument.py プロジェクト: project-renard-survey/xerox-parc-uplib-mirror

def upload(repo, response, params):
    """
    Obtain a Web form which supports file upload from a Web browser.

    The page embeds a small script that guesses the content type from the
    extension of the chosen file name and copies the file name into the
    hidden ``documentname`` field.  The form posts to
    ``/action/UploadDocument/add``.

    :param repo: the repository; only ``repo.name()`` is used (page title)
    :param response: the response object; the page is written to the
        stream returned by ``response.open()``
    :param params: the request parameters (not used here)
    :returns:  a Web form supporting file upload
    :rtype: text/html
    """

    fp = response.open()
    # The title line previously emitted a second "<head>" instead of closing it.
    fp.write('<html><head><title>Upload document to "%s"</title></head>\n' % htmlescape(repo.name()))
    fp.write('<body bgcolor="%s">\n' % STANDARD_BACKGROUND_COLOR)
    fp.write('<script type="text/javascript" src="/html/javascripts/prototype.js"></script>\n')
    fp.write('<script type="text/javascript">\n'
             'var ext_to_content_type_mapping = {\n')
    for key in CONTENT_TYPES:
        fp.write('   "%s": "%s",\n' % (CONTENT_TYPES[key], key))
    # The extension is the LAST dot-separated component ("a.b.pdf" -> "pdf");
    # taking component [1] picked "b" for such names.
    fp.write('   };\n\n'
             'function choose_appropriate_type(filename) {\n'
             '  var parts = filename.split(".");\n'
             '  var ext = (parts.length > 1) ? parts[parts.length - 1] : "";\n'
             '  for (var e in ext_to_content_type_mapping) {\n'
             '     if (e == ext)\n'
             '        return ext_to_content_type_mapping[e];\n'
             '  };\n'
             '  return "undefined";\n'
             '}\n\n'
             'function on_filename_change (e) {\n'
             '  var a = choose_appropriate_type(document.forms.uploadform.content.value);\n'
             '  document.forms.uploadform.contenttype.value = a;\n'
             '  document.forms.uploadform.documentname.value = document.forms.uploadform.content.value;\n'
             '}\n'
             '</script>\n')
    fp.write('<form enctype="multipart/form-data" id="uploadform" action="/action/UploadDocument/add" method="POST" target="_top">\n')
    fp.write('<input type=hidden name=wait value=watch>\n')
    fp.write('<input type=hidden name=documentname value="">\n')
    referer = response.request.get_header('referer')
    # The Referer header is client-supplied, so escape it before placing it
    # inside the value="..." attribute.  The second argument is the flag that
    # other call sites pass when escaping attribute values.
    fp.write('<p>File to upload:  <input type="file" name=content size=50 value="%s"' % htmlescape(referer or "", True) +
             '  onchange="{void(on_filename_change(this));}">\n')
    fp.write('<p>Content-Type of file: <select name="contenttype" size=1>\n')
    fp.write('<option value="undefined" selected>-- undefined --</option>\n')
    for key in CONTENT_TYPES:
        hkey = htmlescape(key)
        fp.write('<option value="%s">%s</option>\n' % (hkey, hkey))
    fp.write('</select>\n')
    fp.write('<p>Optional metadata for the document:<br><table>'
             '<tr><td>Title for document: </td><td><input type=text name="md-title" size=60></td></tr>\n'
             '<tr><td>Authors <i>(" and "-separated)</i>:  </td><td><input type=text name="md-authors" size=60></td></tr>\n'
             '<tr><td>Publication date <i>(mm/dd/yyyy)</i>: </td><td><input type=text name="md-date" size=60></td></tr>\n'
             '<tr><td>Categories <i>(comma-separated)</i>: </td><td><input type=text name="md-categories" size=60></td></tr>\n'
             '</table>\n')
    fp.write('<p><input type=submit name=submit value=submit>\n')
    fp.write('</form></body></html>\n')

コード例 #3

0

ファイルを表示

ファイル: UploadDocument.py プロジェクト: project-renard-survey/xerox-parc-uplib-mirror

def addnote(repo, response, params):
    """
    Obtain a Web form with which to add a note to the repository.  Useful
    for taking notes in meeting.

    The form posts to ``/action/UploadDocument/add`` with a fixed document
    name of ``note.3x5`` and a content type of ``text/plain``.

    :param repo: the repository; only ``repo.name()`` is used (page title)
    :param response: the response object; the page is written to the
        stream returned by ``response.open()``
    :param params: the request parameters (not used here)
    :return: a Web form with which to add a note to the repository
    :rtype: text/html
    """

    # send back a note to upload

    fp = response.open()
    # The title line previously emitted a second "<head>" instead of closing it.
    fp.write('<html><head><title>Add note to "%s"</title></head>\n' % htmlescape(repo.name()))
    fp.write('<body bgcolor="%s">\n' % STANDARD_BACKGROUND_COLOR)
    fp.write('<form enctype="multipart/form-data" id="addnote" action="/action/UploadDocument/add" method="POST" target="_top">\n')
    fp.write('<input type=hidden name=wait value=true>\n')
    # add fake filename with 3x5 extension to trigger CardDoc parser
    fp.write('<input type=hidden name=documentname value="note.3x5">\n')
    fp.write('<input type=hidden name=contenttype value="text/plain">\n')
    # NOTE(review): "textarea" is not a valid <input> type, so browsers render
    # a single-line text field here; a <textarea> element was probably
    # intended -- confirm before changing the submitted form shape.
    fp.write('<p><input type=textarea name="content" value="" style="width: 100%; height: 50%;">\n')
    fp.write('<p><input type=submit name=submit value=submit>\n')
    fp.write('<p>Optional metadata for the document:<br><table>'
             '<tr><td>Categories <i>(comma-separated)</i>: </td><td><input type=text name="md-categories" size=60></td></tr>\n'
             '<tr><td>Title for document: </td><td><input type=text name="md-title" size=60></td></tr>\n'
             '<tr><td>Authors <i>(" and "-separated)</i>:  </td><td><input type=text name="md-authors" size=60></td></tr>\n'
             '</table>\n')
    fp.write('</form></body></html>\n')

コード例 #4

0

ファイルを表示

ファイル: parser.py プロジェクト: project-renard-survey/xerox-parc-uplib-mirror

 def do_attendee (attendee, annotation, c, x, y, framesize):
     """
     Draw one attendee line as a paragraph and return the new ``y`` position.

     The label is chosen from the first ``CN`` parameter (real name, then
     e-mail address, then the raw CN string) and falls back to the
     attendee's value.  When the attendee has a value, the label is wrapped
     in a ``<link>`` element pointing at it.

     :param attendee: object exposing ``value`` and a ``params`` mapping
     :param annotation: optional markup appended in italics after the label
     :param c: drawing target passed to ``wrapOn`` / ``drawOn``
     :param x: horizontal position at which the paragraph is drawn
     :param y: current vertical position; the paragraph is drawn below it
     :param framesize: available size, unpacked into ``wrapOn``
     :return: ``y`` minus the height of the drawn paragraph
     """
     link = attendee.value
     cn = attendee.params.get("CN")
     nameaddr = cn and cn[0]
     # Default to an empty label: with neither a CN nor a value, ``text``
     # used to be unbound and the escape call below raised UnboundLocalError.
     text = ""
     if nameaddr:
         # Commas are hidden from parseaddr and restored afterwards so a
         # "Last, First" display name is not split into two addresses.
         realname, emailaddr = parseaddr(nameaddr.replace(",", "%2C"))
         if realname:
             text = realname.replace("%2C", ",")
         elif emailaddr:
             text = emailaddr
         else:
             text = nameaddr
     elif link:
         text = link
     text = htmlescape(text)
     if link:
         # Escape the target too, so characters such as '&' or '"' in the
         # link cannot break the attribute in the generated markup.
         text = '<link href="' + htmlescape(link, True) + '">' + text + '</link>'
     if annotation:
         text += ' <i>' + annotation + '</i>'
     p = Paragraph(text, ParagraphStyle("normal"))
     w, h = p.wrapOn(c, *framesize)
     y -= h
     p.drawOn(c, x, y)
     # y -= (0.1 * inch)
     return y

コード例 #5

0

ファイルを表示

ファイル: images.py プロジェクト: project-renard-survey/xerox-parc-uplib-mirror

def show_images (repo, response, params):
    """
    Write an HTML page showing every illustration recorded for a document.

    Illustration bounding boxes are read from the document's
    ``illustrations-bounding-boxes`` metadata (comma-separated entries of
    ``pageno:type:left:top:width:height``).  Each box is cropped out of the
    matching page image, rescaled to 75 dpi, and embedded as a ``data:`` URL.

    :param repo: the repository, used to look up the document
    :param response: the response object used for errors, replies and output
    :param params: request parameters; ``doc_id`` is required
    """

    import Image

    doc_id = params.get("doc_id")
    if not doc_id:
        response.error(HTTPCodes.BAD_REQUEST, "No doc ID specified")
        return

    doc = repo.get_document(doc_id)

    images = doc.get_metadata("illustrations-bounding-boxes")
    if not images:
        response.reply("No illustration data found for %s." % doc)
        return
    dpi = int(doc.get_metadata("dpi") or doc.get_metadata("images-dpi") or 300)

    fp = response.open()
    fp.write("<body><h1>Illustrations in %s</h1>" % htmlescape(str(doc)))

    currentpage = None
    im = None
    for image in images.split(","):
        pageno, kind, left, top, width, height = image.split(":")
        pageno = int(pageno)
        if pageno != currentpage:
            if currentpage is not None:
                fp.write('<hr>\n')
            fp.write("<p>Page %s" % (pageno + 1))
            currentpage = pageno
            # force the page image to be (re)loaded for the new page
            im = None
        left = int(left)
        top = int(top)
        width = int(width)
        height = int(height)
        newwidth, newheight = (width * 75) / dpi, (height * 75)/dpi
        if (newwidth < 1) or (newheight < 1):
            # too small to be visible once rescaled to 75 dpi
            continue
        filepath = os.path.join(doc.folder(), "page-images", "page%05d.png" % (pageno + 1))
        if im is None:
            if not os.path.exists(filepath):
                fp.write('<p>No image file %s for page %s' % (filepath, (pageno + 1)))
                # Nothing to crop from: skip this illustration.  Previously
                # execution fell through and crashed on ``im.mode`` with
                # ``im`` still None.
                continue
            im = Image.open(filepath)
            if im.mode in ("1", "P", "L"):
                im = im.convert("RGB")

        img = im.crop((left, top, left + width + 1, top + height + 1))
        img.load()
        # rescale to 75 dpi
        if dpi != 75:
            img = img.resize((newwidth, newheight), Image.ANTIALIAS)
        # convert to data: URL
        fpi = StringIO.StringIO()
        img.save(fpi, "PNG")
        bits = fpi.getvalue()
        fpi.close()
        fp.write('<p>%s:<br><img src="data:image/png;base64,%s">\n' % (image, base64.encodestring(bits).strip()))

コード例 #6

0

ファイルを表示

ファイル: parser.py プロジェクト: project-renard-survey/xerox-parc-uplib-mirror

 def format_description(self, c, x, y, framesize):
     """
     Draw the event's description, if it has one, and return the new ``y``.

     The description is HTML-escaped, prefixed with an italic
     "Description:" label, and its newlines become ``<br />`` tags.
     When the event has no description, ``y`` is returned unchanged.
     """
     if 'description' not in self.__event.contents:
         return y
     escaped = htmlescape(self.__event.description.value.strip())
     markup = ('<i>Description:</i> ' + escaped).replace('\n', '<br />')
     para = Paragraph(markup, ParagraphStyle("normal"))
     w, h = para.wrapOn(c, *framesize)
     new_y = y - h
     para.drawOn(c, x, new_y)
     return new_y

コード例 #7

0

ファイルを表示

ファイル: FilterWebPage.py プロジェクト: project-renard-survey/xerox-parc-uplib-mirror

        def render (self, fp):
            """
            Write the filtered page as a UTF-8 HTML document.

            :param fp: either a file name (opened here in append-binary mode
                and closed again before returning) or an object with a
                ``write`` method, which is left open for the caller
            :raises ValueError: if ``self.content`` is empty
            """
            from uplib.plibUtil import note
            from uplib.webutils import htmlescape

            if not self.content:
                raise ValueError("can't score this document")

            # Remember whether we opened the file, so that only a handle
            # created here is closed; previously it was never closed.
            opened_here = type(fp) in types.StringTypes
            if opened_here:
                fp = open(fp, "ab")

            try:
                fp.write(u'<head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'.encode("UTF-8", "strict"))
                fp.write((u'<title>%s</title></head>\n' % htmlescape(self.title)).encode("UTF-8", "strict"))
                fp.write((u'<body>\n<h1>%s</h1>\n' % htmlescape(self.title)).encode("UTF-8", "strict"))
                if self.authors:
                    self.clean_authors(self.authors)
                    fp.write('<p class="uplib-authors">%s</p>\n' % self.authors.renderContents())
                if self.date:
                    self.clean_date(self.date)
                    fp.write('<p class="uplib-pubdate">%s</p>\n' % self.date.renderContents())
                self.clean_content(self.content)
                fp.write(self.content.prettify())
                fp.write(u'</body>\n'.encode("UTF-8", "strict"))
            finally:
                if opened_here:
                    fp.close()

コード例 #8

0

ファイルを表示

ファイル: tornadoHandler.py プロジェクト: project-renard-survey/xerox-parc-uplib-mirror

 def get_login_page(self):
     """
     Return the HTML for the pass-phrase login page.

     The form posts to ``/login`` and carries the originally requested
     URI (``self.request.uri``, escaped) in the hidden ``originaluri``
     field.
     """
     page_head = ('<head><title>Login Page</title>\n'
                  '<script>\n'
                  '<!--\n'
                  'function sf(){document.f.password.focus();}\n'
                  '// -->\n'
                  '</script></head>\n'
                  '<body bgcolor="#ef280e" onload="sf()">\n'
                  '<table width=100% height=100%><tr align=center><td align=center>'
                  '<table bgcolor=black cellpadding=10><tr bgcolor=white><td>'
                  '<center>Please enter pass-phrase:<br>'
                  '<form action="/login" method=POST enctype="multipart/form-data" name=f>\n'
                  '<input type=password size=60 name=password value=""><P>&nbsp;<br>\n')
     escaped_uri = htmlescape(self.request.uri, True)
     uri_field = '<input type=hidden name=originaluri value="%s"><P>&nbsp;<br>\n' % escaped_uri
     page_tail = ('<input type=submit value="Login">\n'
                  '</center></form></td></tr></table></td></tr></table></body>')
     return ''.join([page_head, uri_field, page_tail])

コード例 #9

0

ファイルを表示

ファイル: tornadoHandler.py プロジェクト: project-renard-survey/xerox-parc-uplib-mirror

    def get_error_html (self, status_code, **kwargs):
        """
        Build the HTML body for an error response.

        When both ``message`` and ``content_type`` keyword arguments are
        given, a small page is built around the message; a ``text/plain``
        message is escaped and wrapped in ``<pre>`` first.  Otherwise the
        request is delegated to ``RequestHandler.get_error_html``.

        :param status_code: the HTTP status code being reported
        :param kwargs: optional ``message`` and ``content_type``
        :return: the HTML for the error page
        :raises RuntimeError: if the content type is neither None,
            ``text/plain...`` nor exactly ``text/html``
        """

        if 'message' in kwargs and 'content_type' in kwargs:
            content_type = kwargs.get('content_type')
            message = kwargs.get('message')
            # Guard against None: the next test explicitly accepts a None
            # content type, but ``startswith`` used to raise on it first.
            if (content_type is not None) and content_type.startswith("text/plain"):
                message = '<pre>' + htmlescape(message) + '</pre>'
                content_type = "text/html"
            if (content_type is None) or (content_type == "text/html"):
                return "<html><title>%(code)d: %(stdmsg)s</title>" \
                       "<body>%(code)d: %(message)s</body></html>" % {
                           "code": status_code,
                           "stdmsg": httplib.responses[status_code],
                           "message" : message,
                           }
            else:
                raise RuntimeError("Error messages must be HTML")
        else:
            # The base-class result must be returned; it was previously
            # computed and dropped, so this method returned None.
            return RequestHandler.get_error_html(self, status_code, **kwargs)

コード例 #10

0

ファイルを表示

ファイル: angelHandler.py プロジェクト: project-renard-survey/xerox-parc-uplib-mirror

                note(4, "forked off request")
                return false
            except Exception, x:
                note(0, "signalling exception <%s> at point 1a:", x)
                excn_data = sys.exc_info()
                signal_python_exception(request, excn_data)
#                 s2 = python_exception_html (excn_data, None)
#                 request.reply_code = 500
#                 request['Content-Type'] = 'text/html'
#                 request['Content-Length'] = len(s2)
#                 request.push(s2)
                return true
        else:
            # can't use request.error() here because request.done() will be called twice
            request.reply_code = 501
            action = htmlescape("/action/" + module_name + "/" + function_name)
            if exception:
                s = u"<html><head><title>Error loading module:  %s</title></head><body><p>Attempt to load module/function <i>%s/%s</i> raised an exception:\n<pre>%s</pre><p>(extensions path = [<tt>%s</tt>], sys.path = <tt>%s</tt>)</body></html>" % (action, module_name, function_name, exception, self.__repo__.get_actions_path(), htmlescape(str(sys.path)))
            else:
                s = u"<html><head><title>No such action:  %s</title></head><body><p>No such action:  %s.<br>actions path = [%s]</body></html>" % (action, action, self.__repo__.get_actions_path())
            s = s.encode("UTF-8", "replace")
            request['Content-Type'] = "text/html; charset=UTF-8"
            request['Content-Length'] = len(s)
            request.push(s)
            return true


    def handle_request (self, request):

        request['Server'] = "UpLib/%s" % self.version
        request.version = '1.0'         # stick with 1.0 for Medusa

コード例 #11

0

ファイルを表示

ファイル: createHTML.py プロジェクト: project-renard-survey/xerox-parc-uplib-mirror

def do_HTML (dirpath, html_dir, doc_id, port):

    note(3, "  HTMLing in %s...", dirpath)
    html_index = os.path.join(dirpath, "index.html")
    doc_id = os.path.basename(dirpath)
    retval = false
    try:
        if not os.path.exists(html_dir):
            os.mkdir(html_dir)
            os.chmod(html_dir, 0700)

        metadata = read_metadata(os.path.join(dirpath, "metadata.txt"))
        title = metadata.get('name') or metadata.get('title') or doc_id
        pagewidth = None
        pageheight = None
        bts = metadata.get('big-thumbnail-size')
        if bts:
            pagewidth, pageheight = [int(x) for x in string.split(bts, ',')]
            note(3, "    title is %s, pagesize is %sx%s", title, pagewidth, pageheight)

        # start with summary.html

        note(3, "    summary.html")
        summarypath = os.path.join(dirpath, "summary.txt")
        if os.path.exists(summarypath):
            f = open(summarypath, 'r')
            summary_text = f.read()
            f.close()
            html_summary = htmlescape(summary_text, true)
        else:
            html_summary = ""
        html_summary_path = os.path.join(html_dir, "summary.html")
        f = open(html_summary_path, 'w')
        f.write('<html><body>' + html_summary + '</body></html>');
        f.close()
        os.chmod(html_summary_path, 0600)        

        # next thumbs.html

        note(3, "    thumbs.html")
        thumbs_path = os.path.join(html_dir, "thumbs.html")
        f = open(thumbs_path, "w")
        if USE_VIRTUAL_INK:
            bgcolor = "white"
        else:
            bgcolor = STANDARD_TOOLS_COLOR
        f.write('<html><body bgcolor="%s"><center>\n' % bgcolor)
        thumbnail_dir = os.path.join(dirpath, "thumbnails")
        thumbnail_files = os.listdir(thumbnail_dir)
        thumbs = []
        for thumbnail in thumbnail_files:
            m = re.match(r"(\d+).png", thumbnail)
            if m:
                thumbs.append((int(m.group(1)), thumbnail,))
        thumbs.sort()
        for thumbnail in thumbs:
            page_no = int(thumbnail[0])
            f.write('<a href="page%s.html" target=viewarea>' % page_no)
            f.write('<img src="../thumbnails/%s" border=1></a><br>\n' % thumbnail[1])

            # now write the HTML connected to that thumbnail
            page_html = os.path.join(html_dir, "page%s.html" % page_no)
            f2 = open (page_html, 'w')
            # get width of large page
            if not pagewidth or not pageheight:
                im = Image.open(os.path.join(thumbnail_dir, "big%s.png" % page_no))
                pagewidth, pageheight = im.size[0] - 25, im.size[1]
                note(3, "    title is %s, pagesize is %sx%s", title, pagewidth, pageheight)
                del im
            f2.write('<html><body bgcolor="white"><img src="../thumbnails/big%s.png" usemap="#page%smap" border=0>\n' % (page_no, page_no))
            f2.write('<map name="page%smap">\n' % page_no)
            if (page_no < len(thumbs)):
                f2.write('<area href="page%s.html" alt="to Page %s" shape="circle" coords="%s,60,10">\n'
                         % (page_no + 1, page_no + 1, pagewidth + 15))
                f2.write('<area href="page%s.html" alt="to Page %s" shape="rect" coords="%s,0,%s,%s">\n'
                         % (page_no + 1, page_no + 1, pagewidth/2, pagewidth, pageheight))
            if (page_no > 1):
                f2.write('<area href="page%s.html" alt="to Page %s" shape="circle" coords="%s,90,10">\n'
                         % (page_no - 1, page_no - 1, pagewidth + 15))
                f2.write('<area href="page%s.html" alt="to Page %s" shape="rect" coords="0,0,%s,%s">\n'
                         % (page_no - 1, page_no - 1, (pagewidth/2)-1, pageheight))
            f2.write('<area href="/" alt="to repository" target="_top" shape="circle" coords="%s,207,10">\n'
                     % (pagewidth + 15))
            f2.write('</map></body></html>\n')
            f2.close()
            os.chmod(page_html, 0600)
        f.write('</center></body></html>')
        f.close()
        os.chmod (thumbs_path, 0600)

        # next is controls.html

        note(3, "    controls.html")
        controls_path = os.path.join(html_dir, "controls.html")
        f = open(controls_path, "w")
        if CONTROLS_TEMPLATE:
            f.write(CONTROLS_TEMPLATE % { 'doc-id': doc_id })
        else:
            f.write('<html>\n<head>\n')
            f.write('<script type="text/javascript">\n')
            f.write('function newInWindow(did, title, w, h, sidebar, twopage) {\n')
            f.write('  var s = "/action/basic/dv_show?doc_id=" + did + "&no-margin=1";\n')
            f.write('  var c = "width=" + w + ",height=" + h;\n')
            f.write('  if (!sidebar)\n')
            f.write('    s = s + "&no-sidebar=1";\n')
            f.write('  if (twopage)\n')
            f.write('    s = s + "&two-pages=1";\n')
            f.write('  defaultStatus = s;\n')
            f.write('  window.open(s, title, config=c);\n')
            f.write('}\n')
            f.write('</script></head><body bgcolor="%s">\n<center>\n' % STANDARD_TOOLS_COLOR)
            f.write("""<a href="javascript:newInWindow('%s','%s', %d+30, %d+10, false, false); void 0;">Detach</a>""" % (doc_id, htmlescape(title, true), pagewidth, pageheight))
            f.write(""" <a href="javascript:newInWindow('%s','%s', (2 * %d)+30, %d+10, false, true); void 0;">(2)</a>\n""" % (doc_id, htmlescape(title, true), pagewidth, pageheight))
            buttons = get_buttons_sorted(FN_DOCUMENT_SCOPE)
            for button in buttons:
                url = button[1][4]
                target = button[1][3]
                label = button[1][0]
                if url:
                    f.write('<br>\n<a href="%s"' % htmlescape(url % doc_id, true))
                else:
                    f.write('<br>\n<a href="/action/basic/repo_userbutton?uplib_userbutton_key=%s&doc_id=%s"' % (button[0], doc_id))
                if target:
                    f.write(' target="%s"' % target)
                f.write('>%s</a>\n' % label)
            f.write("</center></body></html>")
        f.close()
        os.chmod(controls_path, 0600)

        # then index.html

        note(3, "    index.html")
        f = open(html_index, "w")
        f.write('<head>\n')
        f.write('<title>%s</title>\n</head>\n' % htmlescape(title))
        f.write('<base target="_top">'
                '<frameset cols="%s,*">'
                '<frameset rows="%s,*">'
                '<frame name=controls src="./html/controls.html">'
                '<frame name=thumbs src="./html/thumbs.html">'
                '</frameset>'
                '<frame name="viewarea" src="./html/page1.html">'
                '</frameset>\n' % (THUMBNAIL_COLWIDTH, CONTROLS_HEIGHT))
        f.close()
        os.chmod(html_index, 0600)

        # indicate successful completion
        note(3, "  finished.")
        retval = true

    except:
        info = sys.exc_info()
        note(0, "exception raised in createHTML:\n%s\n", string.join(traceback.format_exception(*info)))
        raise

    else:
        if not retval:
            note("bad retval %s", retval)
            if os.path.exists(html_index): os.unlink(html_index)
            if os.path.exists(html_dir): shutil.rmtree(html_dir)

コード例 #12

0

ファイルを表示

ファイル: ePub.py プロジェクト: project-renard-survey/xerox-parc-uplib-mirror

def get_epub_version (repo, response, params):

    doc_id = params.get("doc_id")
    if not doc_id:
        response.error(HTTPCodes.BAD_REQUEST, "No doc specified.\n")
        return
    elif not repo.valid_doc_id(doc_id):
        response.error(HTTPCodes.BAD_REQUEST, "Invalid doc ID %s specified.\n" % doc_id)
        return

    doc = repo.get_document(doc_id)

    bookid = "uplibhash:" + doc.sha_hash()

    page_count = int(doc.get_metadata("page-count") or doc.get_metadata("pagecount") or "0")
    language = doc.text_language() or "en-US"

    package = (u'<?xml version="1.0"?>\n' +
               u'<package version="2.0" xmlns="http://www.idpf.org/2007/opf" unique-identifier="BookId">\n')

    metadata = (u'<metadata xmlns:dc="http://purl.org/dc/elements/1.1/"\n' +
                u'          xmlns:opf="http://www.idpf.org/2007/opf">\n' +
                u'  <dc:identifier id="BookId">%s</dc:identifier>\n' % htmlescape(bookid) +
                u'  <dc:language>%s</dc:language>\n' % htmlescape(language))

    ncx = u"""<?xml version="1.0"  encoding="UTF-8"?>
              <!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN" 
              "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">
              <ncx version="2005-1" xml:lang="en" xmlns="http://www.daisy.org/z3986/2005/ncx/">
               <head>
                 <meta name="dtb:uid" content="%s"/>
                 <meta name="dtb:depth" content="6"/>
                 <meta name="dtb:generator" content="UpLib %s"/>
                 <meta name="dtb:totalPageCount" content="%s"/>
                 <meta name="dtb:maxPageNumber" content="0"/>
               </head>
               """ % (bookid, UPLIB_VERSION, page_count)

    title = doc.get_metadata("title") or unicode(doc)
    authors = doc.get_metadata("authors")
    ncx += u"<docTitle><text>" + htmlescape(title) + u"</text></docTitle>\n"
    metadata += u'  <dc:title>%s</dc:title>\n' % htmlescape(title)
    if authors:
        authors = authors.split(" and ")
        for author in authors:
            ncx += u"<docAuthor><text>" + htmlescape(author) + u"</text></docAuthor>\n"
            metadata += u'  <dc:creator>%s</dc:creator>\n' % htmlescape(author)
    metadata += u'</metadata>\n'

    ncx += u'<navMap>\n'
    manifest = u'<manifest>\n'
    spine = u'<spine toc="toc.ncx">\n'

    contentpath = _get_html_filepath(doc, debug=("rebuild",))
    content = open(contentpath, "rb").read()
    # remove META tags
    start = content.index("</head>")
    content = ('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" ' +
               '               "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n' +
               '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="%s">\n<head>\n' % language.encode("UTF-8", "strict") +
               '<title>%s</title>\n</head>\n' % htmlescape(title)) + content[start + len('</head>'):]
    manifest += u'  <item id="contents" href="contents.xhtml" media-type="application/xhtml+xml" />\n'
    spine += u'  <itemref idref="contents" />\n'
    ncx += u'<navPoint id="contents" playOrder="1"><navLabel><text>Content</text></navLabel><content src="contents.xhtml" /></navPoint>\n'

    content, images = _separate_images(content)
    for image in images:
        content_type, bits = images[image]
        manifest += u'  <item id="%s" href="images/%s" media-type="%s" />\n' % (
            image, image, content_type)            

#     for page_index, bboxes in wordboxes_page_iterator(doc.folder()):
#         page_xhtml = (u'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n' +
#                       u'<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="%s">\n' % language +
#                       u'<body>\n')
# #         if pageno in illustrations:
# #             for left, top, width, height, tp, image, junk in illustrations.get(pageno):
# #                 page_xhtml += u'<img width="%s" height="%s" alt="image on page" style="position:absolute; left:%spt; top:%spt;" src="%s" />' % (
# #                     width, height, left, top, _form_data_url(image))
#         for bbox in bboxes:
#             face = (bbox.is_italic() and "Italic") or "Regular"
#             family = (bbox.is_fixedwidth() and "Monospace") or (bbox.is_serif() and "Serif") or "Sans-Serif"
#             weight = (bbox.is_bold() and "Bold") or "Regular"
#             page_xhtml += u'<span style="font-family: %s; font-style: %s; font-weight: %s; font-size: %spt">%s</span>' % (
#                 bbox.left(), bbox.top(), family, face, weight, bbox.font_size() * 0.8, htmlescape(bbox.text()))
#             if bbox.ends_word():
#                 page_xhtml += u"\n"
#         page_xhtml += u"</body></html>\n"
#         pages[page_index] = page_xhtml
#         manifest += u'  <item id="page-%d" href="page-%d.xhtml" media-type="application/xhtml+xml" />\n' % (page_index, page_index)
#         spine += u'  <itemref idref="page-%d" />\n' % page_index
#         ncx += u'<navPoint class="page" id="page-%d" playOrder="%d"><navLabel><text>Page %s</text></navLabel><content src="page-%d.xhtml" /></navPoint>\n' % (
#             page_index, page_index + 1, doc.page_index_to_page_number_string(page_index), page_index)

    # close up the spine elements
    ncx += "</navMap>\n</ncx>"
    manifest += u'  <item id="toc.ncx" href="toc.ncx" media-type="application/x-dtbncx+xml" />\n'
    manifest += u'</manifest>\n'
    spine += u'</spine>\n'
    package += metadata + manifest + spine + u'</package>\n'

    # build the zip container
    filepath = os.path.join(doc.folder(), "versions")
    if not os.path.exists(filepath):
        os.mkdir(filepath)
        os.chmod(filepath, 0700)
    filepath = os.path.join(filepath, "document.epub")
    zf = zipfile.ZipFile(filepath, "w", zipfile.ZIP_STORED, True)
    zf.comment = "%s (from UpLib repository '%s', doc ID %s)" % (htmlescape(doc.get_metadata("title")), htmlescape(repo.name()), doc_id)
    zf.writestr("mimetype", "application/epub+zip")
    zf.writestr("META-INF/container.xml",
                """<?xml version="1.0"?>
                <container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
                <rootfiles>
                <rootfile full-path="packagelayout.opf"
                media-type="application/oebps-package+xml" />     
                </rootfiles>
                </container>
                """)
    zf.writestr(_get_zip_info("packagelayout.opf"), package.encode("UTF-8", "strict"))
    for image in images:
        content_type, bits = images[image]
        zf.writestr("images/%s" % image, bits)
    zf.writestr(_get_zip_info("contents.xhtml"), content)
    zf.writestr(_get_zip_info("toc.ncx"), ncx.encode("UTF-8", "strict"))
    zf.close()

    response.return_file("application/epub", filepath)

コード例 #13

0

ファイルを表示

ファイル: ePub.py プロジェクト: project-renard-survey/xerox-parc-uplib-mirror

def get_svg_version (repo, response, params):
    """
    Reply with an SVG rendering of one page of a document.

    Each page's SVG contains the page's illustrations (as ``data:`` URL
    images), transparent rectangles for its URI links, and one ``<text>``
    element per word box.  SVG is built for every page yielded by
    ``wordboxes_page_iterator``; only the requested page is sent back.

    :param repo: the repository, used to validate and fetch the document
    :param response: the response object used for errors and the reply
    :param params: request parameters; ``doc_id`` and ``page`` (a
        zero-based page index) are required
    """

    doc_id = params.get("doc_id")
    if not doc_id:
        response.error(HTTPCodes.BAD_REQUEST, "No doc specified.\n")
        return
    elif not repo.valid_doc_id(doc_id):
        response.error(HTTPCodes.BAD_REQUEST, "Invalid doc ID %s specified.\n" % doc_id)
        return

    note("doc_id is %s", doc_id)
    doc = repo.get_document(doc_id)
    page = params.get("page")
    if not page:
        response.error(HTTPCodes.BAD_REQUEST, "No page index specified.")
        return
    # A non-numeric page used to escape as an unhandled ValueError.
    try:
        page = int(page)
    except ValueError:
        response.error(HTTPCodes.BAD_REQUEST, "Invalid page index specified.")
        return
    note("page is %s", page)
    page_count = int(doc.get_metadata("page-count") or doc.get_metadata("pagecount") or "0")
    # reject negative indices as well as ones past the last page
    if (page < 0) or (page >= page_count):
        response.error(HTTPCodes.BAD_REQUEST, "No such page %d." % page)
        return

    language = doc.text_language() or "en-US"
    dpi = int(doc.get_metadata('images-dpi') or doc.get_metadata('tiff-dpi') or doc.get_metadata("dpi") or 300)
    # page size converted from pixels to points (72 per inch)
    # NOTE(review): raises AttributeError when neither "images-size" nor
    # "tiff-size" metadata is present -- confirm whether that can happen.
    page_image_size = tuple([(float(x.strip())*72/float(dpi))
                             for x in (doc.get_metadata("images-size") or
                                       doc.get_metadata("tiff-size")).split(",")])

    pages = {}
    illustrations = {}
    links = {}

    # group illustrations by page, dropping those under 100 square units
    imd = read_illustrations_metadata(doc.folder(), True)
    for (left, top, width, height, kind, bits, pageno) in imd:
        if ((width * height) < 100):
            continue
        illustrations.setdefault(pageno, []).append((left, top, width, height, bits, pageno))
    # group URI links by the page they start on
    lmd = doc.links().values()
    for link in lmd:
        if hasattr(link, "from_page") and (link.typename == "uri"):
            links.setdefault(link.from_page, []).append(link)

    note("links are %s", links)

    for page_index, bboxes in wordboxes_page_iterator(doc.folder()):

        page_svg  = (u'''<?xml version="1.0" standalone="no"?>
                         <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
                                   "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
                            <svg width="%spt" height="%spt" version="1.1"
                                 xmlns="http://www.w3.org/2000/svg"
                                 xmlns:xlink="http://www.w3.org/1999/xlink">
                     ''' % page_image_size)

        if page_index in illustrations:
            for left, top, width, height, image, junk in illustrations.get(page_index):
                page_svg += u'<image x="%spt" y="%spt" width="%spt" height="%spt" xlink:href="%s" />\n' % (
                    left, top, width, height, _form_data_url(image))

        if page_index in links:
            note("links for %s are %s", page_index, links.get(page_index))
            for link in links[page_index]:
                # Default to None: the result is tested for falsiness, so a
                # link without a "from_rect" should be skipped, not crash.
                fr = getattr(link, "from_rect", None)
                if fr:
                    left, top, width, height = fr
                    # NOTE(review): quote_plus also encodes ':' and '/', so
                    # an absolute URI is probably not usable as a link
                    # target after this -- confirm the intended escaping.
                    uri = urllib.quote_plus(link.to_uri)
                    page_svg += (u'<a xlink:href="%s"><rect x="%spt" y="%spt" ' % (uri, left, top) +
                                 u'width="%spt" height="%spt" fill="none" stroke="none" /></a>\n' % (
                                     width, height))

        for bbox in bboxes:
            face = (bbox.is_italic() and "Italic") or "Regular"
            family = (bbox.is_fixedwidth() and "Monospace") or (bbox.is_serif() and "Serif") or "Sans-Serif"
            weight = (bbox.is_bold() and "Bold") or "Regular"
            page_svg += u'<text x="%spt" y="%spt" font-family="%s" font-size="%spt" font-style="%s" font-weight="%s">%s</text>' % (
                bbox.left(), bbox.top(), family, bbox.font_size() * 0.9, face, weight, htmlescape(bbox.text()))
            if bbox.ends_word():
                page_svg += u"\n"
        page_svg += u"</svg>\n"
        pages[page_index] = page_svg

    for pageno in pages:
        note("%s: %s\n", pageno, len(pages.get(pageno)))

    response.reply(pages.get(page), "image/svg+xml")

コード例 #14

0

ファイルを表示

ファイル: categories.py プロジェクト: project-renard-survey/xerox-parc-uplib-mirror

def doc_categorize (repo, response, params):
    """
    Show a Web page for adding and removing the categories of one document.

    The page contains the document's abstract, up to ten "likely" categories
    as returned by ``find_likely_tags``, a form for adding a new category, and
    a list of every category returned by ``repo.get_categories_with_docs()``.
    A category the document already has is drawn in red and links to the
    ``doc_remove_category`` action; any other category is drawn in black and
    links to ``doc_add_category``.  Categories whose lower-cased name matches
    one of the regular expressions in the ``categorize-excluded-categories``
    configuration option are left out of both lists.

    :param doc_id: the document to categorize
    :type doc_id: UpLib doc ID string
    :returns: a Web page of category links for the document, or a \
              "bad request" error if ``doc_id`` is missing or invalid
    :rtype: text/html
    """

    from uplib.basicPlugins import show_abstract, _is_sensible_browser
    from uplib.basicPlugins import show_title, STANDARD_BACKGROUND_COLOR, STANDARD_TOOLS_COLOR, STANDARD_LEGEND_COLOR
    from uplib.basicPlugins import __issue_javascript_head_boilerplate as issue_javascript_head_boilerplate
    from uplib.basicPlugins import __issue_menu_definition as issue_menu_definition
    from uplib.basicPlugins import __issue_title_styles as issue_title_styles

    # the exclusion patterns are compiled once and cached at module level
    global _CONFIGURATION
    if _CONFIGURATION is None:
        _CONFIGURATION = { "exclusions": [
            re.compile(x.strip()) for x in configurator.default_configurator().get("categorize-excluded-categories", "").split(",") if x.strip()]}
    # fall back to "nothing excluded" if the configuration has no such entry
    exclusions = (_CONFIGURATION and _CONFIGURATION.get("exclusions")) or []

    def figure_size(count, avgsize):
        # font size (in ems) for a category holding "count" documents, when
        # the average category holds "avgsize" documents
        if avgsize < 0.0001:
            return 0.0001
        return math.sqrt(math.log((count * (math.e - 1))/avgsize + 1))

    def is_excluded(name):
        # true if the category name matches a configured exclusion pattern
        lowered = name.lower()
        for exclusion in exclusions:
            if exclusion.match(lowered):
                return True
        return False

    doc_id = params.get("doc_id")
    if not doc_id:
        response.error(HTTPCodes.BAD_REQUEST, "No doc_id parameter specified.")
        return
    doc = repo.valid_doc_id(doc_id) and repo.get_document(doc_id)
    if not doc:
        # doc_id comes straight from the request, so escape it before echoing it
        response.error(HTTPCodes.BAD_REQUEST, "Invalid doc_id parameter '%s' specified." % htmlescape(doc_id))
        return

    fp = response.open()
    title = (doc.get_metadata("title") or doc.id).encode("UTF-8", "strict")
    fp.write("<head><title>Categorizing '%s'</title>\n" % htmlescape(title))
    fp.write('<meta http-equiv="Content-Script-Type" content="text/javascript">\n')
    fp.write('<link REL="SHORTCUT ICON" HREF="/favicon.ico">\n')
    fp.write('<link REL="ICON" type="image/ico" HREF="/favicon.ico">\n')
    issue_javascript_head_boilerplate(fp)
    issue_title_styles(fp)
    fp.write('</head><body bgcolor="%s">\n' % STANDARD_BACKGROUND_COLOR)
    issue_menu_definition(fp)
    show_abstract(repo, doc, fp, _is_sensible_browser(response.user_agent), showpagesearch=False)
    fp.write("<hr />\n")

    # the document's categories, lower-cased, plus every ancestor of a
    # hierarchical ("a/b/c") category, so that "a" and "a/b" count as present
    doccats = [x.lower() for x in doc.get_category_strings()]
    for cat in doccats[:]:
        if cat.find('/') >= 0:
            parts = cat.split('/')
            for i in range(1, len(parts)):
                doccats.append('/'.join(parts[:i]))

    # sibling actions live under the first two components of our own path
    action_prefix = '/'.join(response.request_path.split('/')[:3])

    def link_parts(name):
        # (verb, color, href) for the add-or-remove link of a category
        if name.lower() in doccats:
            verb, color = "remove", "red"
        else:
            verb, color = "add", "black"
        href = action_prefix + '/doc_%s_category?doc_id=%s&tag=%s' % (
            verb, doc.id, urllib.quote_plus(name))
        return verb, color, href

    tags = find_likely_tags(doc)
    if tags:
        # show at most ten of the suggested tags
        stags = min(10, len(tags))
        fp.write("<center><small><i>Likely categories</i></small><br />")
        count = 0
        for name, (score, ndocs, ascore) in tags:

            if count >= stags:
                break
            if is_excluded(name):
                continue

            if count > 0:
                fp.write(" &middot; ")
            # all suggested tags are drawn at the same size
            size = 1
            verb, color, action = link_parts(name)
            fp.write('<a style="font-size: %fem; color: %s;" href="%s" title="%s the \'%s\' category (score=%.3f)">%s</a>' % (
                size, color, action, verb,
                htmlescape(name), ascore, htmlescape(name)))
            count += 1
        fp.write("</center></p><hr />\n")

    fp.write('<form action="%s" method=get><center>Add a new category to this document: ' %
             (action_prefix + '/doc_add_category'))
    fp.write('<input type=hidden name="doc_id" value="%s">\n' % doc.id)
    fp.write('<input type=text name="tag" value="" size=40></form></center>\n')

    note(4, "doc_categorize:  retrieving repository categories... (%s)", time.ctime())
    cats = repo.get_categories_with_docs()
    note(4, "doc_categorize:  have categories (%s)", time.ctime())
    if cats:
        fp.write("<hr>\n<center><small><i>All categories</i></small><br />")
        avgsize = sum([len(x) for x in cats.values()]) / float(len(cats))
        catkeys = cats.keys()
        catkeys.sort(lambda x, y: cmp(x.lower(), y.lower()))
        first = True
        for name in catkeys:
            if is_excluded(name):
                continue

            if not first:
                fp.write(" &middot; ")
            else:
                first = False

            members = len(cats[name])
            if members == 1:
                howmany, plural = "one", ""
            else:
                howmany, plural = str(members), "s"
            # bigger categories get a bigger font, but never below half an em
            size = max(0.5, figure_size(members, avgsize))
            verb, color, action = link_parts(name)
            actionsee = '/action/basic/repo_search?query=%s' % (
                urllib.quote_plus('categories:"%s"' % name))
            fp.write('<a style="font-size: %fem; color: %s;" href="%s" title="%s the \'%s\' category">%s</a>' % (
                size, color, action, verb,
                htmlescape(name), htmlescape(name)))
            # superscript count which links to a search for the category's documents
            fp.write('<a style="font-size: %fem; color: %s; vertical-align: super;" href="%s" ' % (
                max(0.4, size/2), STANDARD_LEGEND_COLOR, actionsee) +
                     'title="see the %s document%s in the \'%s\' category" target="_blank">%d</a>' % (
                         howmany, plural, htmlescape(name), members))

    fp.write("</body>\n")

コード例 #15

0

ファイルを表示

ファイル: UploadDocument.py プロジェクト: project-renard-survey/xerox-parc-uplib-mirror

def _add_internal (ostream, percent_done_fn, repo, response, params, content, wait):
    """
    Add a document to the repository by running ``uplib-add-document``
    in a subprocess.

    This can be called in several different ways.  In general, you post a
    multipart/form-data body which contains a "contenttype" for the
    document, and either a "URL" for the content, or a "content" parameter
    containing the actual content.  If both "URL" and "content" are
    present, the URL is added as the "original-url" value for the metadata,
    and if the content is HTML, it's used as the "original.html" and the
    URL is used to pull ancillary content referenced in it.

    :param ostream: if not ``None``, progress and results are reported by \
           passing status strings to ``_rewrite_job_output`` for this stream \
           rather than by replying through ``response``
    :param percent_done_fn: if not ``None``, called with 40 once the \
           subprocess has succeeded, and with 100 once the wait for the \
           document to come on-line is over
    :param repo: the repository to add the document to
    :param response: the response object used to reply, redirect, or report errors
    :param params: the request parameters; this function reads ``contenttype``, \
           ``URL``, ``no-redirect``, ``documentname``, ``suppress-duplicates``, \
           ``bury``, ``verbosity``, ``metadata``, ``md-title``, ``md-authors``, \
           ``md-date`` and ``md-categories``
    :param content: the bits of the document, or a false value if only a URL was given
    :param wait: whether the caller wants the outcome of the subprocess \
           reported; if false, a failure is only logged
    """

    content_type = params.get("contenttype")
    url = params.get("URL")
    noredir = params.get("no-redirect")
    noredir = noredir and (noredir.lower() == "true")
    uploadloc = url
    docname = params.get("documentname")
    tempf = None
    mdtmpfile = None
    suppress_duplicates = params.get("suppress-duplicates")
    suppress_duplicates = suppress_duplicates and (suppress_duplicates.lower() == "true")
    bury = params.get("bury")
    bury = bury and (bury.lower() == "true")
    verbosity = int(params.get("verbosity") or "0")
    if content:
        if wait and ostream:
            _rewrite_job_output(ostream, '{ state: 0, msg: "Caching page..."}')
        extension = CONTENT_TYPES.get(content_type)
        if not extension:
            if wait:
                msg = "Don't know what to do with contenttype \"%s\"" % content_type
                if ostream:
                    _rewrite_job_output(ostream, '{state: 1, msg: "' + urllib.quote(msg) + '"}')
                else:
                    response.error(HTTPCodes.UNSUPPORTED_MEDIA_TYPE, msg)
            return
        # special case HTML/XHTML
        if content_type.lower() in ("text/html", "application/xhtml+xml"):
            tempf = tempfile.mkdtemp()
            uploadloc = os.path.join(tempf, "original.html")
            other_parts = os.path.join(tempf, "original_files")
            # make sure that the folder for other parts exists, even if empty
            os.mkdir(other_parts)
            # remove our bookmarklet, if present
            content = _BOOKMARKLET_PATTERN.sub('', content)
            content = _ADD_FORM_PATTERN.sub('', content)
            c = _OurCacher(url, filename=uploadloc, bits=content, content_type=content_type)
            # make sure that the folder for other parts still exists after caching
            if not os.path.exists(other_parts):
                os.mkdir(other_parts)
        # special case 3x5 cards
        elif (docname and (content_type.lower() == "text/plain") and os.path.splitext(docname)[1] == ".3x5"):
            fd, tempf = tempfile.mkstemp(".3x5")
            fp = os.fdopen(fd, "wb")
            fp.write(content)
            fp.close()
            uploadloc = tempf
        else:
            fd, tempf = tempfile.mkstemp("." + extension)
            fp = os.fdopen(fd, "wb")
            fp.write(content)
            fp.close()
            uploadloc = tempf
        if suppress_duplicates:
            hash = calculate_originals_fingerprint(tempf)
            results = repo.do_query("sha-hash:"+hash)
            if results:
                # it's a duplicate; discard the upload and point at the existing document
                doc = results[0][1]
                if os.path.isdir(tempf):
                    shutil.rmtree(tempf)
                elif os.path.exists(tempf):
                    os.remove(tempf)
                if ostream:
                    _rewrite_job_output(ostream, '{ state: 2, doc_id: "' + doc.id + '"}')
                elif noredir:
                    response.reply(doc.id, "text/plain")
                else:
                    response.redirect("/action/basic/dv_show?doc_id=%s" % doc.id)
                return
    try:
        try:
            # get a cookie for authentication
            cookie = repo.new_cookie(url or content[:min(100, len(content))])
            cookie_str = '%s=%s; path=/; Secure' % (cookie.name(), cookie.value())
            os.environ["UPLIB_COOKIE"] = cookie_str
            doctitle = params.get("md-title")
            docauthors = params.get("md-authors")
            docdate = params.get("md-date")
            doccats = params.get("md-categories")
            metadata = params.get("metadata")
            existing_doc_id = None
            if metadata:
                # mkstemp creates the file atomically; the "finally" clause
                # below removes it if we fail before the subprocess has run
                fd, mdtmpfile = tempfile.mkstemp()
                mdfp = os.fdopen(fd, "w")
                try:
                    mdfp.write(metadata)
                finally:
                    mdfp.close()
                # check to see if we're replacing an existing document
                md2 = read_metadata(StringIO.StringIO(metadata))
                existing_doc_id = md2.get("replacement-contents-for")
                if existing_doc_id and not repo.valid_doc_id(existing_doc_id):
                    raise ValueError("Invalid doc ID %s specified for replacement" % existing_doc_id)
            # now form the command
            scheme = ((repo.get_param("use-http", "false").lower() == "true") or _use_http) and "http" or "https"
            cmd = '%s --verbosity=%s --repository=%s://127.0.0.1:%s ' % (_uplib_add_document, verbosity, scheme, repo.port())
            if doctitle:
                cmd += ' --title=%s' % pipes.quote(doctitle)
            if docauthors:
                cmd += ' --authors=%s' % pipes.quote(docauthors)
            if docdate:
                # quoted like the other request-supplied values
                cmd += ' --date=%s' % pipes.quote(docdate)
            if doccats:
                cmd += ' --categories=%s' % pipes.quote(doccats)
            if mdtmpfile:
                cmd += ' --metadata="%s"' % mdtmpfile
            # NOTE(review): uploadloc may be the request's URL, and double
            # quotes alone do not stop shell expansion inside it -- confirm
            # how subproc runs the command before relying on this
            cmd += ' "%s"' % uploadloc
            if ostream:
                _rewrite_job_output(ostream, '{state: 0, msg: "' + urllib.quote(cmd) + '"}')
            # and invoke the command
            status, output, tsignal = subproc(cmd)
            note(4, "cmd is %s, status is %s, output is %s", repr(cmd), status, repr(output.strip()))
            if mdtmpfile:
                os.unlink(mdtmpfile)
                mdtmpfile = None
            if status == 0:
                # success; output should be doc-id
                doc_id = existing_doc_id or output.strip().split()[-1]
                note(4, "output is '%s'; doc_id for new doc is %s", output.strip(), doc_id)
                if wait and ostream:
                    _rewrite_job_output(ostream, '{ state: 1, doc_id: "' + doc_id + '", msg: "' + urllib.quote(output) + '"}')
                # wait for it to come on-line
                if percent_done_fn:
                    percent_done_fn(40)         # estimate 40% of work done on client side
                while not repo.valid_doc_id(doc_id):
                    if ostream:
                        pending = repo.list_pending(full=True)
                        s = _first(pending, lambda x: x['id'] == doc_id)
                        if not s:
                            # no longer pending, so stop polling
                            break
                        dstatus = s['status']
                        if dstatus == 'error':
                            _rewrite_job_output(ostream, '{ state: 3, doc_id: "' + doc_id
                                                + '", msg: "' + urllib.quote(s['error']) + '"}')
                            break
                        if dstatus == 'unpacking':
                            msg = 'starting ripper process...'
                        elif dstatus == 'ripping':
                            msg = "ripping with ripper '" + s['ripper'] + "'..."
                        elif dstatus == 'moving':
                            msg = 'adding to registered document set...'
                        else:
                            # a status we have no message for; report it
                            # as-is rather than fail on an unset "msg"
                            msg = "document status is '" + str(dstatus) + "'..."
                        _rewrite_job_output(ostream, '{ state: 1, doc_id: "' + doc_id
                                            + '", msg: "' + urllib.quote(msg) + '"}')
                    time.sleep(1.0)
                if percent_done_fn:
                    percent_done_fn(100)        # finished
                if repo.valid_doc_id(doc_id):
                    if bury:
                        # wait up to 100 seconds for it to show up in history list
                        # after that, wait another second, then bury it
                        counter = 100
                        while counter > 0:
                            h = [x.id for x in repo.history()]
                            if doc_id in h:
                                break
                            counter -= 1
                            time.sleep(1)
                        time.sleep(1)
                        repo.touch_doc(doc_id, bury=True, notify=False)
                        note(3, "buried %s", doc_id)
                    if wait:
                        if ostream:
                            _rewrite_job_output(ostream, '{ state: 2, doc_id: "' + doc_id + '"}')
                        elif noredir:
                            response.reply(doc_id, "text/plain")
                        else:
                            response.redirect("/action/basic/dv_show?doc_id=%s" % doc_id)
            else:
                note("cmd <<%s>> failed with status %s:\n%s", cmd, status, output)
                if wait:
                    if ostream:
                        _rewrite_job_output(ostream, '{ state: 3, msg: "' + urllib.quote('Error processing the document:\n' + output) + '"}')
                    else:
                        response.error(HTTPCodes.INTERNAL_SERVER_ERROR, "<pre>" + htmlescape(output) + "</pre>")
        except:
            # deliberately broad:  whatever went wrong is reported to the
            # caller (or just logged, if nobody is waiting) instead of raised
            e = ''.join(traceback.format_exception(*sys.exc_info()))
            if wait:
                note(3, "Exception processing uplib-add-document request:\n%s", htmlescape(e))
                if ostream:
                    _rewrite_job_output(ostream, '{state: 3, msg: "' + urllib.quote("Exception processing uplib-add-document request:\n" + e) + '"}')
                else:
                    response.error(HTTPCodes.INTERNAL_SERVER_ERROR,
                                   "Exception processing uplib-add-document request:\n<pre>" +
                                   htmlescape(e) + "\n</pre>")
            else:
                note("Exception processing uplib-add-document request:\n%s", e)
    finally:
        # remove the temporary copy of the uploaded content
        if tempf and os.path.isfile(tempf):
            os.unlink(tempf)
        elif tempf and os.path.isdir(tempf):
            shutil.rmtree(tempf)
        # the metadata file is normally removed right after the subprocess
        # has run; this catches the paths that fail before that point
        if mdtmpfile and os.path.exists(mdtmpfile):
            os.unlink(mdtmpfile)

コード例 #16

0

ファイルを表示

ファイル: uplibBinding.py プロジェクト: project-renard-survey/xerox-parc-uplib-mirror

    def build_html_abstract_display (self, doc, icon_cid):
        """
        Build an HTML table summarizing a document.

        The table shows the document icon (referenced as ``cid:<icon_cid>``),
        the date derived from the document ID, the title, authors,
        publication date, a comment / abstract / summary, the page count,
        the categories, and links to the original, PDF and HTML versions of
        the document.  All links point at ``https://<self.ip>:<secure port>``.

        :param doc: the document to describe
        :param icon_cid: the content ID under which the document icon is referenced
        :returns: a tuple of the HTML text and the display name of the \
                  document (its title, or ``[name]``, or its ID)
        """

        fp = StringIO()
        # an absent metadata mapping is treated like an empty one
        metadata = doc.get_metadata() or {}
        pubdate = metadata.get("date")
        # strip the leading zero from the day and from the hour
        date = re.sub(" 0|^0", " ",
                      time.strftime("%d %b %Y, %I:%M %p",
                                    time.localtime(id_to_time(doc.id))))
        name = doc.id
        page_count = metadata.get('page-count')
        summary = '<i>(No summary available.)</i>'
        if metadata.has_key('title'):
            name = metadata.get('title')
        elif metadata.has_key('name'):
            name = '[' + metadata.get('name') + ']'
        fp.write(u'<table border=0><tr><td>')
        fp.write(u'<center>')
        fp.write(u'<a href="https://%s:%d/action/basic/dv_show?doc_id=%s" border=0>' % (self.ip, doc.repo.secure_port(), doc.id))
        fp.write(u'<img src="cid:%s">' % icon_cid)
        fp.write(u'</a><p><small><font color="%s">(%s)</font></small></center></td><td>&nbsp;</td>'
                 % (STANDARD_DARK_COLOR, date))
        fp.write(u'<td valign=top><h3>%s</h3>' % htmlescape(name))
        if metadata.has_key(u'authors') or pubdate:
            fp.write(u'<p><small>')
            if metadata.has_key('authors'):
                # authors are free text from the metadata, so escape them
                # just like the title and the summary
                fp.write(u'<b>&nbsp;&nbsp;&nbsp;&nbsp;%s</b>'
                         % htmlescape(re.sub(' and ', ', ', metadata['authors'])))
            if pubdate:
                formatted_date = format_date(pubdate, True)
                fp.write(u'&nbsp;&nbsp;&nbsp;&nbsp;<i><font color="%s">%s</font></i>' % (STANDARD_DARK_COLOR,
                                                                                        formatted_date))
            fp.write(u'</small>\n')
        # prefer the comment, then the abstract, then the summary
        if metadata.has_key('comment'):
            summary = htmlescape(metadata.get('comment', ''))
        elif metadata.has_key('abstract'):
            summary = "<i>" + htmlescape(metadata.get('abstract', '')) + '</i>'
        elif metadata.has_key('summary'):
            summary = '<font color="%s">' % STANDARD_DARK_COLOR + htmlescape(metadata.get('summary')) + '</font>'
        fp.write(u'<P>%s' % summary)
        if page_count:
            fp.write(u'<small><i><font color="%s"> &middot; (%s page%s)'
                     % (STANDARD_DARK_COLOR, page_count, ((int(page_count) != 1) and "s") or ""))
            fp.write(u'</font></i></small>\n')
        cstrings = doc.get_category_strings()
        fp.write(u'<p>Categories:  ')
        if cstrings:
            fp.write(u' &middot; '.join([htmlescape(s) for s in cstrings]))
        else:
            fp.write('(none)')
        typ = doc.get_metadata("apparent-mime-type")
        if typ:
            mtype = ' &middot; <small>%s</small>' % typ
        else:
            mtype = ''
        fp.write(u'<p><a href="https://%s:%s/action/externalAPI/fetch_original?doc_id=%s&browser=true"><font color="%s">(Original%s)</font></a>'
                 % (self.ip, doc.repo.secure_port(), doc.id, STANDARD_DARK_COLOR, mtype))
        fp.write(u' &middot; <a href="https://%s:%s/action/basic/doc_pdf?doc_id=%s"><font color="%s">(PDF)</font></a>'
                 % (self.ip, doc.repo.secure_port(), doc.id, STANDARD_DARK_COLOR))
        # only offer an HTML rendering if the original isn't HTML already;
        # test the MIME type itself, not the markup built around it
        if not (typ and typ.lower().startswith("text/html")):
            fp.write(u' &middot; <a href="https://%s:%s/action/basic/doc_html?doc_id=%s"><font color="%s">(HTML)</font></a>'
                     % (self.ip, doc.repo.secure_port(), doc.id, STANDARD_DARK_COLOR))
        fp.write(u'</td></tr></table>')
        d = fp.getvalue()
        fp.close()
        return d, name

コード例 #17

0

ファイルを表示

ファイル: externalAPI.py プロジェクト: project-renard-survey/xerox-parc-uplib-mirror

def search_repository (repository, response, params):
    """Search repository using specified query, and return hits (matching documents)
    as either a comma-separated values list of (score, ID, title) lines,
    or as an XML document, or as a zipped folder which includes the ``metadata.txt``
    file and the document icon for each hit.

    TODO:  the exact format of the XML bundle should be documented here.

    :param query: an UpLib query string
    :type query: string
    :param no-icon: optional, indicates whether to not return icons in the ziplist format, defaults to ``False``
    :type no-icon: boolean
    :param include-doc-functions: optional, if specified, a ``doc_functions.txt`` entry listing \
           the applicable document functions is added for each hit in the ziplist format
    :type include-doc-functions: string
    :param format: optional, indicates whether to return results as plain-text CSV, XML, or a Zip file. \
           if not specified, the plain-text CSV file is returned.
    :type format: string, either ``"xml"`` or ``"ziplist"``
    :return: a listing of the documents matching the query, in the specified format
    :rtype: either ``text/plain``, ``application/xml``, or ``application/x-uplib-searchresults-zipped``
    """

    from uplib.basicPlugins import get_buttons_sorted, FN_DOCUMENT_SCOPE

    if not params.has_key('query'):
        response.error(HTTPCodes.BAD_REQUEST, "No query specified.\n")
        return

    query = unicode(params.get('query'), INTERACTION_CHARSET, "replace")
    # sort ascending, then reverse, so the hits are in descending order
    results = repository.do_query(query)
    results.sort()
    results.reverse()

    def get_doc_functions (doc):
        # one "key, url, <field 3>, <field 0>" line for each document-scope
        # button whose applicability test (field 5) is absent or accepts the doc
        buttons = get_buttons_sorted(FN_DOCUMENT_SCOPE)
        retval = ""
        for button in buttons:
            if (not button[1][5]) or (button[1][5](doc)):
                url = button[1][4]
                if url is None:
                    url = "/action/basic/repo_userbutton?uplib_userbutton_key=%s&doc_id=%%s" % button[0]
                retval += "%s, %s, %s, %s\n" % (button[0], url, button[1][3], button[1][0])
        return retval

    no_icon = (params.get("no-icon") == "true")

    if response.xml_request or (params.get("format") == "xml"):

        retval = getDOMImplementation().createDocument(None, "result", None)
        e = retval.createElement('query')
        e.setAttribute('query', query)
        retval.documentElement.appendChild(e)
        for score, doc in results:
            e = retval.createElement('hit')
            e.setAttribute('doc_id', doc.id)
            e.setAttribute('score', str(score))
            title = doc.get_metadata("title") or u""
            title = title.replace("\r", " ")
            note("title is '%s'", title)
            e.setAttribute('title', title)
            retval.documentElement.appendChild(e)
        fp = response.open("application/xml;charset=utf-8")
        fp.write(retval.toxml("UTF-8") + "\n")
        fp.close()
        return

    elif params.get("format") == "ziplist":
        include_doc_functions = params.get("include-doc-functions")
        # mkstemp creates the file itself, so nobody else can claim the name;
        # ZipFile then re-opens it by path
        fd, tpath = tempfile.mkstemp()
        os.close(fd)
        try:
            zf = zipfile.ZipFile(tpath, "w")
            try:
                for score, doc in results:
                    folder = doc.id.encode("ASCII", "strict") + "/"
                    zf.writestr(folder, "")
                    zf.writestr(folder + "score", str(score))
                    if not no_icon:
                        zf.writestr(folder + "first.png", doc.document_icon())
                    if include_doc_functions:
                        zf.writestr(folder + "doc_functions.txt", get_doc_functions(doc))
                    zf.writestr(folder + "metadata.txt", doc.metadata_text())
            finally:
                zf.close()
            # NOTE(review): the final argument presumably asks for the file
            # to be deleted once it has been sent -- confirm against return_file
            response.return_file("application/x-uplib-searchresults-zipped", tpath, True)
        except:
            # deliberately broad:  any failure is reported as a server error
            msg = ''.join(traceback.format_exception(*sys.exc_info()))
            if os.path.exists(tpath):
                os.remove(tpath)
            note("Exception building zipfile for search results:\n%s", msg)
            response.error(HTTPCodes.INTERNAL_SERVER_ERROR, "Can't build zipfile for search results:\n%s\n" % htmlescape(msg))
    else:

        fp = response.open('text/plain; charset=UTF-8')
        for score, doc in results:
            title = doc.get_metadata("title") or u""
            title = title.replace("\r", " ")
            fp.write("%f,%s,%s\n" % (score, doc.id, title.encode("UTF-8", "replace")))
        fp.close()

    return

コード例 #18

0

ファイルを表示

ファイル: UploadDocument.py プロジェクト: project-renard-survey/xerox-parc-uplib-mirror

def add(repo, response, params):
    """
    Add a document to the repository, calling ``uplib-add-document`` in a subprocess.

    :param wait: optional, whether to wait for the incorporation and ripping to \
           happen.  If not specified, ``add`` returns immediately after starting \
           the incorporation process.  If specified as ``true``, ``add`` will wait \
           until the document is available in the repository.  If specified as ``watch``, \
           ``add`` will start a new ``Job`` which can be "watched" with the ``fetch_job_output`` \
           function in ``uplib.externalAPI``.  If specified as ``bounce``, and the ``URL`` \
           parameter is also specified, the incorporation \
           will be started, and ``add`` will immediately return an HTTP redirect to \
           the value of ``URL``.  If specified as ``watchexternal``, will start a new ``Job`` \
           and immediately return the Job ID as a text/plain string.
    :type wait: string containing either ``watch`` or ``watchexternal`` or ``true`` or ``bounce``
    :param content: the actual bits of the document.  One of either ``content`` or ``URL`` must be specified.
    :type content: byte sequence
    :param contenttype: the MIME type for the document content
    :type contenttype: string containing MIME type
    :param URL: the URL for the document.  One of either ``content`` or ``URL`` must be specified.
    :type URL: string
    :param documentname: the name of the document
    :type documentname: string
    :param no-redirect: if specified as ``true``, no redirect to the incorporated document \
           will be returned; instead, a document ID string as "text/plain" will be returned, \
           if ``wait`` is specified as ``true``.  Optional, defaults to "false".
    :type no-redirect: boolean
    :param bury: optional, defaults to "false", if specified as "true" will cause \
           the newly added document to be "buried" in the history list, so that it \
           won't show up in the most-recently-used listing, as it normally would
    :type bury: boolean
    :param md-title: title to put in the document metadata
    :type md-title: string
    :param md-authors: standard UpLib authors line (" and "-separated) to put in the document metadata
    :type md-authors: string
    :param md-date: standard UpLib date ([MM[/DD]/]YYYY) to put in the document metadata
    :type md-date: string
    :param md-categories: standard UpLib categories string (comma-separated category names) to put in the document metadata
    :type md-categories: string
    :param metadata: contents of a standard UpLib metadata.txt file.  If this file is provided, \
           it is typically just passed unchanged to ``uplib-add-document``.  However, it is \
           inspected for the metadata element ``replacement-contents-for``, and if that is found, \
           ``add`` will check to see that the specified document ID is still valid in that repository.
    :type metadata: string containing "text/rfc822-headers" format data
    :returns: depends on what parameters are passed.  If ``wait`` is specified as ``true`` and ``no-redirect`` \
              is specified as ``true``, will simply wait until the document has been incorporated and \
              return the document ID as a plain text string.  If ``no-redirect`` is not specified, \
              and ``wait`` is ``true``, will return an HTTP redirect to the new document in the repository. \
              If ``wait`` is specified as ``bounce``, will return an immediate redirect to the original \
              URL for the document.  If ``wait`` is not specified, will simply immediately return an HTTP \
              200 (Success) code and a non-committal message.
    :rtype: various
    """

    wait = params.get("wait")
    content = params.get("content")
    url = params.get("URL")
    docname = params.get("documentname")
    if content and (not params.get("contenttype")):
        note(3, "add:  No contenttype specified.")
        response.error(HTTPCodes.BAD_REQUEST, "No contenttype specified")
        return
    if (not content) and (not url):
        note(3, "add:  Neither content nor URL specified.")
        response.error(HTTPCodes.BAD_REQUEST, "Nothing to upload!")
        return

    # "wait" is optional, so normalize it once; a missing value becomes ""
    waitmode = (wait or "").lower()

    if waitmode in ("watch", "watchexternal"):
        # the incorporation runs as a Job whose output the client can poll
        job = Job(_add_internal, repo, None, params, content, True)
        note(3, "job id is %s", job.id)
        if waitmode == "watchexternal":
            response.reply(job.id, "text/plain")
            return

        if url:
            title = htmlescape(url)
        elif docname:
            title = htmlescape(docname)
        else:
            title = 'document'
        fp = response.open()
        fp.write('<head><title>Adding %s to repository...</title>\n' % title)
        fp.write('<script type="text/javascript" language="javascript" src="/html/javascripts/prototype.js"></script>\n')
        fp.write(JOBS_JAVASCRIPT)
        fp.write('</head><body bgcolor="%s">\n' % STANDARD_BACKGROUND_COLOR)
        fp.write('<p style="background-color: %s;"><span id="swirl">%s</span> <span id="titlespan">Adding <b>%s</b>...</span></p>\n' % (
            STANDARD_TOOLS_COLOR, SWIRLIMG, title))
        fp.write('<p id="progressreport"></p>\n')
        # NOTE(review): "title" is HTML-escaped, but it is also spliced into a
        # Javascript string literal below -- confirm htmlescape makes it safe there
        fp.write('<script type="text/javascript">\n'
                 'function report_error (req) {\n'
                 '  // alert("Can\'t check status of job");\n'
                 '}\n'
                 'function update_progress_report(jobid, percent_done, update_text) {\n'
                 '  // alert("update_text is " + update_text);\n'
                 '  var state = eval("(" + update_text + ")");\n'
                 '  // alert("state is " + state);\n'
                 '  if (percent_done >= 100) {\n'
                 '     $("swirl").innerHTML = \'' + SWIRLSPACER + '\';\n'
                 '     $("titlespan").innerHTML = "Finished adding ' + title + '.";\n'
                 '  }\n'
                 '  if (state.state == 2) {\n'
                 '    $("progressreport").innerHTML = \'Finished.\\n<p>Click here <a href="/action/basic/dv_show?doc_id=\' + unescape(state.doc_id) + \'"><img src="/docs/\' + unescape(state.doc_id) + \'/thumbnails/first.png" border=0></a> to open the document in the UpLib browser viewer.\';\n'
                 '  } else if (state.state == 0) {\n'
                 '    $("progressreport").innerHTML = "Extracting page images and text...";\n'
                 '  } else if (state.state == 1) {\n'
                 '    $("progressreport").innerHTML = "Finished client side, ID is " + unescape(state.doc_id) + "<br>" + unescape(state.msg);\n'
                 '  } else {\n'
                 '    $("progressreport").innerHTML = "Error:<br><pre>" + unescape(state.msg) + "</pre>";\n'
                 '  }\n'
                 '}\n'
                 'Jobs.monitor("' + job.id + '", update_progress_report, 3, report_error);\n'
                 '</script>\n')
        fp.write('</body>\n')
        return

    elif waitmode == "true":
        # _add_internal sends the reply itself once the document is available
        response.fork_request(_add_internal, None, None, repo, response, params, content, True)
    else:
        # fire and forget:  incorporate in the background and answer right away
        uthread.start_new_thread(_add_internal, (None, None, repo, response, params, content, False),
                                 "UploadDocument:  adding %s" % (docname or url or time.ctime()))
        if url and (waitmode == "bounce"):
            response.redirect(url)
        else:
            response.reply("Started new thread to add document", "text/plain")

コード例 #19

0

ファイルを表示

ファイル: related.py プロジェクト: project-renard-survey/xerox-parc-uplib-mirror

def _append_document_group(xmldoc, tagname, scored_docs, attributes=None):
    """
    Append a ``tagname`` element to the root of ``xmldoc``, holding one
    ``document`` child (with ``doc_id``, ``score`` and ``title`` attributes)
    for each ``(doc, score)`` pair in ``scored_docs``.

    ``attributes``, if given, is a sequence of ``(name, value)`` pairs set on
    the group element itself.
    """
    group = xmldoc.createElement(tagname)
    for attr_name, attr_value in (attributes or ()):
        group.setAttribute(attr_name, attr_value)
    for scored_doc, score in scored_docs:
        e = xmldoc.createElement('document')
        e.setAttribute('doc_id', scored_doc.id)
        e.setAttribute('score', str(score))
        title = _safe_title(scored_doc)
        note("title is %s", repr(title))
        e.setAttribute('title', title)
        group.appendChild(e)
    xmldoc.documentElement.appendChild(group)


def related (repo, response, params):
    """
    Find other documents related to the query document.

    :param doc_id: the query document
    :type doc_id: UpLib doc ID string
    :param use-authorship: whether or not to use co-authorship as a measure of relatedness.  Defaults to "true".
    :type use-authorship: "true" or "false"
    :param use-history: whether or not to use the use history (most recently used list) as a factor in the calculation.  Defaults to "true".
    :type use-history: "true" or "false"
    :param format: whether to return non-browser format results.  Specifying "xml" will cause an XML document to be returned containing the results.  Specifying "ziplist" will cause a zip file containing extra information about each document to be returned.  If the ``format`` parameter is not specified, an HTML page showing the results broken down by category is returned.
    :type format: "xml" or "ziplist" or none
    :result: list of other documents related to the query document.  See discussion of the ``format`` parameter.
    :rtype: varies
    """

    doc_id = params.get("doc_id")
    if not doc_id:
        response.error(HTTPCodes.BAD_REQUEST, "No doc_id specified.")
        return
    if not repo.valid_doc_id(doc_id):
        # doc_id comes straight from the request, so escape it before echoing
        # it back (the ziplist branch below escapes its error text the same way)
        response.error(HTTPCodes.NOT_FOUND, "Invalid doc_id %s specified." % htmlescape(doc_id))
        return
    doc = repo.get_document(doc_id)

    # both flags default to "true" when the parameter is absent or empty
    use_authorship = (params.get("use-authorship") or "true") == "true"
    use_history = (params.get("use-history") or "true") == "true"

    docs, likethis, authored, recent, others, qstring = find_related(doc, True, use_history=use_history, use_authorship=use_authorship)

    if response.xml_request or (params.get("format") == "xml"):

        retval = getDOMImplementation().createDocument(None, "result", None)

        # the "paradigm" element describes the query document and the options used
        e = retval.createElement('paradigm')
        e.setAttribute('id', doc_id)
        e.setAttribute("title", _safe_title(doc))
        e.setAttribute('use-history', use_history and "true" or "false")
        e.setAttribute('use-authorship', use_authorship and "true" or "false")
        retval.documentElement.appendChild(e)

        # one group element per category, in a fixed order
        _append_document_group(retval, 'similar', likethis, [('query', qstring)])
        _append_document_group(retval, 'co-authored', authored)
        _append_document_group(retval, 'recent', recent)
        # NOTE(review): "others" is unpacked as (doc, score) pairs here but as
        # (explanation, doc, score) triples in the HTML branch below -- one of
        # the two is presumably wrong; confirm against find_related().
        _append_document_group(retval, 'linked', others)
        _append_document_group(retval, 'combined', docs)

        fp = response.open("application/xml;charset=utf-8")
        fp.write(retval.toxml("UTF-8") + "\n")
        fp.close()

    elif params.get("format") == "ziplist":
        no_icon = (params.get("no-icon") == "true")
        include_doc_functions = params.get("include-doc-functions")
        # mkstemp() creates the file atomically; mktemp() only returns a name
        # and leaves a window in which another process can claim it
        fd, tpath = tempfile.mkstemp()
        os.close(fd)
        try:
            # opening the archive inside the try block means a failure here is
            # reported and the temp file cleaned up like any other error
            zf = zipfile.ZipFile(tpath, "w")
            try:
                for related_doc, score in docs:
                    prefix = related_doc.id.encode("ASCII", "strict") + "/"
                    zf.writestr(prefix, "")
                    zf.writestr(prefix + "score", str(score))
                    if not no_icon:
                        zf.writestr(prefix + "first.png", related_doc.document_icon())
                    if include_doc_functions:
                        zf.writestr(prefix + "doc_functions.txt", get_doc_functions(related_doc))
                    zf.writestr(prefix + "metadata.txt", related_doc.metadata_text())
            finally:
                zf.close()
            # NOTE(review): the trailing True presumably asks return_file to
            # delete the temp file once sent -- confirm against its definition
            response.return_file("application/x-uplib-searchresults-zipped", tpath, True)
        except:
            # format_exception() lines already end in newlines, so join with ""
            msg = "".join(traceback.format_exception(*sys.exc_info()))
            # the file may already be gone; don't let cleanup mask the real error
            if os.path.exists(tpath):
                os.remove(tpath)
            note("Exception building zipfile for search results:\n%s", msg)
            response.error(HTTPCodes.INTERNAL_SERVER_ERROR, "Can't build zipfile for search results:\n%s\n" % htmlescape(msg))
    else:

        fp = response.open()

        title = "Documents related to %s" % repr(doc.get_metadata("title") or doc.id)

        fp.write("<head><title>%s</title>\n" % htmlescape(title))
        fp.write('<meta http-equiv="Content-Script-Type" content="text/javascript">\n')
        fp.write('<link REL="SHORTCUT ICON" HREF="/favicon.ico">\n')
        fp.write('<link REL="ICON" type="image/ico" HREF="/favicon.ico">\n')
        issue_javascript_head_boilerplate(fp)
        issue_title_styles(fp)
        fp.write('</head><body bgcolor="%s">\n' % STANDARD_BACKGROUND_COLOR)
        issue_menu_definition(fp)
        fp.write('<h2>%s</h2><br>\n' % htmlescape(title))

        show_abstract(repo, doc, fp, True, showpagesearch=False)

        # loop variables are named related_doc so they do not shadow this function
        fp.write('<p><hr><b>Context documents:</b><br>')
        for related_doc, score in docs:
            show_title (fp, related_doc, { related_doc.id: score }, True)

        fp.write('<p><hr><b>Like this:</b><br>')
        for related_doc, score in likethis:
            show_title (fp, related_doc, { related_doc.id: score }, True)
        fp.write('<p><i>query was:  %s</i>\n' % htmlescape(qstring))

        fp.write('<p><hr><b>Co-authored:</b><br>')
        for related_doc, score in authored:
            show_title (fp, related_doc, { related_doc.id: score }, True)

        fp.write('<p><hr><b>Recently consulted:</b><br>')
        for related_doc, score in recent:
            show_title (fp, related_doc, { related_doc.id: score }, True)

        # NOTE(review): "others" is unpacked as triples here but as pairs in
        # the XML branch above -- confirm the shape find_related() returns.
        fp.write('<p><hr><b>Other considerations:</b><br>')
        for explanation, related_doc, score in others:
            fp.write('<p><i>%s</i><br>\n' % htmlescape(explanation))
            show_title (fp, related_doc, { related_doc.id: score }, True)

        fp.write('</body>\n')
        fp.close()