コード例 #1
0
def _categorize_is_excluded(name, exclusions):
    """Return True if the lower-cased `name` matches any compiled pattern in `exclusions`."""
    lowered = name.lower()
    for exclusion in exclusions or ():
        if exclusion.match(lowered):
            return True
    return False


def _categorize_figure_size(count, avgsize):
    """Return the font scale for a category holding `count` documents.

    `avgsize` is the mean number of documents per category.  The scale is
    sqrt(ln(count * (e - 1) / avgsize + 1)), which is 1.0 for a category of
    exactly average size.  A (near-)zero `avgsize` yields 0.0001 instead of
    dividing by zero.
    """
    if avgsize < 0.0001:
        return 0.0001
    return math.sqrt(math.log((count * (math.e - 1)) / avgsize + 1))


def _categorize_write_toggle_link(fp, action_prefix, doc_id, name, assigned, size, title_suffix=""):
    """Write the anchor that adds category `name` to the document, or removes it.

    `assigned` says whether the document already carries the category: assigned
    categories are drawn in red and link to the "remove" action, the others are
    drawn in black and link to the "add" action.  `title_suffix` is appended to
    the tooltip text.
    """
    if assigned:
        verb, color = "remove", "red"
    else:
        verb, color = "add", "black"
    action = '%s/doc_%s_category?doc_id=%s&tag=%s' % (
        action_prefix, verb, doc_id, urllib.quote_plus(name))
    fp.write('<a style="font-size: %fem; color: %s;" href="%s" title="%s the \'%s\' category%s">%s</a>' % (
        size, color, action, verb, htmlescape(name), title_suffix, htmlescape(name)))


def doc_categorize (repo, response, params):
    """
    Show a page for adding categories to, or removing them from, a document.

    The page contains the document's abstract, up to ten "likely" categories
    as returned by ``find_likely_tags``, a form for typing in a new category,
    and every category returned by ``repo.get_categories_with_docs()``, each
    scaled by how many documents it holds relative to the average.  Categories
    already on the document (or that are a parent of one of its categories)
    are shown in red and link to the ``doc_remove_category`` action; all others
    are shown in black and link to ``doc_add_category``.  Categories matching
    one of the comma-separated regular expressions in the
    ``categorize-excluded-categories`` configuration option are not listed.

    :param doc_id: the document to categorize
    :type doc_id: UpLib doc ID string
    :result: an HTML page, or a BAD_REQUEST error if ``doc_id`` is missing or invalid
    """

    from uplib.basicPlugins import show_abstract, _is_sensible_browser
    from uplib.basicPlugins import STANDARD_BACKGROUND_COLOR, STANDARD_LEGEND_COLOR
    from uplib.basicPlugins import __issue_javascript_head_boilerplate as issue_javascript_head_boilerplate
    from uplib.basicPlugins import __issue_menu_definition as issue_menu_definition
    from uplib.basicPlugins import __issue_title_styles as issue_title_styles

    # The exclusion patterns are compiled once and cached in the module global.
    global _CONFIGURATION
    if _CONFIGURATION is None:
        patterns = configurator.default_configurator().get("categorize-excluded-categories", "") or ""
        _CONFIGURATION = { "exclusions": [
            re.compile(x.strip()) for x in patterns.split(",") if x.strip()]}
    exclusions = _CONFIGURATION.get("exclusions") or []

    doc_id = params.get("doc_id")
    if not doc_id:
        response.error(HTTPCodes.BAD_REQUEST, "No doc_id parameter specified.")
        return
    doc = repo.valid_doc_id(doc_id) and repo.get_document(doc_id)
    if not doc:
        # doc_id comes straight from the request, so escape it before echoing it back
        response.error(HTTPCodes.BAD_REQUEST, "Invalid doc_id parameter '%s' specified." % htmlescape(doc_id))
        return

    # first three path components of the request; the add/remove actions live under it
    action_prefix = '/'.join(response.request_path.split('/')[:3])

    fp = response.open()
    title = (doc.get_metadata("title") or doc.id).encode("UTF-8", "strict")
    fp.write("<head><title>Categorizing '%s'</title>\n" % htmlescape(title))
    fp.write('<meta http-equiv="Content-Script-Type" content="text/javascript">\n')
    fp.write('<link REL="SHORTCUT ICON" HREF="/favicon.ico">\n')
    fp.write('<link REL="ICON" type="image/ico" HREF="/favicon.ico">\n')
    issue_javascript_head_boilerplate(fp)
    issue_title_styles(fp)
    fp.write('</head><body bgcolor="%s">\n' % STANDARD_BACKGROUND_COLOR)
    issue_menu_definition(fp)
    show_abstract(repo, doc, fp, _is_sensible_browser(response.user_agent), showpagesearch=False)
    fp.write("<hr />\n")

    # Lower-cased categories of the document plus every parent category,
    # e.g. "a/b/c" contributes "a", "a/b" and "a/b/c".
    doccats = set()
    for cat in doc.get_category_strings():
        parts = cat.lower().split('/')
        for i in range(1, len(parts) + 1):
            doccats.add('/'.join(parts[:i]))

    tags = find_likely_tags(doc)
    if tags:
        max_likely = 10
        fp.write("<center><small><i>Likely categories</i></small><br />")
        shown = 0
        for name, (score, ndocs, ascore) in tags:
            # ">=" so that at most max_likely suggestions are listed
            if shown >= max_likely:
                break
            if _categorize_is_excluded(name, exclusions):
                continue
            if shown > 0:
                fp.write(" &middot; ")
            _categorize_write_toggle_link(fp, action_prefix, doc.id, name, name.lower() in doccats,
                                          1, " (score=%.3f)" % ascore)
            shown += 1
        fp.write("</center><hr />\n")

    fp.write('<form action="%s" method=get><center>Add a new category to this document: ' %
             (action_prefix + '/doc_add_category'))
    fp.write('<input type=hidden name="doc_id" value="%s">\n' % doc.id)
    fp.write('<input type=text name="tag" value="" size=40></center></form>\n')

    note(4, "doc_categorize:  retrieving repository categories... (%s)", time.ctime())
    cats = repo.get_categories_with_docs()
    note(4, "doc_categorize:  have categories (%s)", time.ctime())
    if cats:
        fp.write("<hr>\n<center><small><i>All categories</i></small><br />")
        avgsize = sum([len(x) for x in cats.values()]) / float(len(cats))
        catkeys = list(cats.keys())
        catkeys.sort(key=lambda x: x.lower())
        first = True
        for name in catkeys:
            if _categorize_is_excluded(name, exclusions):
                continue
            if not first:
                fp.write(" &middot; ")
            first = False

            ndocs_in_cat = len(cats[name])
            size = max(0.5, _categorize_figure_size(ndocs_in_cat, avgsize))
            _categorize_write_toggle_link(fp, action_prefix, doc.id, name, name.lower() in doccats, size)

            # small superscript count linking to a search for the category's documents
            actionsee = '/action/basic/repo_search?query=%s' % (
                urllib.quote_plus('categories:"%s"' % name))
            if ndocs_in_cat == 1:
                howmany, plural = "one", ""
            else:
                howmany, plural = str(ndocs_in_cat), "s"
            fp.write('<a style="font-size: %fem; color: %s; vertical-align: super;" href="%s" ' % (
                max(0.4, size/2), STANDARD_LEGEND_COLOR, actionsee) +
                     'title="see the %s document%s in the \'%s\' category" target="_blank">%d</a>' % (
                         howmany, plural, htmlescape(name), ndocs_in_cat))
        fp.write("</center>\n")

    fp.write("</body>\n")
    fp.close()
コード例 #2
0
def _related_xml_group(xmldoc, tagname, scored_docs):
    """Build a `tagname` element holding one 'document' child per entry of `scored_docs`.

    The last two items of each entry are taken as (document, score), so both
    (doc, score) pairs and (explanation, doc, score) triples are accepted.
    The element is returned without being attached to `xmldoc`.
    """
    group = xmldoc.createElement(tagname)
    for entry in scored_docs:
        other, score = entry[-2], entry[-1]
        e = xmldoc.createElement('document')
        e.setAttribute('doc_id', other.id)
        e.setAttribute('score', str(score))
        title = _safe_title(other)
        note("title is %s", repr(title))
        e.setAttribute('title', title)
        group.appendChild(e)
    return group


def _related_write_section(fp, heading, scored_docs):
    """Write an HTML section titled `heading` with one title line per (doc, score) pair."""
    fp.write('<p><hr><b>%s</b><br>' % heading)
    for other, score in scored_docs:
        show_title (fp, other, { other.id: score }, True)


def related (repo, response, params):
    """
    Find other documents related to the query document.

    :param doc_id: the query document
    :type doc_id: UpLib doc ID string
    :param use-authorship: whether or not to use co-authorship as a measure of relatedness.  Defaults to "true".
    :type use-authorship: "true" or "false"
    :param use-history: whether or not to use the use history (most recently used list) as a factor in the calculation.  Defaults to "true".
    :type use-history: "true" or "false"
    :param format: whether to return non-browser format results.  Specifying "xml" will cause an XML document to be returned containing the results.  Specifying "ziplist" will cause a zip file containing extra information about each document to be returned.  If the ``format`` parameter is not specified, an HTML page showing the results broken down by category is returned.
    :type format: "xml" or "ziplist" or none
    :param no-icon: only used with the "ziplist" format.  If "true", the ``first.png`` icon of each document is left out of the zip file.
    :type no-icon: "true" or none
    :param include-doc-functions: only used with the "ziplist" format.  If given (any non-empty value), a ``doc_functions.txt`` entry is added for each document.
    :type include-doc-functions: string or none
    :result: list of other documents related to the query document.  See discussion of the ``format`` parameter.
    :rtype: varies
    """

    doc_id = params.get("doc_id")
    if not doc_id:
        response.error(HTTPCodes.BAD_REQUEST, "No doc_id specified.")
        return
    if not repo.valid_doc_id(doc_id):
        # doc_id comes straight from the request, so escape it before echoing it back
        response.error(HTTPCodes.NOT_FOUND, "Invalid doc_id %s specified." % htmlescape(doc_id))
        return
    doc = repo.get_document(doc_id)

    use_authorship = (params.get("use-authorship") or "true") == "true"
    use_history = (params.get("use-history") or "true") == "true"

    docs, likethis, authored, recent, others, qstring = find_related(doc, True, use_history=use_history, use_authorship=use_authorship)

    # NOTE(review): the original XML code unpacked entries of `others` as
    # (doc, score) while the HTML code unpacked them as (explanation, doc, score).
    # The real shape is decided by find_related, which is not visible here, so
    # both branches below accept either shape -- confirm and simplify.

    output_format = params.get("format")

    if response.xml_request or (output_format == "xml"):

        retval = getDOMImplementation().createDocument(None, "result", None)
        root = retval.documentElement
        e = retval.createElement('paradigm')
        e.setAttribute('id', doc_id)
        e.setAttribute("title", _safe_title(doc))
        e.setAttribute('use-history', use_history and "true" or "false")
        e.setAttribute('use-authorship', use_authorship and "true" or "false")
        root.appendChild(e)

        similar = _related_xml_group(retval, 'similar', likethis)
        similar.setAttribute('query', qstring)
        root.appendChild(similar)
        for tagname, scored_docs in (('co-authored', authored),
                                     ('recent', recent),
                                     ('linked', others),
                                     ('combined', docs)):
            root.appendChild(_related_xml_group(retval, tagname, scored_docs))

        fp = response.open("application/xml;charset=utf-8")
        fp.write(retval.toxml("UTF-8") + "\n")
        fp.close()

    elif output_format == "ziplist":
        no_icon = (params.get("no-icon") == "true")
        include_doc_functions = params.get("include-doc-functions")
        # mkstemp creates the file atomically, avoiding the race inherent in mktemp
        fd, tpath = tempfile.mkstemp()
        os.close(fd)
        try:
            zf = zipfile.ZipFile(tpath, "w")
            try:
                for other, score in docs:
                    prefix = other.id.encode("ASCII", "strict") + "/"
                    zf.writestr(prefix, "")
                    zf.writestr(prefix + "score", str(score))
                    if not no_icon:
                        zf.writestr(prefix + "first.png", other.document_icon())
                    if include_doc_functions:
                        zf.writestr(prefix + "doc_functions.txt", get_doc_functions(other))
                    zf.writestr(prefix + "metadata.txt", other.metadata_text())
            finally:
                zf.close()
            response.return_file("application/x-uplib-searchresults-zipped", tpath, True)
        except:
            # Deliberately broad: this is the request boundary, and whatever went
            # wrong is logged and reported to the client rather than swallowed.
            msg = "".join(traceback.format_exception(*sys.exc_info()))
            try:
                os.remove(tpath)
            except OSError:
                # the file may already be gone; don't let cleanup mask the real error
                note("Couldn't remove temporary file %s", tpath)
            note("Exception building zipfile for search results:\n%s", msg)
            response.error(HTTPCodes.INTERNAL_SERVER_ERROR, "Can't build zipfile for search results:\n%s\n" % htmlescape(msg))
    else:

        fp = response.open()

        title = "Documents related to %s" % repr(doc.get_metadata("title") or doc.id)

        fp.write("<head><title>%s</title>\n" % htmlescape(title))
        fp.write('<meta http-equiv="Content-Script-Type" content="text/javascript">\n')
        fp.write('<link REL="SHORTCUT ICON" HREF="/favicon.ico">\n')
        fp.write('<link REL="ICON" type="image/ico" HREF="/favicon.ico">\n')
        issue_javascript_head_boilerplate(fp)
        issue_title_styles(fp)
        fp.write('</head><body bgcolor="%s">\n' % STANDARD_BACKGROUND_COLOR)
        issue_menu_definition(fp)
        fp.write('<h2>%s</h2><br>\n' % htmlescape(title))

        show_abstract(repo, doc, fp, True, showpagesearch=False)

        _related_write_section(fp, 'Context documents:', docs)

        _related_write_section(fp, 'Like this:', likethis)
        fp.write('<p><i>query was:  %s</i>\n' % htmlescape(qstring))

        _related_write_section(fp, 'Co-authored:', authored)

        _related_write_section(fp, 'Recently consulted:', recent)

        fp.write('<p><hr><b>Other considerations:</b><br>')
        for entry in others:
            other, score = entry[-2], entry[-1]
            if len(entry) > 2:
                # leading item is the explanation of why the document was picked
                fp.write('<p><i>%s</i><br>\n' % htmlescape(entry[0]))
            show_title (fp, other, { other.id: score }, True)

        fp.write('</body>\n')
        fp.close()