def setFilterPackageInfo(self, arg):
     """Show a rich-text summary of a filter package spec in the info label.

     `arg` is handed to `filters_factory.FilterPackageSpec`.  For a URL spec
     the label shows the location; otherwise it shows the package name
     followed by its directory.  All displayed values are HTML-escaped.
     """
     spec = filters_factory.FilterPackageSpec(arg)
     if not spec.is_url:
         name_html = htmlescape(str(spec.fpname))
         dir_html = htmlescape(str(spec.fpdir))
         label_text = "<b>{}:</b> {}".format(name_html, dir_html)
     else:
         label_text = "Location: <b>{}</b>".format(htmlescape(str(spec.url)))
     self.ui.lblInfo.setText(label_text)
 def sentence_report(self):
     """Build an HTML report of the misclassified sentences.

     Sentences whose true and predicted label indices differ are grouped by
     their (true, predicted) pair; one table is emitted per non-empty pair,
     in label order.

     Returns
     -------
     unicode
         HTML containing misclassified sentences.
     """
     y_true = self._data['y_true']
     y_pred = self._data['y_pred']
     sentences = self._data['sentences']
     labels = self._data['labels']

     # (true index, predicted index) -> sentences that were confused that way
     confused = defaultdict(list)
     for pos, pair in enumerate(zip(y_true, y_pred)):
         if pair[0] != pair[1]:
             confused[pair].append(sentences[pos])

     chunks = ['<h3>Misclassified sentences</h3>']
     for s, s_label in enumerate(labels):
         for d, d_label in enumerate(labels):
             if s == d:
                 continue
             sents = confused[(s, d)]
             if not sents:
                 continue
             chunks.append(
                 '<table><tr><th>{0} &#8594; {1}. Count: {2}.</th></tr>'.format(
                     htmlescape(s_label), htmlescape(d_label), len(sents)))
             chunks.extend(
                 '<tr><td>{0}</td></tr>'.format(htmlescape(sent))
                 for sent in sents)
             chunks.append('</table>')
     return ''.join(chunks)
Example #3
0
    def on_get(self, req, resp):
        """Serve the HTML user-list page for the server given by ``serverid``.

        The request is answered with a 404 and a short explanatory body when
        the ``serverid`` query parameter is missing, was supplied several
        times (and therefore arrives as a list), or is not a key of
        ``self.server_user_list``.  Otherwise the users of that server are
        sorted by escaped lowercase username, rendered through
        ``self.dynamic_html`` and embedded into ``self.static_html``.
        """
        server_id = req.params.get("serverid")

        # Determine why the request has to be rejected, if at all.  The list
        # check must come before the membership test.
        if server_id is None:
            rejection = "That server hasn't enabled this feature. (No server ID)"
        elif isinstance(server_id, list):
            rejection = "That server hasn't enabled this feature. (Invalid server ID)"
        elif server_id not in self.server_user_list:
            rejection = "That server hasn't enabled this feature. (No data)"
        else:
            rejection = None

        if rejection is not None:
            resp.body = rejection
            resp.status = falcon.status_codes.HTTP_NOT_FOUND
            self.log_info("Got invalid server id in get request.")
            return

        # Users ordered alphabetically by their escaped, lowercased username.
        users = list(self.server_user_list[server_id])
        users.sort(key=lambda user: htmlescape(user["username"].lower(), quote=True))

        # One rendered list entry per user.
        entries = []
        for user in users:
            entries.append(self.dynamic_html.format(
                user["icon_url"],
                htmlescape(user["username"], quote=True),
                user["last_seen_time"]))

        resp.content_type = "text/html"
        resp.body = self.static_html.format("".join(entries))
        resp.status = falcon.HTTP_OK

        self.log_info("Served userlist page for server id {0}.".format(server_id))
 def updateSrcDisplay(self):
     """Render the configured sources into the sources text widget.

     A single source is shown as a paragraph, several sources as an ordered
     list (each entry HTML-escaped); with no sources a grey italic
     placeholder is shown instead.
     """
     sources = self.data.src
     if not sources:
         self.ui.txtSources.setHtml(
             "<html><head/><body><p style=\"color: #808080; font-style:italic\">(no sources set)</p></body></html>"
         )
         return

     if len(sources) == 1:
         inner = "<p>" + htmlescape(sources[0]) + "</p>"
     else:
         items = "".join("<li>" + htmlescape(src) + "</li>" for src in sources)
         inner = "<ol>" + items + "</ol>"
     self.ui.txtSources.setHtml("<html><head/><body>" + inner + "</body></html>")
Example #5
0
    def _display_header(self):
        """Show the raw header of the bibolamazi file in the info text widget.

        When the stripped raw header is non-empty, an explanatory paragraph
        followed by the HTML-escaped header text is displayed.  When it is
        empty the page is rendered with empty content.
        """
        # Default to empty content: without this, `fileinfohtml` stayed
        # unbound for files that have no raw header and building the page
        # below raised UnboundLocalError.
        fileinfohtml = ""

        rawheader = self.bibolamaziFile.rawHeader().strip()
        if rawheader:
            fileinfohtml = ("<h2>Raw Header</h2>\n"
                            "<p class=\"shadow\">The top section of the bibolamazi file "
                            "is ignored by bibolamazi.  Whatever bibtex entries listed here "
                            "will not be affected by bibolamazi filters and will be retained "
                            "as is at the top of the file.  These bibtex entries are seen by "
                            "latex as regular entries that have not been filtered by bibolamazi.  "
                            "This portion of the file cannot be edited here; use your favorite "
                            "text editor to edit.</p>"
                            "<pre class=\"small\">" + htmlescape(rawheader) + "</pre>")

        dark_mode = uiutils.is_dark_mode(self)

        # Page skeleton: the shared help CSS plus a few local rules, with the
        # content wrapped in the help browser's content container.
        thehtml = textwrap.dedent('''\
        <!DOCTYPE HTML>
        <html>
          <head>
            <style type="text/css">
              %(basecss)s
              .source { margin: 0.5em 0px 0px 0px; }
              .filter { margin: 0.3em 0px 0px 0px; }
              .filterdescription { font-style: italic; margin-left: 2.5em; }
            </style>
          </head>
          <body>
            %(content)s
          </body>
        </html>''') % {
            'basecss': helpbrowser.getCssHelpStyle(dark_mode=dark_mode),
            'content': helpbrowser.wrapInHtmlContentContainer(fileinfohtml, width=800)
        }
        self.ui.txtInfo.setHtml(thehtml)
Example #6
0
def _html_row(tag, unsafe, css_cls, css_style, cell_values, colwidths,
              colaligns):
    """Render one table row of `cell_values` as HTML.

    Each value is HTML-escaped unless `unsafe` is true, stripped and wrapped
    in `tag` via `html_tag`.  Per-column classes/styles are looked up in
    `css_cls` / `css_style` under ``'header_cols'`` (for header rows) or
    ``'cols'``; row-level ones under ``'row'``.  For a header row the result
    also opens the table, wraps the row in the table head and opens the
    table body.  `colwidths` and `colaligns` are accepted but not used here.
    """
    try:
        from html import escape as htmlescape
    except ImportError:
        from cgi import escape as htmlescape

    css_cls = css_cls or {}
    css_style = css_style or {}

    is_header = tag == HTML.TH
    typ = 'header_cols' if is_header else 'cols'

    rendered = []
    for i, raw in enumerate(cell_values):
        text = raw if unsafe else htmlescape(raw)
        stripped = text.strip()
        # A missing per-column entry falls back to a list of None long
        # enough to index position i.
        cell_cls = css_cls.get(typ, [None] * (i + 1))[i]
        cell_style = css_style.get(typ, [None] * (i + 1))[i]
        rendered.append(html_tag(tag, stripped, cell_cls, cell_style))

    row = html_tag(HTML.TR, ''.join(rendered).strip(), css_cls.get('row'),
                   css_style.get('row'))

    if not is_header:
        return row
    return f"<{HTML.TABLE}>\n<{HTML.THEAD}>\n{row}\n</{HTML.THEAD}>\n<{HTML.TBODY}>"
Example #7
0
 def _update_gui_state(self):
     """Bring the buttons and repository list in line with the visible page.

     On the username page only "Next" is shown, enabled when a username has
     been typed.  On the repository page the prompt and the repository combo
     box are refreshed for the typed username, and "Back"/"Ok" are shown,
     with "Ok" enabled when a repository is selected.  Any other page is
     left untouched.
     """
     ui = self.ui
     current = ui.stk.currentWidget()

     if current == ui.pageUsername:
         ui.stk.setCurrentWidget(ui.pageUsername)
         ui.btnBack.setVisible(False)
         ui.btnNext.setVisible(True)
         ui.btnNext.setEnabled(bool(ui.txtUser.text()))
         ui.btnOk.setVisible(False)
     elif current == ui.pageRepo:
         username = ui.txtUser.text()
         ui.lblPromptRepo.setTextFormat(Qt.RichText)
         ui.lblPromptRepo.setText(
             "Repositories for <b>{}</b>".format(htmlescape(username)))
         try:
             repolist = self._get_repolist_for_user(username)
             # Only rebuild the combo box when the list actually changed.
             if ui.cbxRepos.repolist != repolist:
                 with BlockedSignals(ui.cbxRepos):
                     ui.cbxRepos.clear()
                     for repo in repolist:
                         ui.cbxRepos.addItem(repo)
                     ui.cbxRepos.repolist = repolist
         except Exception as e:
             # Best effort: a failed lookup is logged and the rest of the
             # page is still updated.
             logger.debug("Ignoring exception ... %r", e)
             logger.exception("Ignoring exception")
         ui.btnBack.setVisible(True)
         ui.btnNext.setVisible(False)
         ui.btnOk.setVisible(True)
         ui.btnOk.setEnabled(bool(ui.cbxRepos.currentText()))
 def sentence_report(self):
     """Build an HTML report of the misclassified sentences.

     Every sentence whose true label index differs from the predicted one is
     filed under its (true, predicted) pair; each non-empty pair becomes one
     table, visited in label order.

     Returns
     -------
     unicode
         HTML containing misclassified sentences.
     """
     y_true = self._data['y_true']
     y_pred = self._data['y_pred']
     sentences = self._data['sentences']
     labels = self._data['labels']

     # (true index, predicted index) -> misclassified sentences
     by_pair = defaultdict(list)
     for row, (truth, guess) in enumerate(zip(y_true, y_pred)):
         if truth != guess:
             by_pair[(truth, guess)].append(sentences[row])

     pieces = ['<h3>Misclassified sentences</h3>']
     for s, s_label in enumerate(labels):
         for d, d_label in enumerate(labels):
             if s == d:
                 continue
             sents = by_pair[(s, d)]
             if not sents:
                 continue
             caption = '<table><tr><th>{0} &#8594; {1}. Count: {2}.</th></tr>'.format(
                 htmlescape(s_label), htmlescape(d_label), len(sents))
             rows = ['<tr><td>{0}</td></tr>'.format(htmlescape(sent))
                     for sent in sents]
             pieces.append(caption)
             pieces.extend(rows)
             pieces.append('</table>')
     return ''.join(pieces)
Example #9
0
def richtext_to_plaintext(text, default="", escape=False) -> str:
    """Convert rich text to plain text.

    `text` (or `default` when `text` is falsy) is first offered to
    `try_parse_draftjs`; if it is not recognised as DraftJS it is passed
    through `html_to_plaintext`.  With `escape` set the result is
    HTML-escaped before being returned.
    """
    source = text or default
    is_draftjs, plain = try_parse_draftjs(source)
    if not is_draftjs:
        plain = html_to_plaintext(plain)
    return htmlescape(plain) if escape else plain
Example #10
0
def _get_help_page_general(pathitems, kwargs):
    """Build the help page for a path below ``/general``.

    `pathitems` must equal one of ``['welcome']``, ``['cmdline']``,
    ``['version']`` or ``['cmdlversion']``; anything else raises
    `HelpPageError`, whose message is built from ``kwargs['basepathitems']``.
    For ``cmdline`` an argument parser may be supplied as ``kwargs['parser']``
    (it is popped from `kwargs`); otherwise the main module's parser is used.
    """

    def _checked(canonpath):
        # Run the canonical-path check, then hand the path back for reuse.
        _get_help_canonpath_check(canonpath, kwargs)
        return canonpath

    if pathitems == ['welcome']:
        canonpath = _checked('/general/welcome')
        return HelpTopicPage.makeMarkdownPage(
            HELP_WELCOME, title="Welcome", canonpath=canonpath)

    if pathitems == ['cmdline']:
        canonpath = _checked('/general/cmdline')

        parser = kwargs.pop('parser', None)
        if parser is None:
            from . import main as bibolamazimain
            parser = bibolamazimain.get_args_parser()

        prolog = "\n".join(helptext_prolog_lines())
        help_text = prolog + "\n\n" + parser.format_help()
        return HelpTopicPage.makeTxtPage(
            help_text, title="Command-Line Help", canonpath=canonpath)

    if pathitems == ['version']:
        canonpath = _checked('/general/version')
        version_md = htmlescape("\n\n".join(helptext_prolog_lines()))
        return HelpTopicPage.makeMarkdownPage(
            version_md, title="Version", canonpath=canonpath)

    if pathitems == ['cmdlversion']:
        canonpath = _checked('/general/cmdlversion')
        version_txt = TMPL_VERSION_INFO.format(
            version=butils.get_version(),
            copyrightyear=butils.get_copyrightyear()
        )
        return HelpTopicPage.makeTxtPage(
            version_txt, title="Version", canonpath=canonpath)

    full_path = '/'.join(kwargs['basepathitems']+pathitems)
    raise HelpPageError("Unknown help path: /{}".format(full_path))
Example #11
0
def _prepare_yaml_element(element):
    """Prepare a yaml element for display in html.

    Mutates `element` in place and returns nothing:

    * ``element["time"]`` loses its first 11 characters (presumably the date
      part of a timestamp -- confirm against the log format);
    * every string value is HTML-escaped;
    * ``element["message"]``, when present, is converted with
      `formatting.to_html` and matches of `url_pat` are turned into links.
    """
    element["time"] = element["time"][11:]
    for key, val in element.items():
        # isinstance (not an exact type comparison) so that str subclasses
        # are escaped as well.
        if isinstance(val, str):
            element[key] = htmlescape(val)
    if "message" in element:
        element["message"] = formatting.to_html(element["message"])
        element["message"] = url_pat.sub(r"<a href='\1'>\1</a>",
                                         element["message"])
Example #12
0
 def classification_report_list(self, title, data):
     """Render per-class precision/recall/F1 figures as an HTML table.

     `title` is placed (as given) in a heading row spanning all five
     columns.  `data` yields ``(label, precision, recall, f1, support)``
     tuples; the label is HTML-escaped and the three scores are shown
     multiplied by 100 with one decimal place.
     """
     heading = ('<table>'
                '<tr><th colspan="5">{0}</th></tr>'
                '<tr><th>Class</th><th>Precision</th><th>Recall</th><th>F1</th><th>Support/Count</th>'
                '</tr>').format(title)
     row_template = '<tr><td>{0}</td><td>{1:.1f}</td><td>{2:.1f}</td><td>{3:.1f}</td><td>{4}</td></tr>'

     pieces = [heading]
     for label, precision, recall, f1, support in data:
         pieces.append(row_template.format(
             htmlescape(label), precision*100, recall*100, f1*100, support))
     pieces.append('</table>')
     return ''.join(pieces)
Example #13
0
def _prepare_yaml_element(element):
    """Prepare a yaml element for display in html.

    Works in place on `element`: the first 11 characters of its ``"time"``
    value are dropped, all string values are HTML-escaped and, if there is a
    ``"message"`` entry, it is run through `formatting.to_html` and has the
    matches of `url_pat` wrapped in anchor tags.
    """
    element["time"] = element["time"][11:]

    # Escape every string value; other value types are left alone.
    element.update({
        key: htmlescape(val)
        for key, val in element.items()
        if isinstance(val, str)
    })

    if "message" in element:
        element["message"] = formatting.to_html(element["message"])
        linked = url_pat.sub(r"<a href='\1'>\1</a>", element["message"])
        element["message"] = linked
Example #14
0
 def _search_logs(self, request):
     """Write one page of full-text search results to `request`.

     Reads the ``q`` (query) and optional ``page`` (default 1) request
     arguments, searches the ``content`` field of ``self.ix`` with results
     sorted by ``date`` in reverse order, and writes the rendered
     ``search_page_template`` to `request` before finishing it.

     A non-numeric or non-positive ``page`` produces an error page without
     running the search.  A missing ``q`` argument raises ``KeyError``.
     """
     try:
         from urllib.parse import quote
     except ImportError:  # Python 2
         from urllib import quote

     raw_query = request.args[b"q"][0]
     querystr = bytes_to_str(raw_query)
     if b"page" in request.args:
         try:
             page = int(request.args[b"page"][0])
         except ValueError:
             page = -1
     else:
         page = 1
     if page < 1:
         log_data = "Invalid page number specified"
         # NOTE(review): this branch passes self.channel while the normal
         # path below passes self.channel_link() -- confirm which is meant.
         request.write(
             str_to_bytes(
                 search_page_template.format(log_data=log_data,
                                             title=self.title,
                                             header=header,
                                             footer=footer,
                                             channel=self.channel)))
         request.finish()
         return
     with self.ix.searcher() as searcher:
         query = QueryParser("content", self.ix.schema).parse(querystr)
         res_page = searcher.search_page(query,
                                         page,
                                         pagelen=self.pagelen,
                                         sortedby="date",
                                         reverse=True)
         res_page.results.fragmenter = highlight.SentenceFragmenter(
             sentencechars=u".!?\u2026", charlimit=None)
         log_data = ""
         for hit in res_page:
             log_data += ("<ul><div><label><a href='{channel}?date="
                          "{date}'>{date}</a></label>".format(
                              channel=self.channel_link(),
                              date=hit["date"].strftime("%Y-%m-%d")) +
                          hit.highlights("content") + "</div></ul>")
         if not res_page.is_last_page():
             # The query is user input: percent-encode it so it can neither
             # break out of the href attribute nor corrupt the query string
             # (e.g. when it contains '&', '#', quotes or spaces).
             log_data += "<a href='?q={}&page={}'>Next</a>".format(
                 quote(raw_query, safe=""), page + 1)
         if not res_page:
             log_data = "No Logs found containg: {}".format(
                 htmlescape(querystr))
     request.write(
         str_to_bytes(
             search_page_template.format(log_data=log_data,
                                         title=self.title,
                                         header=header,
                                         footer=footer,
                                         channel=self.channel_link())))
     request.finish()
Example #15
0
def insert_spans(text, spans, css_classes):
    """Wrap the given character ranges of `text` in ``<span>`` tags.

    `spans` holds ``(start, end)`` offsets into `text`; the matching entry
    of `css_classes` is written into the ``style`` attribute of that span's
    opening tag.  All characters of `text` itself are HTML-escaped.
    """
    # Markers are (offset, kind, classes) with kind 1 = opening tag and
    # 0 = closing tag, so that at equal offsets closing tags sort first.
    markers = []
    for (start, end), classes in zip(spans, css_classes):
        markers.extend([(start, 1, classes), (end, 0, None)])
    markers.sort()

    pieces = [htmlescape(ch) for ch in text]
    # Insert from the back so that earlier offsets stay valid.
    for offset, kind, classes in reversed(markers):
        tag = '<span style="{0}">'.format(classes) if kind == 1 else '</span>'
        pieces[offset:offset] = tag
    return ''.join(pieces)
Example #16
0
 def _search_logs(self, request):
     """Write one page of full-text search results to `request`.

     Reads the ``q`` (query, decoded as UTF-8) and optional ``page``
     (default 1) request arguments, searches the ``content`` field of
     ``self.ix`` with results sorted by ``date`` in reverse order, and
     writes the rendered ``search_page_template`` to `request` before
     finishing it.

     A non-numeric or non-positive ``page`` produces an error page without
     running the search.  A missing ``q`` argument raises ``KeyError``.
     """
     try:
         from urllib.parse import quote
     except ImportError:  # Python 2
         from urllib import quote

     raw_query = request.args[b"q"][0]
     querystr = unicode(raw_query, "utf-8")
     if b"page" in request.args:
         try:
             page = int(request.args[b"page"][0])
         except ValueError:
             page = -1
     else:
         page = 1
     if page < 1:
         log_data = "Invalid page number specified"
         # NOTE(review): this branch passes self.channel while the normal
         # path below passes self.channel_link() -- confirm which is meant.
         request.write(str_to_bytes(search_page_template.format(
             log_data=log_data, title=self.title, header=header,
             footer=footer, channel=self.channel)))
         request.finish()
         return
     with self.ix.searcher() as searcher:
         query = QueryParser("content", self.ix.schema).parse(querystr)
         res_page = searcher.search_page(query, page,
                                         pagelen=self.pagelen,
                                         sortedby="date", reverse=True)
         res_page.results.fragmenter = highlight.SentenceFragmenter(
             sentencechars=u".!?\u2026", charlimit=None)
         log_data = ""
         for hit in res_page:
             log_data += ("<ul><div><label><a href='{channel}?date="
                          "{date}'>{date}</a></label>".format(
                              channel=self.channel_link(),
                              date=hit["date"].strftime("%Y-%m-%d")) +
                          hit.highlights("content") +
                          "</div></ul>")
         if not res_page.is_last_page():
             # The query is user input: percent-encode the raw bytes so the
             # value can neither break out of the href attribute nor corrupt
             # the query string (e.g. when it contains '&', '#' or quotes).
             log_data += "<a href='?q={}&page={}'>Next</a>".format(
                 quote(raw_query, safe=""), page + 1)
         if not res_page:
             log_data = "No Logs found containg: {}".format(
                 htmlescape(querystr))
     if sys.version_info.major < 3:
         log_data = log_data.encode("utf-8")
     request.write(str_to_bytes(search_page_template.format(
         log_data=log_data, title=self.title, header=header,
         footer=footer, channel=self.channel_link())))
     request.finish()
Example #17
0
def bibolamazi_error_html(errortxt, wrap_pre=True):
    """Turn an error message into HTML with clickable line references.

    The text is HTML-escaped and every ``@: ... line N`` reference becomes a
    link to ``action:/goto-bibolamazi-file-line/N``.  `wrap_pre` selects the
    wrapping: a ``(start_tag, end_tag)`` pair is put around the text, any
    other true value wraps it in a ``<pre>`` element, and a false value
    returns the bare HTML.
    """

    def _linkify(match):
        lineno = int(match.group('lineno'))
        return "<a href=\"action:/goto-bibolamazi-file-line/%d\">%s</a>" % (
            lineno, match.group())

    escaped = str(htmlescape(errortxt, quote=True))
    body = re.sub(r'@:.*line\s+(?P<lineno>\d+)', _linkify, escaped)

    try:
        # wrap_pre given as (start_tag, end_tag)
        custom = wrap_pre[0] + body + wrap_pre[1]
    except (TypeError, IndexError):
        # Not a usable pair: treat wrap_pre as a plain flag.
        if wrap_pre:
            return ("<pre style=\"white-space: pre-wrap\">" + body + "</pre>")
        return body
    else:
        return custom
Example #18
0
    def misclassified_data(self):
        """Return a standalone HTML page listing the misclassified rows.

        Rows whose true and predicted label indices differ are grouped by
        their (true, predicted) pair.  For each non-empty pair, in label
        order, the page shows both labels, the number of rows and a table of
        those rows in which the feature columns have been HTML-escaped and
        annotated with the row's significant features.

        NOTE(review): row positions from `y_true`/`y_pred` are used as index
        labels of the dataframe, so this presumably expects a default
        0..n-1 index -- confirm.
        """
        y_true, y_pred = self._data['y_true'], self._data['y_pred']
        df = self._data['dataframe']
        labels = self._data['labels']
        sigfeatures = self._data['sigfeatures']
        settings = self._data['settings']
        feature_names = self._data['feature_names']
        ta = TextAnnotator(settings.unifier)

        # (true index, predicted index) -> positions of misclassified rows
        D = defaultdict(list)
        for i, (t, p) in enumerate(zip(y_true, y_pred)):
            if t != p:
                D[(t, p)].append(i)
                # Sanity check: the label column agrees with y_true.
                assert labels[t] == df[settings.label][i]
        html = """<!DOCTYPE html>
            <html>
            <head>
                <meta charset="utf-8">
                <title>Classification report</title>
            </head>

            <body>"""
        html += '<h3>Misclassified data</h3>'
        for s, s_label in enumerate(labels):
            for d, d_label in enumerate(labels):
                if s == d:
                    continue
                idxs = D[(s, d)]
                if len(idxs) > 0:
                    # Labels are escaped like everywhere else in the report.
                    html += '<br/><br/><br/><b>True label:</b> {0}<br/><b>Predicted label:</b> {1}<br/><b>Count:</b> {2}<br/>\n'.format(
                        htmlescape(s_label), htmlescape(d_label), len(idxs))
                    subdf = df.iloc[idxs].fillna('')
                    for idx in idxs:
                        features = [(feature_names[featidx], value)
                                    for featidx, value in sigfeatures[idx]]
                        for col in settings.features:
                            # Single .loc access instead of chained
                            # subdf[col][idx] assignment, which may write to
                            # a temporary copy and be lost.
                            subdf.loc[idx, col] = ta.annotate_important_features(
                                htmlescape(subdf.loc[idx, col]), features)
                    # Cells already contain escaped, annotated HTML.
                    html += subdf.to_html(index=False, escape=False)
        return html + '</body></html>'
Example #19
0
    def generate_profile(self, escape=False):
        """Serialize this configuration profile as a property list.

        The profile carries the UUID (also used as the payload identifier),
        organization, display name and description of this object together
        with its payloads, is marked enabled and has removal disallowed.

        With `escape` set, the serialized plist is HTML-escaped before being
        returned.
        """
        profile = {
            'PayloadUUID': self.profile_uuid,
            'PayloadType': "Configuration",
            'PayloadOrganization': self.profile_organization,
            'PayloadIdentifier': self.profile_uuid,
            'PayloadDisplayName': self.profile_name,
            'PayloadDescription': self.profile_description,
            'PayloadVersion': 1,
            'PayloadEnabled': True,
            'PayloadRemovalDisallowed': True,
            'PayloadContent': self.payloads,
        }

        if hasattr(plistlib, 'dumps'):
            # Python 3: writePlistToString no longer exists; dumps() returns
            # bytes, decoded here so the result is text and can be escaped.
            formatted_profile = plistlib.dumps(profile).decode('utf-8')
        else:
            # Python 2 API.
            formatted_profile = plistlib.writePlistToString(profile)

        if escape:
            return htmlescape(formatted_profile)
        return formatted_profile
    def misclassified_data(self):
        """Return a standalone HTML page listing the misclassified rows.

        Rows whose true and predicted label indices differ are grouped by
        their (true, predicted) pair.  For each non-empty pair, in label
        order, the page shows both labels, the number of rows and a table of
        those rows in which the feature columns have been HTML-escaped and
        annotated with the row's significant features.

        NOTE(review): row positions from `y_true`/`y_pred` are used as index
        labels of the dataframe, so this presumably expects a default
        0..n-1 index -- confirm.
        """
        y_true, y_pred = self._data['y_true'], self._data['y_pred']
        df = self._data['dataframe']
        labels = self._data['labels']
        sigfeatures = self._data['sigfeatures']
        settings = self._data['settings']
        feature_names = self._data['feature_names']
        ta = TextAnnotator(settings.unifier)

        # (true index, predicted index) -> positions of misclassified rows
        D = defaultdict(list)
        for i, (t, p) in enumerate(zip(y_true, y_pred)):
            if t != p:
                D[(t, p)].append(i)
                # Sanity check: the label column agrees with y_true.
                assert labels[t] == df[settings.label][i]
        html = """<!DOCTYPE html>
            <html>
            <head>
                <meta charset="utf-8">
                <title>Classification report</title>
            </head>

            <body>"""
        html += '<h3>Misclassified data</h3>'
        for s, s_label in enumerate(labels):
            for d, d_label in enumerate(labels):
                if s == d:
                    continue
                idxs = D[(s, d)]
                if len(idxs) > 0:
                    # Labels are escaped like everywhere else in the report.
                    html += '<br/><br/><br/><b>True label:</b> {0}<br/><b>Predicted label:</b> {1}<br/><b>Count:</b> {2}<br/>\n'.format(
                        htmlescape(s_label), htmlescape(d_label), len(idxs))
                    subdf = df.iloc[idxs].fillna('')
                    for idx in idxs:
                        features = [(feature_names[featidx], value) for featidx, value in sigfeatures[idx]]
                        for col in settings.features:
                            # Single .loc access instead of chained
                            # subdf[col][idx] assignment, which may write to
                            # a temporary copy and be lost.
                            subdf.loc[idx, col] = ta.annotate_important_features(
                                htmlescape(subdf.loc[idx, col]), features)
                    # Cells already contain escaped, annotated HTML.
                    html += subdf.to_html(index=False, escape=False)
        return html + '</body></html>'
Example #21
0
    def contentAsHtmlFragment(self):
        """Return this page's content as an HTML fragment.

        Stored content is tried in this order: ``'htmlfragment'`` is
        returned as is, ``'markdown'`` is converted with markdown2, and
        ``'txt'`` is HTML-escaped and wrapped in a ``<pre>`` element.
        Raises `HelpPageError` when none of these content kinds is present.
        """
        available = self._content

        if 'htmlfragment' in available:
            return self.getContent('htmlfragment')

        if 'markdown' in available:
            # Imported only here so that markdown2 needs to be installed
            # only when markdown content actually has to be converted.
            import markdown2

            md_extras = ["footnotes", "fenced-code-blocks",
                         "smarty-pants", "tables"]
            return markdown2.markdown(self.getContent('markdown'),
                                      extras=md_extras)

        if 'txt' in available:
            escaped_txt = htmlescape(self.getContent('txt'))
            return "<pre class=\"txtcontent\">" + escaped_txt + "</pre>"

        have = ", ".join(self._content.keys())
        raise HelpPageError("Can't convert content to HTML, we have {}"
                            .format(have))
Example #22
0
    def dolog(self, txt, levelno=logging.INFO):
        """Emit `txt` on the `logHtml` signal as an HTML span styled by level.

        `txt` is either an object offering ``getHtml()`` (used verbatim) or
        plain text (HTML-escaped).  Errors and criticals are bold red,
        warnings bold brown, info uses the default colour, and debug as well
        as unknown levels are grey.  Whitespace is always preserved.
        """
        try:
            body = txt.getHtml()  # PreformattedHtml-like instance
        except AttributeError:
            body = str(htmlescape(txt))  # simple plain text string

        if levelno in (logging.ERROR, logging.CRITICAL):
            level_css = "color: #ff0000; font-weight: bold;"
        elif levelno == logging.WARNING:
            level_css = "color: rgb(150,80,0); font-weight: bold;"
        elif levelno == logging.INFO:
            # default colour; only the weight is fixed
            level_css = "font-weight: normal;"
        elif levelno == logging.DEBUG or levelno == blogger.LONGDEBUG:
            level_css = "color: #7f7f7f; font-weight: normal;"
        else:
            # unknown level
            level_css = "color: #7f7f7f; font-weight: normal;"

        css = level_css + "white-space: pre;"
        self.logHtml.emit("<span style=\"%s\">%s\n</span>" % (css, body))
Example #23
0
    def significant_features(self):
        """Return an HTML section listing the significant features per label.

        For every label, up to 100 features are obtained from
        `get_sig_features`, mapped to their names, stripped of prefixes and
        colour-annotated by the text annotator, then shown next to the
        HTML-escaped label in one table row.
        """
        settings = self._data['settings']
        feature_names = self._data['feature_names']
        coef = self._data['coef']
        ta = TextAnnotator(settings.unifier)
        labels = self._data['labels']

        pieces = [
            '<h3>Significant features by labels</h3>\n',
            '<p>Below is a list with at most 100 most significant features for each label, that are used in the classification process.</p>',
            '<p>Features written in <b>black</b> and <span style="color:red">red</span> denote features that are respectively contributing',
            ' towards and against assigning the particular class label. ',
            'Both are equally important, but they should be interpreted differently, when debugging the classifier.</p>',
            '<table style="border: 1px solid black">',
        ]
        row_template = '<tr><td style="border-bottom: 1px solid black">{0}</td><td style="border-bottom: 1px solid black">{1}</td></tr>'

        for idx, label in enumerate(labels):
            named = [(feature_names[featidx], value)
                     for featidx, value in get_sig_features(idx, coef, 100)]
            trimmed = ta.trim_feature_prefixes(named)
            annotated = ', '.join(
                ta.annotate_color(f, v) for f, v in trimmed)
            pieces.append(row_template.format(htmlescape(label), annotated))

        pieces.append('</table>')
        return ''.join(pieces)
Example #24
0
def get_opening_mark(classes):
    """Return `OPENING_MARK` formatted with the HTML-escaped `classes`."""
    escaped_classes = htmlescape(classes)
    return OPENING_MARK.format(classes=escaped_classes)
Example #25
0
def fragmentsinresults(form, doexport=False):
	"""Extract recurring fragments from search results.

	Generator yielding the response piece by piece.  The trees matching
	``form['query']`` in the selected texts are collected, recurring
	fragments are extracted from them and the `FRAGLIMIT` best ones
	(ranked by ``freq ** 0.5 * numwords ** 2``) are reported.

	With `doexport` false the output is HTML: a header, an ordered list of
	fragments with "draw" links, and a closing message.  With `doexport`
	true only tab-separated lines are yielded.  Engines other than
	'tgrep2' and 'frag' are rejected with a short message.
	"""
	engine = form.get('engine', 'tgrep2')
	if engine not in ('tgrep2', 'frag'):
		yield "Only applicable to treebanks."
		return
	gotresults = False
	filenames = {EXTRE.sub('', os.path.basename(a)): a
			for a in CORPORA[engine].files}
	selected = {filenames[TEXTS[n]]: n for n in selectedtexts(form)}
	start, end = getslice(form.get('slice'))
	uniquetrees = set()
	if not doexport:
		url = 'fragments?' + url_encode(dict(export='csv', **form),
				separator=b';')
		# Long queries are abbreviated for display.
		query = form['query']
		if len(query) >= 128:
			query = query[:128] + '...'
		# The query is user input and must be escaped before it is echoed
		# into the page.
		yield ('<pre>Query: %s\n'
				'Fragments (showing up to %d fragments '
				'in the first %d search results from selected texts;\n'
				'ordered by (freq ** 0.5 * numwords ** 2) '
				'<a href="%s">Export</a>):\n'
				% (htmlescape(query), FRAGLIMIT, SENTLIMIT, url))
	disc = engine != 'tgrep2'
	if disc:
		fragments.PARAMS.update(disc=True, fmt='discbracket')
	else:
		fragments.PARAMS.update(disc=False, fmt='bracket')
	for n, (_, _, treestr, _) in enumerate(CORPORA[engine].sents(
			form['query'], selected, start, end,
			maxresults=SENTLIMIT, brackets=True)):
		if n == 0:
			gotresults = True
		if engine == 'tgrep2':
			line = treestr.replace(" )", " -NONE-)") + '\n'
		elif engine == 'frag':
			line = treestr + '\n'
		else:
			raise ValueError
		uniquetrees.add(line.encode('utf8'))
	if not gotresults and not doexport:
		yield "No matches."
		return
	# TODO: get counts from whole text (preload)
	import tempfile
	# The fragment extractor reads its input from a file, so the unique
	# trees are written to a temporary one.
	with tempfile.NamedTemporaryFile(delete=True) as tmp:
		tmp.writelines(uniquetrees)
		tmp.flush()
		results, approxcounts = fragments.regular([tmp.name], 1, None, 'utf8')
	if disc:
		results = nlargest(FRAGLIMIT, zip(results, approxcounts),
				key=lambda ff: sum(1 for a in ff[0][1] if a) ** 2 * ff[1] ** 0.5)
	else:
		results = nlargest(FRAGLIMIT, zip(results, approxcounts),
				key=lambda ff: sum(1 for _
				in re.finditer(r'[^ ()]\)', ff[0])) ** 2 * ff[1] ** 0.5)
	gotresults = False
	if not doexport:
		yield "<ol>"
	for tree, freq in results:
		gotresults = True
		if disc:
			tree, sent = tree
			sent = ' '.join(a or '' for a in sent)
		if doexport:
			if disc:
				yield '%s\t%s\t%s\n' % (tree, sent, freq)
			else:
				yield '%s\t%s\n' % (tree, freq)
		else:
			if disc:
				link = '<a href="draw?tree=%s;sent=%s">draw</a>' % (
						quote(tree.encode('utf8')), quote(sent.encode('utf8')))
				sent = GETLEAVES.sub(' <font color=red>\\1</font>',
						htmlescape(' ' + sent + ' '))
				tree = htmlescape(tree) + ' ' + sent
			else:
				link = '<a href="draw?tree=%s">draw</a>' % (
						quote(tree.encode('utf8')))
				tree = GETLEAVES.sub(' <font color=red>\\1</font>',
						htmlescape(tree))
			tree = GETFRONTIERNTS.sub('(<font color=blue>\\1</font> )', tree)
			yield "<li>freq=%3d [%s] %s" % (freq, link, tree)
	if not doexport:
		yield "</ol>"
		if gotresults:
			yield '</pre>'
		else:
			# Arguments follow the order of the placeholders: freq, nodes.
			yield "No fragments with freq > %d & nodes > %d." % (
					MINFREQ, MINNODES)
Example #26
0
def sents(form, dobrackets=False):
	"""Return search results as terminals or in bracket notation.

	Generator yielding an HTML page piece by piece: a header with export
	links, then per text either a breakdown of the distinct matches with
	their counts (when ``'breakdown'`` is in `form`) or a list of matching
	sentences with links to the tree and its context.  With `dobrackets`
	the sentences are shown in bracket notation.  An error raised by the
	search engine is reported by its last line instead of results.
	"""
	gotresults = False
	engine = form.get('engine', 'tgrep2')
	filenames = {EXTRE.sub('', os.path.basename(a)): a
			for a in CORPORA[engine].files}
	selected = {filenames[TEXTS[n]]: n for n in selectedtexts(form)}
	start, end = getslice(form.get('slice'))
	url = '%s?%s' % ('trees' if dobrackets else 'sents',
			url_encode(dict(export='csv', **form), separator=b';'))
	# Long queries are abbreviated for display.
	query = form['query']
	if len(query) >= 128:
		query = query[:128] + '...'
	# The query is user input and must be escaped before it is echoed into
	# the page.
	yield ('<pre>Query: %s\n'
			'Sentences (showing up to %d per text; '
			'export: <a href="%s">plain</a>, '
			'<a href="%s">with line numbers</a>):\n' % (
				htmlescape(query),
				SENTLIMIT, url, url + ';linenos=1'))
	try:
		tmp = CORPORA[engine].sents(form['query'],
					selected, start, end, maxresults=SENTLIMIT,
					brackets=dobrackets)
	except Exception as err:
		yield '<span class=r>%s</span>' % htmlescape(str(err).splitlines()[-1])
		return
	# NB: avoid sorting; rely on the fact that matches for each filename are
	# already contiguous. filenames will be in arbitrary order due to
	# multiprocessing
	for n, (filename, results) in enumerate(groupby(tmp, itemgetter(0))):
		textno = selected[filename]
		text = TEXTS[textno]
		if 'breakdown' in form:
			if dobrackets:
				breakdown = Counter(high for _, _, _, high, _ in results)
			else:
				# Keep only the highlighted characters; runs of dropped
				# characters collapse into ' ... '.
				breakdown = Counter(re.sub(
					' {2,}', ' ... ',
					''.join(char if n in high1 or n in high2 else ' '
						for n, char in enumerate(sent)))
					for _, _, sent, high1, high2 in results)
			yield '\n%s\n' % text
			for match, cnt in breakdown.most_common():
				gotresults = True
				yield '%5d  %s\n' % (cnt, match)
			continue
		for m, (_filename, sentno, sent, high1, high2) in enumerate(results):
			if m == 0:
				gotresults = True
				yield ("\n%s: [<a href=\"javascript: toggle('n%d'); \">"
						"toggle</a>] <ol id=n%d>" % (text, n, n))
			link = ('<a href="browse?text=%d;sent=%d%s%s">tree</a>'
					'|<a href="browsesents?%s">context</a>' % (
					textno, sentno, ';nofunc' if 'nofunc' in form else '',
					';nomorph' if 'nomorph' in form else '',
					url_encode(dict(text=textno, sent=sentno, highlight=sentno,
						query=form['query'], engine=engine), separator=b';')))
			if dobrackets:
				sent = htmlescape(sent.replace(" )", " -NONE-)"))
				# The sentence is escaped at this point, so the highlight
				# has to be escaped too or it would not be found whenever
				# it contains '&', '<' or '>'.
				high1 = htmlescape(high1)
				out = sent.replace(high1, "<span class=r>%s</span>" % high1)
			else:
				out = applyhighlight(sent, high1, high2)
			yield "<li>#%s [%s] %s\n" % (str(sentno).rjust(6), link, out)
		yield "</ol>"
	yield '</pre>' if gotresults else 'No matches.'
Example #27
0
def formatexception(e):
    """Return the formatted traceback of exception ``e`` as HTML-escaped text.

    An empty string is returned when ``e`` is None.
    """
    if e is not None:
        lines = traceback.format_exception(e, e, e.__traceback__)
        return htmlescape("".join(lines))
    return ""
Example #28
0
def plot(data, total, title, width=800.0, unit='', dosort=True,
		target=None, target2=None):
	"""Return an HTML fragment visualizing ``data``.

	With more than 30 items and a ``target``, a seaborn chart is rendered
	and returned as an ``<img>`` tag holding a base64-encoded PNG.
	Otherwise an HTML bar plot is returned, one bar per key.

	:param data: mapping of string keys to numeric values.
	:param total: the value that corresponds to a bar of ``width`` pixels.
	:param title: plain-text caption; HTML-escaped in the bar plot, and
		truncated to 50 characters in the chart.
	:param width: pixel width of a bar whose value equals ``total``.
	:param unit: suffix printed after each value.
	:param dosort: if true, bars are sorted by descending value.
	:param target: optional variable to plot against / order by; assumed to
		be a pandas Series indexed by the keys of ``data`` (``.name``,
		``.loc`` and ``.sort_values()`` are used).
	:param target2: optional second variable used as hue in the chart.
	:returns: a string of HTML.
	"""
	if len(data) > 30 and target is not None:
		import base64
		df = pandas.DataFrame(index=data)
		if len(title) > 50:
			title = title[:50] + '...'
		df[title] = pandas.Series(data, index=df.index)
		df[target.name] = target.loc[df.index]
		if target2 is not None:
			df[target2.name] = target2.loc[df.index]
		# NB: x and y are passed by keyword; recent seaborn releases accept
		# only ``data`` positionally.
		if iscategorical(target):
			df.sort_values(by=target.name, inplace=True)
			if target2 is None:
				seaborn.violinplot(x=target.name, y=title, data=df,
						split=True, inner="stick", palette='Set1')
			else:
				seaborn.barplot(x=target.name, y=title, data=df,
						hue=target2.name, palette='Set1')
			fig = plt.gcf()
			fig.autofmt_xdate()
		else:  # treat X-axis as continuous
			if target2 is None:
				seaborn.jointplot(x=target.name, y=title, data=df,
						kind='reg')
			else:
				seaborn.lmplot(x=target.name, y=title, data=df,
						hue=target2.name, palette='Set1')

		# Convert the current figure to an inline PNG
		figfile = io.BytesIO()
		plt.tight_layout()
		plt.savefig(figfile, format='png')
		result = '<div><img src="data:image/png;base64, %s"/></div>' % (
				base64.b64encode(figfile.getvalue()).decode('utf8'))
		plt.close()
		return result

	def category(key):
		"""Part of key before the first '_', else its first character."""
		return key.split('_')[0] if '_' in key else key[0]

	# The title is escaped like the keys below, since both end up in HTML.
	result = ['<div class=barplot>',
			('<text style="font-family: sans-serif; font-size: 16px; ">'
			'%s</text>' % htmlescape(title))]
	if target is not None:
		# order the bars by the value of the target variable
		data = OrderedDict([(key, data[key]) for key in
				target.sort_values().index if key in data])
	# Bars are colored per category, but only if there are at most 5 of them;
	# otherwise every non-empty bar gets class b1.
	categories = {category(key) for key in data}
	color = {}
	if len(categories) <= 5:
		color.update(zip(categories, range(1, 6)))
	keys = list(data)
	if dosort:
		keys.sort(key=data.get, reverse=True)
	for key in keys:
		value = data[key]
		# zero/empty values get a zero-width bar with class b0
		result.append('<br><div style="width:%dpx;" class=b%d></div>'
				'<span>%s: %g %s</span>' % (
				int(round(width * value / total)) if value else 0,
				color.get(category(key), 1) if value else 0,
				htmlescape(key), value, unit,))
	result.append('</div>\n')
	return '\n'.join(result)
Example #29
0
if format != 'applepages-export':
    print("Invalid format: ", format)
    sys.exit(1)


# HTML snippet that embeds the PDF as a base64 data URI; the (simplified)
# LaTeX source is used as alternative text and tooltip.
HTML_TEMPLATE = """\
<img src=\"data:application/pdf;base64,{b64data}\" alt=\"{alttxt}\" title=\"{alttxt}\">
"""

# A PDF is binary data: read it as bytes so that no text decoding is attempted
# and base64.b64encode() receives the bytes object it requires.
with open(pdffile, 'rb') as f:
    pdfcontents = f.read()

latexcode = os.environ.get('KLF_INPUT_LATEX', '')
# some substitutions in the latex string to make it more readable [duplicates
# C++ code from klfmime.cpp arrgh!!]

#    \! \, \; \:  -> simple space
latexcode = re.sub(r"\\[,;:!]", " ", latexcode)
#    \text{Hello}, \mathrm{Hilbert-Einstein}  -->  {the text}
latexcode = re.sub(r"\\(?:text|mathrm)\{((?:\w|\s|[._-])*)\}", r"{\1}", latexcode)
#    \var(epsilon|phi|...)    ->   \epsilon,\phi,...
latexcode = re.sub(r"\\var([a-zA-Z]+)", r"\\\1", latexcode)


# The base64 payload is URL-quoted for the data URI; the alt text is
# HTML-escaped because it is placed inside attribute values.
print(HTML_TEMPLATE.format(
    b64data=quote_plus(base64.b64encode(pdfcontents)),
    alttxt=htmlescape(latexcode)
    ))
Example #30
0
    def gen_htmlfragment(filtname=filtname, filtinfo=filtinfo, kwargs=dict(kwargs)):

        html = "<h1>Filter: {}</h1>\n\n".format(filtname)

        fpn = filtinfo.filterpackagename
        html += "<p class=\"shadow\">In filter package <b>" + htmlescape(fpn) + "</b></p>\n\n"

        author = filtinfo.fclass.getHelpAuthor().strip()
        if author:
            html += "<p>" + htmlescape(author) + "</p>\n\n"

        desc = filtinfo.fclass.getHelpDescription().strip()
        if desc:
            html += "<p>" + htmlescape(desc) + "</p>\n\n"

        table_width_px_str = str(kwargs.get('html_table_width_px', 550))

        html_opt = ''
        html_doc = ''

        fopt = filtinfo.defaultFilterOptions()
        if fopt:
            # we're in business -- filter options

            html_opt += "<h2><a name=\"a-filter-options\"></a>Filter Options:</h2>\n\n"

            html_opt += "<table width=\""+table_width_px_str+"\">"

            for arg in fopt.filterOptions():
                sopt_arg_name = fopt.getSOptNameFromArg(arg.argname)
                html_opt += "<tr><th><a name=\"a-filter-option-{}\"></a>".format(urlquoteplus(arg.argname)) \
                    + htmlescape(sopt_arg_name) + "</th></tr>"
                html_opt += "<tr><td class=\"indent\" width=\""+table_width_px_str+"\">"
                html_opt += "<p class=\"inner\">" + htmlescape(arg.doc if arg.doc else '') + "</p>"

                if arg.argtypename:
                    typ = butils.resolve_type(arg.argtypename, filtinfo.fmodule)
                    if typ is bool:
                        html_opt += ("<p class=\"inner shadow\">Expects a boolean argument type" +
                                 " (True/1/Yes/On or False/0/No/Off)</p>")
                    elif typ is int:
                        html_opt += ("<p class=\"inner shadow\">Expects an integer as argument</p>")
                    elif hasattr(typ, '__doc__') and typ.__doc__: # e.g., is not None
                        docstr = typ.__doc__.strip()
                        if len(docstr):
                            html_opt += ("<p class=\"inner shadow\">Expects argument type " +
                                     "<code>" + htmlescape(arg.argtypename) + "</code>: "
                                     + docstr + "</p>")

                html_opt += "</td></tr>\n"

            if fopt.filterAcceptsVarArgs():
                html_opt += "<tr><th>(...)</th></tr>"
                html_opt += ("<tr><td class=\"indent\" width=\""+table_width_px_str+"\">This filter accepts "
                         "additional positional arguments (see doc below)</td></tr>")
            if fopt.filterAcceptsVarKwargs():
                html_opt += "<tr><th>(...=...)</th></tr>"
                html_opt += ("<tr><td class=\"indent\" width=\""+table_width_px_str+"\">This filter accepts "
                         "additional named/keyword arguments (see doc below)</td></tr>")

            html_opt += "</table>"

            html_opt += """

<p>Pass options with the syntax <code>-s</code><span
class="code-meta">OptionName</span><code>="</code><span class="code-meta">option
value</span><code>"</code> or <code>-d</code><span
class="code-meta">OptionName[</span><code>=True</code><span
class="code-meta">|</span><code>False</code><span class="code-meta">]</span>.
The form <code>-sXXX</code> is for passing strings (which must be quoted if
comprising spaces or special characters), and the form <code>-dXXX</code> is for
specifying boolean ON/OFF switches.</p>

"""

            html_doc += "<h2><a name=\"a-filter-doc\"></a>Filter Documentation:</h2>\n\n"

            html_doc += ("<div style=\"white-space: pre-wrap\">" + htmlescape(filtinfo.fclass.getHelpText())
                         + "</div>\n\n")

        elif hasattr(filtinfo.fmodule, 'format_help'):

            html_doc += ("<div style=\"white-space: pre-wrap\">" + htmlescape(filtinfo.fmodule.format_help())
                         + "</div>\n\n")

        else:
            
            html_doc += "<p style=\"font-style\">"+htmlescape(filtinfo.fclass.getHelpText())+"</p>\n\n"
            #html += "<p style=\"font-style\">(no additional help available)</p>"

        if html_opt and html_doc:
            html += '<p><b>Contents:</b></p>'
            html += '<ul><li><a href="#a-filter-opt">Filter Options</a></li>'
            html += '<li><a href="#a-filter-doc">Filter Documentation</li></ul>\n'

        html += html_opt
        html += html_doc

        return html
 def setFilterPackageError(self, errmsg):
     """Display ``errmsg`` in the info label as HTML-escaped, dark red text."""
     escaped = htmlescape(errmsg)
     markup = "<span style=\"color: #800000\">{}</span>".format(escaped)
     self.ui.lblInfo.setText(markup)
Example #32
0
def get_opening_mark(classes):
    """Fill the OPENING_MARK template with the HTML-escaped ``classes``."""
    escaped_classes = htmlescape(classes)
    return OPENING_MARK.format(classes=escaped_classes)
Example #33
0
def trees(form):
	"""Return visualization of parse trees in search results.

	Generator yielding chunks of HTML: a header with the query and export
	links, then per text either a breakdown of matched subtrees with counts
	(when ``'breakdown'`` is in ``form``) or a drawing of each matching tree
	with links to browse it. Ends with ``'No matches.'`` if nothing matched.

	:param form: the request parameters; ``form['query']`` is required,
		and ``engine``, ``slice``, ``breakdown``, ``nofunc`` and ``nomorph``
		are consulted when present.
	"""
	gotresults = False
	engine = form.get('engine', 'tgrep2')
	filenames = {EXTRE.sub('', os.path.basename(a)): a
			for a in CORPORA[engine].files}
	selected = {filenames[TEXTS[n]]: n for n in selectedtexts(form)}
	start, end = getslice(form.get('slice'))
	# NB: we do not hide function or morphology tags when exporting
	url = 'trees?' + url_encode(dict(export='csv', **form), separator=b';')
	# The query is user input echoed back into the page: truncate it for
	# display, then escape it.
	shownquery = (form['query'] if len(form['query']) < 128
			else form['query'][:128] + '...')
	yield ('<pre>Query: %s\n'
			'Trees (showing up to %d per text; '
			'export: <a href="%s">plain</a>, '
			'<a href="%s">with line numbers</a>):\n' % (
				htmlescape(shownquery),
				TREELIMIT, url, url + ';linenos=1'))
	try:
		tmp = CORPORA[engine].trees(form['query'],
				selected, start, end, maxresults=TREELIMIT,
				nomorph='nomorph' in form, nofunc='nofunc' in form)
	except Exception as err:
		# report only the last line of the error message to the user
		yield '<span class=r>%s</span>' % htmlescape(str(err).splitlines()[-1])
		return
	# results are grouped by filename without sorting, which relies on the
	# matches of each file being contiguous
	for n, (filename, results) in enumerate(groupby(tmp, itemgetter(0))):
		textno = selected[filename]
		text = TEXTS[textno]
		if 'breakdown' in form:
			# count the largest highlighted subtree of each match
			breakdown = Counter(DiscTree(
					max(high, key=lambda x: len(x.leaves())
						if isinstance(x, Tree) else 1).freeze(), sent)
					for _, _, _, sent, high in results if high)
			yield '\n%s\n' % text
			for match, cnt in breakdown.most_common():
				gotresults = True
				yield 'count: %5d\n%s\n\n' % (
						cnt, DrawTree(match, match.sent).text(
							unicodelines=True, html=True, funcsep='-'))
			continue
		for m, (_filename, sentno, tree, sent, high) in enumerate(results):
			if m == 0:
				# first match of this text: emit the collapsible header
				gotresults = True
				yield ("==&gt; %s: [<a href=\"javascript: toggle('n%d'); \">"
						"toggle</a>]\n<span id=n%d>" % (text, n + 1, n + 1))
			link = ('<a href="browse?text=%d;sent=%d%s%s">browse</a>'
					'|<a href="browsesents?%s">context</a>' % (
					textno, sentno, ';nofunc' if 'nofunc' in form else '',
					';nomorph' if 'nomorph' in form else '',
					url_encode(dict(text=textno, sent=sentno,
						query=form['query'], engine=engine), separator=b';')))
			try:
				treerepr = DrawTree(tree, sent, highlight=high).text(
						unicodelines=True, html=True, funcsep='-')
			except ValueError as err:
				# the tree could not be drawn; show the raw data instead,
				# escaped since it is emitted as HTML
				line = "#%s \nERROR: %s\n%s\n%s\n" % (
						sentno, htmlescape(str(err)),
						htmlescape(str(tree)), htmlescape(str(sent)))
			else:
				line = "#%s [%s]\n%s\n" % (sentno, link, treerepr)
			yield line
		yield "</span>"
	yield '</pre>' if gotresults else "No matches."
Example #34
0
 def annotate_color(self, f, v):
     """Return feature ``f`` as an HTML snippet marked up by the sign of ``v``.

     The HTML-escaped feature is wrapped in a red ``<span>`` when ``v`` is
     negative, and in ``<b>`` otherwise.
     """
     if v < 0:
         return '<span style="color:red">{0}</span>'.format(htmlescape(f))
     return '<b>{0}</b>'.format(htmlescape(f))
 def significant_features(self):
     """Return an HTML table listing the most significant features per label.

     For every label in ``self._data['labels']`` up to 100 features are
     requested from ``get_sig_features``, mapped to their names, trimmed and
     color-annotated by a ``TextAnnotator``, and written as one table row.
     """
     settings = self._data['settings']
     feature_names = self._data['feature_names']
     coef = self._data['coef']
     ta = TextAnnotator(settings.unifier)
     labels = self._data['labels']

     # explanatory header, followed by the opening tag of the table
     parts = [
         '<h3>Significant features by labels</h3>\n',
         '<p>Below is a list with at most 100 most significant features for each label, that are used in the classification process.</p>',
         '<p>Features written in <b>black</b> and <span style="color:red">red</span> denote features that are respectively contributing',
         ' towards and against assigning the particular class label. ',
         'Both are equally important, but they should be interpreted differently, when debugging the classifier.</p>',
         '<table style="border: 1px solid black">',
     ]
     row = '<tr><td style="border-bottom: 1px solid black">{0}</td><td style="border-bottom: 1px solid black">{1}</td></tr>'

     for idx, label in enumerate(labels):
         ranked = get_sig_features(idx, coef, 100)
         named = [(feature_names[featidx], value) for featidx, value in ranked]
         trimmed = ta.trim_feature_prefixes(named)
         rendered = ', '.join([ta.annotate_color(f, v) for f, v in trimmed])
         # only the label is escaped here; the features are already markup
         parts.append(row.format(htmlescape(label), rendered))
     parts.append('</table>')
     return ''.join(parts)
    def __init__(self, source, proposal):
        """Compute the line diff between ``source`` and ``proposal``.

        ``proposal`` is HTML-escaped before comparison; ``source`` is used
        as given.  The resulting diff lines are stored in ``self.diff``.
        """
        escaped_proposal = htmlescape(proposal)
        # keep the line endings so that the diff lines retain them
        source_lines = source.splitlines(True)
        proposal_lines = escaped_proposal.splitlines(True)
        self.diff = list(Differ().compare(source_lines, proposal_lines))
Example #37
0
def counts(form, doexport=False):
	"""Produce graphs and tables for a set of queries.

	Queries should be given one per line, optionally prefixed by a name and
	a normalization query::

		[name: ][normquery<tab>]query

	returns one graph for each query, and an overview with totals (optionally
	per category, if the first letters of each corpus name form a small set);

	This is a generator. Without ``doexport`` it yields chunks of HTML;
	user-supplied query names and queries are HTML-escaped where they are
	written to the page. With ``doexport`` it yields a single string with the
	relative frequencies, as JSON if ``form['export'] == 'json'`` and as CSV
	otherwise.

	:param form: the request parameters; ``form['query']`` is required,
		and ``engine``, ``slice``, ``target``, ``target2``, ``normquery``,
		``norm`` and ``export`` are consulted when present.
	:param doexport: if true, produce an export instead of HTML.
	:raises ValueError: if the normalization is not one of ``consts``,
		``words``, ``sents`` or ``query``.
	"""
	# TODO: option to arrange graphs by text instead of by query
	engine = form.get('engine', 'tgrep2')
	filenames = {EXTRE.sub('', os.path.basename(a)): a
			for a in CORPORA[engine].files}
	selected = {filenames[TEXTS[n]]: n for n in selectedtexts(form)}
	start, end = getslice(form.get('slice'))
	target = METADATA[form['target']] if form.get('target') else None
	target2 = METADATA[form['target2']] if form.get('target2') else None
	if not doexport:
		url = 'counts?' + url_encode(dict(export='csv', **form),
				separator=b';')
		yield ('Counts from queries '
				'(<a href="%s">export to CSV</a>):\n' % url)
	# Combined results of all queries on each file
	combined = defaultdict(int)
	index = [TEXTS[n] for n in selected.values()]
	df = pandas.DataFrame(index=index)
	queries = querydict(form['query'])
	if not doexport:
		# table of contents: one entry per query name, plus two fixed entries
		yield '<ol>%s</ol>\n' % '\n'.join(
				'<li><a href="#q%d">%s</a>' % (n, htmlescape(query))
				for n, query in enumerate(list(queries)
				+ ['Combined results', 'Overview'], 1))
	# The extra last item has query=None and stands for the combined results.
	for n, (name, (normquery, query)) in enumerate(
			list(queries.items()) + [('Combined results', ('', None))], 1):
		cnts = Counter()
		sumtotal = 0
		relfreq = {}
		resultsindices = None
		if query is None:
			# combined results are pointless when there is only one query
			if len(df.columns) == 1:
				break
			results = combined
			legend = '%sLegend:\t%s\n' % (64 * ' ', '\t'.join(
					'\n<font color=%s>%s</font>' % (
						COLORS.get(n, 'black'), htmlescape(query))
					for n, query in enumerate(queries)))
		else:
			legend = ''
			normquery = normquery or form.get('normquery')
			if normquery:
				norm = 'query'
				normresults = CORPORA[engine].counts(
						normquery, selected, start, end)
			else:
				norm = form.get('norm', 'sents')
			try:
				results = CORPORA[engine].counts(
						query, selected, start, end, indices=False)
			except Exception as err:
				# report only the last line of the error message to the user
				yield '<span class=r>%s</span>' % htmlescape(
						str(err).splitlines()[-1])
				return
			# the indices of matches are only fetched for small result sets
			if len(results) <= 32 and all(
					results[filename] < INDICESMAXRESULTS
					for filename in results):
				resultsindices = CORPORA[engine].counts(
						query, selected, start, end, indices=True)
		if not doexport:
			yield ('<a name=q%d><h3>%s</h3></a>\n<tt>%s</tt> '
					'[<a href="javascript: toggle(\'n%d\'); ">'
					'toggle results per text</a>]\n'
					'<div id=n%d style="display: none;"><pre>\n' % (
						n, htmlescape(name),
						htmlescape(query) if query is not None
						else legend, n, n))
		COLWIDTH = min(40, max(map(len, TEXTS)) + 2)
		for filename, cnt in sorted(results.items()):
			if query is None:
				cnt = combined[filename]
			else:
				combined[filename] += cnt
			textno = selected[filename]
			text = TEXTS[textno]
			cnts[text] = cnt
			# NB: for the combined results, norm is the one that was set
			# while processing the last query.
			if norm == 'consts':
				total = CORPUSINFO[engine][textno].numnodes
			elif norm == 'words':
				total = CORPUSINFO[engine][textno].numwords
			elif norm == 'sents':
				total = CORPUSINFO[engine][textno].len
			elif norm == 'query':
				# avoid division by zero when the norm query has no matches
				total = normresults[filename] or 1
			else:
				raise ValueError
			relfreq[text] = 100.0 * cnt / total
			sumtotal += total
			if not doexport:
				out = ('%s (<a href="browsesents?%s">browse</a>)    '
						'%5d %5.2f %%' % (
						text.ljust(COLWIDTH)[:COLWIDTH],
						url_encode(
							dict(text=textno, sent=1,
								query=query or form['query'],
								engine=engine),
							separator=b';'),
						cnt, relfreq[text]))
				barcode = ''
				if resultsindices is not None:
					barcode = dispplot(resultsindices[filename],
							start or 1, end or CORPUSINFO[engine][textno].len)
				if cnt:
					yield out + barcode + '\n'
				else:
					# texts without matches are grayed out
					yield '<span style="color: gray; ">%s%s</span>\n' % (
							out, barcode)
		# the combined results are left out of the export
		if not doexport or query is not None:
			df[name] = pandas.Series(relfreq)
		if not doexport:
			yield ('%s             %5d %5.2f %%\n\n' % (
					'TOTAL'.ljust(COLWIDTH),
					sum(cnts.values()),
					(100.0 * sum(cnts.values()) / sumtotal)
					if sumtotal else float('nan')))
			yield '</pre></div>'
			if max(cnts.values()) == 0:
				continue
			elif form.get('slice'):
				# show absolute counts when all texts have been limited to same
				# number of sentences
				yield plot(cnts, max(cnts.values()),
						'Absolute counts of \'%s\'' % name, unit='matches',
						target=target, target2=target2)
			else:
				yield plot(relfreq, max(relfreq.values()),
						'Relative frequency of \'%s\'; norm=%s' % (name, norm),
						unit='%', target=target, target2=target2)
	if doexport:
		if form.get('export') == 'json':
			yield json.dumps(df.to_dict(), indent=2)
		else:
			yield df.to_csv(None)
	else:
		def fmt(x):
			"""Compact float repr."""
			return '%g' % round(x, 3)

		yield '<h3><a name=q%d>Overview of patterns</a></h3>\n' % (
				len(queries) + 2)
		# collate stats
		if form.get('target'):
			keys = METADATA[form['target']]
		else:
			# category: part of the text name before the first '_',
			# or else its first character
			keys = pandas.Series([key.split('_')[0] if '_' in key else key[0]
					for key in df.index], index=df.index)
		keyset = keys.unique()
		# The tables below have the query names as column headers,
		# so they are escaped before being emitted.
		if len(keyset) * len(queries) <= 30:
			overview = OrderedDict(
					('%s_%s' % (cat, query),
						df[query].loc[keys == cat].mean() or 0)
					for query in df.columns
						for cat in keyset)
			df['category'] = keys
			yield '<pre>\n%s\n</pre>' % htmlescape(
					df.groupby('category').describe().to_string(
						float_format=fmt))
		else:
			overview = OrderedDict((query, df[query].mean())
					for query in df.columns)
			yield '<pre>\n%s\n</pre>' % htmlescape(
					df.describe().to_string(float_format=fmt))
		yield plot(overview, max(overview.values()),
				'Relative frequencies of patterns'
				'(count / num_%s * 100)' % norm, unit='%',
				dosort=False, target=target, target2=target2)