def setFilterPackageInfo(self, arg):
    """Display info about the filter package *arg* in the info label."""
    spec = filters_factory.FilterPackageSpec(arg)
    if spec.is_url:
        label_text = "Location: <b>{}</b>".format(htmlescape(str(spec.url)))
    else:
        label_text = "<b>{}:</b> {}".format(
            htmlescape(str(spec.fpname)), htmlescape(str(spec.fpdir)))
    self.ui.lblInfo.setText(label_text)
def sentence_report(self):
    """Returns
    -------
    unicode
        HTML containing misclassified sentences.
    """
    y_true = self._data['y_true']
    y_pred = self._data['y_pred']
    sentences = self._data['sentences']
    labels = self._data['labels']
    # Group misclassified sentences by (true label, predicted label).
    misclassified = defaultdict(list)
    for i, (t, p) in enumerate(zip(y_true, y_pred)):
        if t != p:
            misclassified[(t, p)].append(sentences[i])
    parts = ['<h3>Misclassified sentences</h3>']
    for s, s_label in enumerate(labels):
        for d, d_label in enumerate(labels):
            if s == d:
                continue
            sents = misclassified[(s, d)]
            if sents:
                parts.append(
                    '<table><tr><th>{0} → {1}. Count: {2}.</th></tr>'.format(
                        htmlescape(s_label), htmlescape(d_label), len(sents)))
                parts.extend('<tr><td>{0}</td></tr>'.format(htmlescape(sent))
                             for sent in sents)
                parts.append('</table>')
    return ''.join(parts)
def on_get(self, req, resp):
    """Serve the userlist page for the server id given in the request.

    Responds with 404 when the ``serverid`` query parameter is missing,
    malformed (given more than once, so falcon hands us a list), or
    unknown (not a key in ``self.server_user_list``).
    """
    requested_server_id = req.params.get("serverid")

    def not_found(message):
        # All three failure modes: same 404 status and same log event.
        resp.body = message
        resp.status = falcon.status_codes.HTTP_NOT_FOUND
        self.log_info("Got invalid server id in get request.")

    # The parameter didn't exist at all.
    if requested_server_id is None:
        not_found("That server hasn't enabled this feature. (No server ID)")
        return
    # Parameter given multiple times: falcon passes it to us as a list.
    if isinstance(requested_server_id, list):
        not_found("That server hasn't enabled this feature. (Invalid server ID)")
        return
    # The specified server id is not a key in the server_user_list.
    if requested_server_id not in self.server_user_list:
        not_found("That server hasn't enabled this feature. (No data)")
        return

    # Sort user entries by escaped lowercase username, alphabetically.
    user_sorted_list = sorted(
        self.server_user_list[requested_server_id],
        key=lambda user: htmlescape(user["username"].lower(), quote=True))
    # Format one list_entry html fragment per user (enumerate was unused).
    list_entries_html = [
        self.dynamic_html.format(
            user["icon_url"],
            htmlescape(user["username"], quote=True),
            user["last_seen_time"])
        for user in user_sorted_list]

    resp.content_type = "text/html"
    # Inject the list entries into the static page template.
    resp.body = self.static_html.format("".join(list_entries_html))
    resp.status = falcon.HTTP_OK
    self.log_info("Served userlist page for server id {0}.".format(
        requested_server_id))
def updateSrcDisplay(self):
    """Refresh the sources text box from ``self.data.src``."""
    sources = self.data.src
    if not sources:
        self.ui.txtSources.setHtml(
            "<html><head/><body><p style=\"color: #808080; "
            "font-style:italic\">(no sources set)</p></body></html>"
        )
        return
    if len(sources) == 1:
        body = "<p>" + htmlescape(sources[0]) + "</p>"
    else:
        items = []
        for src in sources:
            items.append("<li>" + htmlescape(src) + "</li>")
        body = "<ol>" + "".join(items) + "</ol>"
    self.ui.txtSources.setHtml("<html><head/><body>" + body + "</body></html>")
def _display_header(self):
    """Render the bibolamazi file's raw header section into the info pane."""
    raw_header = self.bibolamaziFile.rawHeader().strip()
    if not raw_header:
        # Nothing to show without a raw header.
        return
    fileinfohtml = (
        "<h2>Raw Header</h2>\n"
        "<p class=\"shadow\">The top section of the bibolamazi file "
        "is ignored by bibolamazi. Whatever bibtex entries listed here "
        "will not be affected by bibolamazi filters and will be retained "
        "as is at the top of the file. These bibtex entries are seen by "
        "latex as regular entries that have not been filtered by bibolamazi. "
        "This portion of the file cannot be edited here; use your favorite "
        "text editor to edit.</p>"
        "<pre class=\"small\">" + htmlescape(raw_header) + "</pre>"
    )
    dark_mode = uiutils.is_dark_mode(self)
    template = textwrap.dedent('''\
        <!DOCTYPE HTML>
        <html>
        <head>
        <style type="text/css">
        %(basecss)s
        .source { margin: 0.5em 0px 0px 0px; }
        .filter { margin: 0.3em 0px 0px 0px; }
        .filterdescription { font-style: italic; margin-left: 2.5em; }
        </style>
        </head>
        <body>
        %(content)s
        </body>
        </html>''')
    thehtml = template % {
        'basecss': helpbrowser.getCssHelpStyle(dark_mode=dark_mode),
        'content': helpbrowser.wrapInHtmlContentContainer(fileinfohtml,
                                                          width=800),
    }
    self.ui.txtInfo.setHtml(thehtml)
def _html_row(tag, unsafe, css_cls, css_style, cell_values, colwidths, colaligns):
    """Build one HTML table row; header rows also open the table scaffolding."""
    try:
        from html import escape as htmlescape
    except ImportError:
        from cgi import escape as htmlescape  # legacy Python 2 fallback
    css_cls = css_cls or {}
    css_style = css_style or {}
    key = 'header_cols' if tag == HTML.TH else 'cols'
    cells = []
    for idx, value in enumerate(cell_values):
        if not unsafe:
            value = htmlescape(value)
        cls_list = css_cls.get(key, [None] * (idx + 1))
        sty_list = css_style.get(key, [None] * (idx + 1))
        cells.append(html_tag(tag, value.strip(), cls_list[idx], sty_list[idx]))
    row = html_tag(HTML.TR, ''.join(cells).strip(),
                   css_cls.get('row'), css_style.get('row'))
    if tag == HTML.TH:
        # A header row also opens the <table>/<thead>/<tbody> structure.
        return f"<{HTML.TABLE}>\n<{HTML.THEAD}>\n{row}\n</{HTML.THEAD}>\n<{HTML.TBODY}>"
    return row
def _update_gui_state(self):
    """Update button visibility/enabled state for the current wizard page."""
    if self.ui.stk.currentWidget() == self.ui.pageUsername:
        # Username page: only "Next" shown, enabled once a username is typed.
        # (Removed the original setCurrentWidget(pageUsername) call: it set
        # the current widget to the widget that is already current — a no-op.)
        self.ui.btnBack.setVisible(False)
        self.ui.btnNext.setVisible(True)
        self.ui.btnNext.setEnabled(bool(self.ui.txtUser.text()))
        self.ui.btnOk.setVisible(False)
    elif self.ui.stk.currentWidget() == self.ui.pageRepo:
        username = self.ui.txtUser.text()
        self.ui.lblPromptRepo.setTextFormat(Qt.RichText)
        self.ui.lblPromptRepo.setText(
            "Repositories for <b>{}</b>".format(htmlescape(username)))
        try:
            repolist = self._get_repolist_for_user(username)
            if self.ui.cbxRepos.repolist != repolist:
                # Repopulate the combo box without emitting change signals.
                with BlockedSignals(self.ui.cbxRepos):
                    self.ui.cbxRepos.clear()
                    for repo in repolist:
                        self.ui.cbxRepos.addItem(repo)
                    self.ui.cbxRepos.repolist = repolist
        except Exception as e:
            # Best-effort: a failed repo lookup leaves the list as it was.
            logger.debug("Ignoring exception ... %r", e)
            logger.exception("Ignoring exception")
        self.ui.btnBack.setVisible(True)
        self.ui.btnNext.setVisible(False)
        self.ui.btnOk.setVisible(True)
        self.ui.btnOk.setEnabled(bool(self.ui.cbxRepos.currentText()))
def sentence_report(self):
    """Returns
    -------
    unicode
        HTML containing misclassified sentences.
    """
    data = self._data
    y_true, y_pred = data['y_true'], data['y_pred']
    sentences, labels = data['sentences'], data['labels']
    # Bucket each wrongly-predicted sentence under its (true, predicted) pair.
    by_pair = defaultdict(list)
    for idx, (true_lbl, pred_lbl) in enumerate(zip(y_true, y_pred)):
        if true_lbl != pred_lbl:
            by_pair[(true_lbl, pred_lbl)].append(sentences[idx])
    html = '<h3>Misclassified sentences</h3>'
    for s, s_label in enumerate(labels):
        for d, d_label in enumerate(labels):
            if s == d:
                continue
            sents = by_pair[(s, d)]
            if not sents:
                continue
            html += '<table><tr><th>{0} → {1}. Count: {2}.</th></tr>'.format(
                htmlescape(s_label), htmlescape(d_label), len(sents))
            for sent in sents:
                html += '<tr><td>{0}</td></tr>'.format(htmlescape(sent))
            html += '</table>'
    return html
def richtext_to_plaintext(text, default="", escape=False) -> str:
    """Convert stored rich text (draftjs JSON or HTML) to plain text.

    Falls back to *default* when *text* is empty; optionally HTML-escapes
    the result when *escape* is True.
    """
    is_draftjs, text = try_parse_draftjs(text or default)
    if not is_draftjs:
        # Not draftjs content: treat the value as HTML markup.
        text = html_to_plaintext(text)
    return htmlescape(text) if escape else text
def _get_help_page_general(pathitems, kwargs):
    """Return the HelpTopicPage for a ``/general/...`` help path.

    Raises HelpPageError for unrecognized paths.
    """
    if pathitems == ['welcome']:
        canonpath = '/general/welcome'
        _get_help_canonpath_check(canonpath, kwargs)
        return HelpTopicPage.makeMarkdownPage(
            HELP_WELCOME, title="Welcome", canonpath=canonpath
        )
    if pathitems == ['cmdline']:
        canonpath = '/general/cmdline'
        _get_help_canonpath_check(canonpath, kwargs)
        p = kwargs.pop('parser', None)
        if p is None:
            # Imported lazily, presumably to avoid a circular import at
            # module load time — TODO confirm against the full module.
            from . import main as bibolamazimain
            p = bibolamazimain.get_args_parser()
        return HelpTopicPage.makeTxtPage(
            "\n".join(helptext_prolog_lines()) + "\n\n" + p.format_help(),
            title="Command-Line Help", canonpath=canonpath
        )
    if pathitems == ['version']:
        canonpath = '/general/version'
        _get_help_canonpath_check(canonpath, kwargs)
        return HelpTopicPage.makeMarkdownPage(
            htmlescape("\n\n".join(helptext_prolog_lines())),
            title="Version", canonpath=canonpath
        )
    if pathitems == ['cmdlversion']:
        canonpath = '/general/cmdlversion'
        _get_help_canonpath_check(canonpath, kwargs)
        return HelpTopicPage.makeTxtPage(
            TMPL_VERSION_INFO.format(
                version=butils.get_version(),
                copyrightyear=butils.get_copyrightyear()
            ),
            title="Version", canonpath=canonpath
        )
    # No branch matched: report the full requested path.
    raise HelpPageError("Unknown help path: /{}".format(
        '/'.join(kwargs['basepathitems'] + pathitems)))
def _prepare_yaml_element(element): """Prepare a yaml element for display in html""" element["time"] = element["time"][11:] for key, val in element.items(): if type(element[key]) == str: element[key] = htmlescape(val) if "message" in element: element["message"] = formatting.to_html(element["message"]) element["message"] = url_pat.sub(r"<a href='\1'>\1</a>", element["message"])
def classification_report_list(self, title, data):
    """Render per-class precision/recall/F1/support rows as an HTML table.

    *data* is an iterable of (label, precision, recall, f1, support)
    tuples with the metrics given as fractions (scaled to percent here).
    """
    header = ('<table>'
              '<tr><th colspan="5">{0}</th></tr>'
              '<tr><th>Class</th><th>Precision</th><th>Recall</th>'
              '<th>F1</th><th>Support/Count</th>'
              '</tr>').format(title)
    rows = [
        '<tr><td>{0}</td><td>{1:.1f}</td><td>{2:.1f}</td>'
        '<td>{3:.1f}</td><td>{4}</td></tr>'.format(
            htmlescape(label), precision * 100, recall * 100,
            f1 * 100, support)
        for label, precision, recall, f1, support in data]
    return header + ''.join(rows) + '</table>'
def _prepare_yaml_element(element): """Prepare a yaml element for display in html""" element["time"] = element["time"][11:] for key, val in element.items(): if isinstance(element[key], str): element[key] = htmlescape(val) if "message" in element: element["message"] = formatting.to_html(element["message"]) element["message"] = url_pat.sub(r"<a href='\1'>\1</a>", element["message"])
def _search_logs(self, request):
    """Run a full-text search over the channel logs and write the page."""
    querystr = bytes_to_str(request.args[b"q"][0])
    if b"page" in request.args:
        try:
            page = int(request.args[b"page"][0])
        except ValueError:
            page = -1  # force the "invalid page" response below
    else:
        page = 1
    if page < 1:
        log_data = "Invalid page number specified"
        request.write(
            str_to_bytes(
                search_page_template.format(log_data=log_data,
                                            title=self.title,
                                            header=header,
                                            footer=footer,
                                            channel=self.channel)))
        request.finish()
        return
    with self.ix.searcher() as searcher:
        query = QueryParser("content", self.ix.schema).parse(querystr)
        res_page = searcher.search_page(query,
                                        page,
                                        pagelen=self.pagelen,
                                        sortedby="date",
                                        reverse=True)
        res_page.results.fragmenter = highlight.SentenceFragmenter(
            sentencechars=u".!?\u2026", charlimit=None)
        log_data = ""
        for hit in res_page:
            log_data += ("<ul><div><label><a href='{channel}?date="
                         "{date}'>{date}</a></label>".format(
                             channel=self.channel_link(),
                             date=hit["date"].strftime("%Y-%m-%d")) +
                         hit.highlights("content") + "</div></ul>")
        # The original for/else always ran its else (no `break` in the loop);
        # flattened for clarity.
        if not res_page.is_last_page():
            # NOTE(review): querystr is interpolated unescaped into the
            # link — consider url-encoding it.
            log_data += "<a href='?q={}&page={}'>Next</a>".format(
                querystr, page + 1)
        if not res_page:
            # Typo fix in the user-facing message: "containg" -> "containing".
            log_data = "No Logs found containing: {}".format(
                htmlescape(querystr))
        request.write(
            str_to_bytes(
                search_page_template.format(log_data=log_data,
                                            title=self.title,
                                            header=header,
                                            footer=footer,
                                            channel=self.channel_link())))
        request.finish()
def insert_spans(text, spans, css_classes):
    """Insert spans with specified css classes into text and html escape
    all other characters."""
    # Build (position, kind, style) markers; kind 1 opens a span, 0 closes.
    markers = []
    for (start, end), classes in zip(spans, css_classes):
        markers.append((start, 1, classes))
        markers.append((end, 0, None))
    markers.sort()
    # One list element per original character keeps span offsets valid
    # even after multi-character escapes like '&lt;'.
    chars = [htmlescape(c) for c in text]
    # Insert from the back so earlier positions are not shifted.
    for pos, kind, classes in reversed(markers):
        tag = '<span style="{0}">'.format(classes) if kind == 1 else '</span>'
        chars[pos:pos] = tag
    return ''.join(chars)
def _search_logs(self, request):
    """Run a full-text search over the channel logs and write the page.

    Compatible with both Python 2 and 3 (the page data is utf-8 encoded
    on Python 2 before writing, as before).
    """
    # bytes.decode() works on Python 2 and 3 alike; the previous
    # `unicode(...)` call is a NameError on Python 3.
    querystr = request.args[b"q"][0].decode("utf-8")
    if b"page" in request.args:
        try:
            page = int(request.args[b"page"][0])
        except ValueError:
            page = -1  # force the "invalid page" response below
    else:
        page = 1
    if page < 1:
        log_data = "Invalid page number specified"
        request.write(str_to_bytes(search_page_template.format(
            log_data=log_data, title=self.title, header=header,
            footer=footer, channel=self.channel)))
        request.finish()
        return
    with self.ix.searcher() as searcher:
        query = QueryParser("content", self.ix.schema).parse(querystr)
        res_page = searcher.search_page(query, page, pagelen=self.pagelen,
                                        sortedby="date", reverse=True)
        res_page.results.fragmenter = highlight.SentenceFragmenter(
            sentencechars=u".!?\u2026", charlimit=None)
        log_data = ""
        for hit in res_page:
            log_data += ("<ul><div><label><a href='{channel}?date="
                         "{date}'>{date}</a></label>".format(
                             channel=self.channel_link(),
                             date=hit["date"].strftime("%Y-%m-%d")) +
                         hit.highlights("content") + "</div></ul>")
        # The original for/else always ran its else (no `break`); flattened.
        if not res_page.is_last_page():
            log_data += "<a href='?q={}&page={}'>Next</a>".format(
                querystr, page + 1)
        if not res_page:
            # Typo fix in the user-facing message: "containg" -> "containing".
            log_data = "No Logs found containing: {}".format(
                htmlescape(querystr))
        if sys.version_info.major < 3:
            log_data = log_data.encode("utf-8")
        request.write(str_to_bytes(search_page_template.format(
            log_data=log_data, title=self.title, header=header,
            footer=footer, channel=self.channel_link())))
        request.finish()
def bibolamazi_error_html(errortxt, wrap_pre=True):
    """HTML-escape a bibolamazi error message and linkify line references.

    *wrap_pre* may be True (wrap the fragment in a <pre>), a
    (start_tag, end_tag) pair, or falsy (return the bare fragment).
    """
    def make_link(match):
        # Turn "@: ... line N" into a clickable goto-line action link.
        return "<a href=\"action:/goto-bibolamazi-file-line/%d\">%s</a>" % (
            int(match.group('lineno')), match.group())

    escaped = str(htmlescape(errortxt, quote=True))
    escaped = re.sub(r'@:.*line\s+(?P<lineno>\d+)', make_link, escaped)
    try:
        # wrap_pre given as an explicit (start_tag, end_tag) pair
        return wrap_pre[0] + escaped + wrap_pre[1]
    except (TypeError, IndexError):
        pass
    if wrap_pre:
        # wrap_pre is simply True
        return "<pre style=\"white-space: pre-wrap\">" + escaped + "</pre>"
    return escaped
def misclassified_data(self):
    """Return a full HTML page listing misclassified dataframe rows,
    with significant features highlighted inside each text column."""
    y_true, y_pred = self._data['y_true'], self._data['y_pred']
    sentences = self._data['sentences']
    df = self._data['dataframe']
    labels = self._data['labels']
    sigfeatures = self._data['sigfeatures']
    settings = self._data['settings']
    feature_names = self._data['feature_names']
    ta = TextAnnotator(settings.unifier)
    D = defaultdict(list)
    # divide sentences / group misclassified row indices by (true, predicted)
    for i, (t, p) in enumerate(zip(y_true, y_pred)):
        if t != p:
            D[(t, p)].append(i)
            # sanity check: y_true must agree with the dataframe label column
            assert labels[t] == df[settings.label][i]
    html = """<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Classification report</title>
</head>
<body>"""
    html += '<h3>Misclassified data</h3>'
    for s, s_label in enumerate(labels):
        for d, d_label in enumerate(labels):
            if s == d:
                continue
            idxs = D[(s, d)]
            if len(idxs) > 0:
                html += '<br/><br/><br/><b>True label:</b> {0}<br/><b>Predicted label:</b> {1}<br/><b>Count:</b> {2}<br/>\n'.format(
                    s_label, d_label, len(idxs))
                subdf = df.iloc[idxs].fillna('')
                for idx in idxs:
                    features = [(feature_names[featidx], value)
                                for featidx, value in sigfeatures[idx]]
                    for col in settings.features:
                        # NOTE(review): chained indexing subdf[col][idx] looks
                        # like it relies on df having a default integer index
                        # (label == position) — verify against the caller.
                        subdf[col][idx] = ta.annotate_important_features(
                            htmlescape(subdf[col][idx]), features)
                # escape=False: cells already contain annotated HTML.
                html += subdf.to_html(index=False, escape=False)
    return html + '</body></html>'
def generate_profile(self, escape=False):
    """Serialize this configuration profile to plist XML.

    Parameters
    ----------
    escape : bool
        When True, HTML-escape the plist text (e.g. for embedding in HTML).

    Returns
    -------
    str
        The plist XML document.
    """
    profile = {
        'PayloadUUID': self.profile_uuid,
        'PayloadType': "Configuration",
        'PayloadOrganization': self.profile_organization,
        'PayloadIdentifier': self.profile_uuid,
        'PayloadDisplayName': self.profile_name,
        'PayloadDescription': self.profile_description,
        'PayloadVersion': 1,
        'PayloadEnabled': True,
        'PayloadRemovalDisallowed': True,
        'PayloadContent': self.payloads,
    }
    # plistlib.writePlistToString() only exists on Python 2; dumps() is
    # the modern API (returns bytes, so decode to keep returning str).
    try:
        formatted_profile = plistlib.dumps(profile).decode("utf-8")
    except AttributeError:
        formatted_profile = plistlib.writePlistToString(profile)
    if escape:
        return htmlescape(formatted_profile)
    return formatted_profile
def misclassified_data(self):
    """Return a full HTML page listing misclassified dataframe rows,
    with significant features highlighted inside each text column."""
    y_true, y_pred = self._data['y_true'], self._data['y_pred']
    sentences = self._data['sentences']
    df = self._data['dataframe']
    labels = self._data['labels']
    sigfeatures = self._data['sigfeatures']
    settings = self._data['settings']
    feature_names = self._data['feature_names']
    ta = TextAnnotator(settings.unifier)
    D = defaultdict(list)
    # divide sentences / group misclassified row indices by (true, predicted)
    for i, (t, p) in enumerate(zip(y_true, y_pred)):
        if t != p:
            D[(t, p)].append(i)
            # sanity check: y_true must agree with the dataframe label column
            assert labels[t] == df[settings.label][i]
    html = """<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Classification report</title>
</head>
<body>"""
    html += '<h3>Misclassified data</h3>'
    for s, s_label in enumerate(labels):
        for d, d_label in enumerate(labels):
            if s == d:
                continue
            idxs = D[(s, d)]
            if len(idxs) > 0:
                html += '<br/><br/><br/><b>True label:</b> {0}<br/><b>Predicted label:</b> {1}<br/><b>Count:</b> {2}<br/>\n'.format(s_label, d_label, len(idxs))
                subdf = df.iloc[idxs].fillna('')
                for idx in idxs:
                    features = [(feature_names[featidx], value)
                                for featidx, value in sigfeatures[idx]]
                    for col in settings.features:
                        # NOTE(review): chained indexing subdf[col][idx] looks
                        # like it relies on df having a default integer index
                        # (label == position) — verify against the caller.
                        subdf[col][idx] = ta.annotate_important_features(htmlescape(subdf[col][idx]), features)
                # escape=False: cells already contain annotated HTML.
                html += subdf.to_html(index=False, escape=False)
    return html + '</body></html>'
def contentAsHtmlFragment(self):
    """Return this page's content converted to an HTML fragment.

    Content kinds are tried in priority order: a ready-made html
    fragment, then markdown, then plain text.
    """
    content = self._content
    if 'htmlfragment' in content:
        return self.getContent('htmlfragment')
    if 'markdown' in content:
        # format documentation using markdown2. Import this now only, so
        # that as long as we don't need markdown->html then this module
        # doesn't have to be installed
        import markdown2
        extras = ["footnotes", "fenced-code-blocks", "smarty-pants", "tables"]
        return markdown2.markdown(self.getContent('markdown'), extras=extras)
    if 'txt' in content:
        escaped = htmlescape(self.getContent('txt'))
        return "<pre class=\"txtcontent\">" + escaped + "</pre>"
    raise HelpPageError("Can't convert content to HTML, we have {}"
                        .format(", ".join(content.keys())))
def dolog(self, txt, levelno=logging.INFO):
    """Append a log message to the HTML log view, colored by severity.

    *txt* may be a plain string (HTML-escaped here) or an object
    exposing ``getHtml()`` (e.g. a PreformattedHtml instance).
    """
    try:
        html = txt.getHtml()  # in case of a PreformattedHtml instance
    except AttributeError:
        html = str(htmlescape(txt))  # in case of a plain text string
    if levelno in (logging.ERROR, logging.CRITICAL):
        sty = "color: #ff0000; font-weight: bold;"
    elif levelno == logging.WARNING:
        sty = "color: rgb(150,80,0); font-weight: bold;"
    elif levelno == logging.INFO:
        # default text color
        sty = "font-weight: normal;"
    elif levelno == logging.DEBUG or levelno == blogger.LONGDEBUG:
        sty = "color: #7f7f7f; font-weight: normal;"
    else:
        # unknown level: style like debug output
        sty = "color: #7f7f7f; font-weight: normal;"
    sty += "white-space: pre;"
    self.logHtml.emit("<span style=\"%s\">%s\n</span>" % (sty, html))
def significant_features(self):
    """Return an HTML section listing the most significant features per label."""
    settings = self._data['settings']
    feature_names = self._data['feature_names']
    coef = self._data['coef']
    ta = TextAnnotator(settings.unifier)
    labels = self._data['labels']
    parts = [
        '<h3>Significant features by labels</h3>\n',
        '<p>Below is a list with at most 100 most significant features for each label, that are used in the classification process.</p>',
        '<p>Features written in <b>black</b> and <span style="color:red">red</span> denote features that are respectively contributing',
        ' towards and against assigning the particular class label. ',
        'Both are equally important, but they should be interpreted differently, when debugging the classifier.</p>',
        '<table style="border: 1px solid black">',
    ]
    for idx, label in enumerate(labels):
        # Pick the (at most) 100 strongest coefficients for this label.
        sig = get_sig_features(idx, coef, 100)
        named = [(feature_names[featidx], value) for featidx, value in sig]
        named = ta.trim_feature_prefixes(named)
        rendered = ', '.join(ta.annotate_color(f, v) for f, v in named)
        parts.append(
            '<tr><td style="border-bottom: 1px solid black">{0}</td><td style="border-bottom: 1px solid black">{1}</td></tr>'.format(
                htmlescape(label), rendered))
    parts.append('</table>')
    return ''.join(parts)
def get_opening_mark(classes):
    """Return the opening-span markup for *classes*, HTML-escaped."""
    escaped_classes = htmlescape(classes)
    return OPENING_MARK.format(classes=escaped_classes)
def fragmentsinresults(form, doexport=False):
    """Extract recurring fragments from search results."""
    engine = form.get('engine', 'tgrep2')
    if engine not in ('tgrep2', 'frag'):
        yield "Only applicable to treebanks."
        return
    gotresults = False
    # Map corpus basename (without extension) -> full path.
    filenames = {EXTRE.sub('', os.path.basename(a)): a
                 for a in CORPORA[engine].files}
    selected = {filenames[TEXTS[n]]: n for n in selectedtexts(form)}
    start, end = getslice(form.get('slice'))
    uniquetrees = set()
    if not doexport:
        url = 'fragments?' + url_encode(dict(export='csv', **form),
                                        separator=b';')
        yield ('<pre>Query: %s\n'
               'Fragments (showing up to %d fragments '
               'in the first %d search results from selected texts;\n'
               'ordered by (freq ** 0.5 * numwords ** 2) '
               '<a href="%s">Export</a>):\n' % (
                   form['query'] if len(form['query']) < 128
                   else form['query'][:128] + '...',
                   FRAGLIMIT, SENTLIMIT, url))
    # Discontinuous constituents only supported by the 'frag' engine.
    disc = engine != 'tgrep2'
    if disc:
        fragments.PARAMS.update(disc=True, fmt='discbracket')
    else:
        fragments.PARAMS.update(disc=False, fmt='bracket')
    for n, (_, _, treestr, _) in enumerate(CORPORA[engine].sents(
            form['query'], selected, start, end,
            maxresults=SENTLIMIT, brackets=True)):
        if n == 0:
            gotresults = True
        if engine == 'tgrep2':
            line = treestr.replace(" )", " -NONE-)") + '\n'
        elif engine == 'frag':
            line = treestr + '\n'
        else:
            raise ValueError
        uniquetrees.add(line.encode('utf8'))
    if not gotresults and not doexport:
        yield "No matches."
        return
    # TODO: get counts from whole text (preload)
    import tempfile
    with tempfile.NamedTemporaryFile(delete=True) as tmp:
        tmp.writelines(uniquetrees)
        tmp.flush()
        results, approxcounts = fragments.regular([tmp.name], 1, None, 'utf8')
    # Rank fragments by (numwords ** 2 * freq ** 0.5), keep top FRAGLIMIT.
    if disc:
        results = nlargest(FRAGLIMIT, zip(results, approxcounts),
                           key=lambda ff: sum(1 for a in ff[0][1] if a) ** 2
                           * ff[1] ** 0.5)
    else:
        results = nlargest(FRAGLIMIT, zip(results, approxcounts),
                           key=lambda ff: sum(1 for _ in
                               re.finditer(r'[^ ()]\)', ff[0])) ** 2
                           * ff[1] ** 0.5)
    gotresults = False
    if not doexport:
        yield "<ol>"
    for tree, freq in results:
        gotresults = True
        if disc:
            tree, sent = tree
            sent = ' '.join(a or '' for a in sent)
        if doexport:
            if disc:
                yield '%s\t%s\t%s\n' % (tree, sent, freq)
            else:
                yield '%s\t%s\n' % (tree, freq)
        else:
            if disc:
                link = '<a href="draw?tree=%s;sent=%s">draw</a>' % (
                    quote(tree.encode('utf8')), quote(sent.encode('utf8')))
                sent = GETLEAVES.sub(' <font color=red>\\1</font>',
                                     htmlescape(' ' + sent + ' '))
                tree = htmlescape(tree) + ' ' + sent
            else:
                link = '<a href="draw?tree=%s">draw</a>' % (
                    quote(tree.encode('utf8')))
                tree = GETLEAVES.sub(' <font color=red>\\1</font>',
                                     htmlescape(tree))
            # Mark frontier non-terminals in blue.
            tree = GETFRONTIERNTS.sub('(<font color=blue>\\1</font> )', tree)
            yield "<li>freq=%3d [%s] %s" % (freq, link, tree)
    if not doexport:
        yield "</ol>"
    if gotresults:
        yield '</pre>'
    else:
        yield "No fragments with freq > %d & nodes > %d." % (
            MINNODES, MINFREQ)
def sents(form, dobrackets=False):
    """Return search results as terminals or in bracket notation."""
    gotresults = False
    engine = form.get('engine', 'tgrep2')
    # Map corpus basename (without extension) -> full path.
    filenames = {EXTRE.sub('', os.path.basename(a)): a
                 for a in CORPORA[engine].files}
    selected = {filenames[TEXTS[n]]: n for n in selectedtexts(form)}
    start, end = getslice(form.get('slice'))
    url = '%s?%s' % ('trees' if dobrackets else 'sents',
                     url_encode(dict(export='csv', **form), separator=b';'))
    yield ('<pre>Query: %s\n'
           'Sentences (showing up to %d per text; '
           'export: <a href="%s">plain</a>, '
           '<a href="%s">with line numbers</a>):\n' % (
               form['query'] if len(form['query']) < 128
               else form['query'][:128] + '...',
               SENTLIMIT, url, url + ';linenos=1'))
    try:
        tmp = CORPORA[engine].sents(form['query'], selected, start, end,
                                    maxresults=SENTLIMIT,
                                    brackets=dobrackets)
    except Exception as err:
        # Show only the final line of the engine's error message.
        yield '<span class=r>%s</span>' % htmlescape(str(err).splitlines()[-1])
        return
    # NB: avoid sorting; rely on the fact that matches for each filename are
    # already contiguous. filenames will be in arbitrary order due to
    # multiprocessing
    for n, (filename, results) in enumerate(groupby(tmp, itemgetter(0))):
        textno = selected[filename]
        text = TEXTS[textno]
        if 'breakdown' in form:
            if dobrackets:
                breakdown = Counter(high for _, _, _, high, _ in results)
            else:
                # Keep only highlighted characters; collapse the gaps.
                breakdown = Counter(re.sub(
                    ' {2,}', ' ... ',
                    ''.join(char if n in high1 or n in high2 else ' '
                            for n, char in enumerate(sent)))
                    for _, _, sent, high1, high2 in results)
            yield '\n%s\n' % text
            for match, cnt in breakdown.most_common():
                gotresults = True
                yield '%5d %s\n' % (cnt, match)
            continue
        for m, (_filename, sentno, sent, high1, high2) in enumerate(results):
            if m == 0:
                gotresults = True
                yield ("\n%s: [<a href=\"javascript: toggle('n%d'); \">"
                       "toggle</a>] <ol id=n%d>" % (text, n, n))
            link = ('<a href="browse?text=%d;sent=%d%s%s">tree</a>'
                    '|<a href="browsesents?%s">context</a>' % (
                        textno, sentno,
                        ';nofunc' if 'nofunc' in form else '',
                        ';nomorph' if 'nomorph' in form else '',
                        url_encode(dict(text=textno, sent=sentno,
                                        highlight=sentno,
                                        query=form['query'],
                                        engine=engine), separator=b';')))
            if dobrackets:
                sent = htmlescape(sent.replace(" )", " -NONE-)"))
                out = sent.replace(high1, "<span class=r>%s</span>" % high1)
            else:
                out = applyhighlight(sent, high1, high2)
            yield "<li>#%s [%s] %s\n" % (str(sentno).rjust(6), link, out)
        yield "</ol>"
    yield '</pre>' if gotresults else 'No matches.'
def formatexception(e):
    """Return the HTML-escaped traceback of exception *e* ('' for None)."""
    if e is None:
        return ""
    tb_text = "".join(traceback.format_exception(e, e, e.__traceback__))
    return htmlescape(tb_text)
def plot(data, total, title, width=800.0, unit='', dosort=True,
         target=None, target2=None):
    """A HTML bar plot given a dictionary and max value.

    For large data with a target variable, renders a seaborn figure and
    returns it as an inline base64 PNG; otherwise returns a pure-HTML
    bar plot built from styled divs.
    """
    if len(data) > 30 and target is not None:
        df = pandas.DataFrame(index=data)
        if len(title) > 50:
            title = title[:50] + '...'
        df[title] = pandas.Series(data, index=df.index)
        df[target.name] = target.loc[df.index]
        if target2 is not None:
            df[target2.name] = target2.loc[df.index]
        if iscategorical(target):
            df.sort_values(by=target.name, inplace=True)
            if target2 is None:
                # seaborn.barplot(target.name, title, data=df)
                seaborn.violinplot(x=target.name, y=title, data=df,
                                   split=True, inner="stick",
                                   palette='Set1')
            else:
                seaborn.barplot(target.name, title, data=df,
                                hue=target2.name, palette='Set1')
            fig = plt.gcf()
            fig.autofmt_xdate()
        else:  # treat X-axis as continuous
            if target2 is None:
                seaborn.jointplot(target.name, title, data=df, kind='reg')
            else:
                seaborn.lmplot(target.name, title, data=df,
                               hue=target2.name, palette='Set1')
        # Convert to D3, SVG, javascript etc.
        # import mpld3
        # result = mpld3.fig_to_html(plt.gcf(), template_type='general',
        #         use_http=True)
        # Convert to PNG
        figfile = io.BytesIO()
        plt.tight_layout()
        plt.savefig(figfile, format='png')
        import base64
        result = '<div><img src="data:image/png;base64, %s"/></div>' % (
            base64.b64encode(figfile.getvalue()).decode('utf8'))
        plt.close()
        return result
    result = ['<div class=barplot>',
              ('<text style="font-family: sans-serif; font-size: 16px; ">'
               '%s</text>' % title)]
    if target is not None:
        # Order the bars by the target variable.
        data = OrderedDict([(key, data[key])
                            for key in target.sort_values().index
                            if key in data])
    # Key prefixes (before '_', else first char) pick the bar color class.
    keys = {key.split('_')[0] if '_' in key else key[0] for key in data}
    color = {}
    if len(keys) <= 5:
        color.update(zip(keys, range(1, 6)))
    keys = list(data)
    if dosort:
        keys.sort(key=data.get, reverse=True)
    for key in keys:
        result.append('<br><div style="width:%dpx;" class=b%d></div>'
                      '<span>%s: %g %s</span>' % (
                          int(round(width * data[key] / total))
                          if data[key] else 0,
                          color.get(key.split('_')[0] if '_' in key
                                    else key[0], 1)
                          if data[key] else 0,
                          htmlescape(key), data[key], unit,))
    result.append('</div>\n')
    return '\n'.join(result)
# Emit an HTML <img> tag embedding the PDF as a base64 data URI
# (the only supported export format is 'applepages-export').
if format != 'applepages-export':
    print("Invalid format: ", format)
    sys.exit(1)

HTML_TEMPLATE = """\
<img src=\"data:application/pdf;base64,{b64data}\" alt=\"{alttxt}\" title=\"{alttxt}\">
"""

# Read the PDF as *bytes*: base64 requires bytes input, and text mode
# would fail to decode (or corrupt) binary PDF data.  The context
# manager also guarantees the file handle is closed.
with open(pdffile, 'rb') as f:
    pdfcontents = f.read()

latexcode = os.environ.get('KLF_INPUT_LATEX', '')

# some substitutions in the latex string to make it more readable [duplicates
# C++ code from klfmime.cpp arrgh!!]
# \! \, \; \: -> simple space
latexcode = re.sub(r"\\[,;:!]", " ", latexcode)
# \text{Hello}, \mathrm{Hilbert-Einstein} --> {the text}
latexcode = re.sub(r"\\(?:text|mathrm)\{((?:\w|\s|[._-])*)\}", r"{\1}", latexcode)
# \var(epsilon|phi|...) -> \epsilon,\phi,...
latexcode = re.sub(r"\\var([a-zA-Z]+)", r"\\\1", latexcode)

print(HTML_TEMPLATE.format(
    # b64encode returns bytes; decode to str before percent-quoting.
    b64data=quote_plus(base64.b64encode(pdfcontents).decode('ascii')),
    alttxt=htmlescape(latexcode)
))
def gen_htmlfragment(filtname=filtname, filtinfo=filtinfo, kwargs=dict(kwargs)):
    """Generate the HTML help fragment for one filter.

    The default arguments snapshot the enclosing scope's current values
    (closure-binding idiom).
    """
    html = "<h1>Filter: {}</h1>\n\n".format(filtname)
    fpn = filtinfo.filterpackagename
    html += "<p class=\"shadow\">In filter package <b>" + htmlescape(fpn) + "</b></p>\n\n"
    author = filtinfo.fclass.getHelpAuthor().strip()
    if author:
        html += "<p>" + htmlescape(author) + "</p>\n\n"
    desc = filtinfo.fclass.getHelpDescription().strip()
    if desc:
        html += "<p>" + htmlescape(desc) + "</p>\n\n"
    table_width_px_str = str(kwargs.get('html_table_width_px', 550))
    html_opt = ''
    html_doc = ''
    fopt = filtinfo.defaultFilterOptions()
    if fopt:
        # we're in business -- filter options
        html_opt += "<h2><a name=\"a-filter-options\"></a>Filter Options:</h2>\n\n"
        html_opt += "<table width=\"" + table_width_px_str + "\">"
        for arg in fopt.filterOptions():
            sopt_arg_name = fopt.getSOptNameFromArg(arg.argname)
            html_opt += "<tr><th><a name=\"a-filter-option-{}\"></a>".format(urlquoteplus(arg.argname)) \
                + htmlescape(sopt_arg_name) + "</th></tr>"
            html_opt += "<tr><td class=\"indent\" width=\"" + table_width_px_str + "\">"
            html_opt += "<p class=\"inner\">" + htmlescape(arg.doc if arg.doc else '') + "</p>"
            if arg.argtypename:
                typ = butils.resolve_type(arg.argtypename, filtinfo.fmodule)
                if typ is bool:
                    html_opt += ("<p class=\"inner shadow\">Expects a boolean argument type"
                                 + " (True/1/Yes/On or False/0/No/Off)</p>")
                elif typ is int:
                    html_opt += ("<p class=\"inner shadow\">Expects an integer as argument</p>")
                elif hasattr(typ, '__doc__') and typ.__doc__:  # e.g., is not None
                    docstr = typ.__doc__.strip()
                    if len(docstr):
                        html_opt += ("<p class=\"inner shadow\">Expects argument type "
                                     + "<code>" + htmlescape(arg.argtypename) + "</code>: "
                                     + docstr + "</p>")
            html_opt += "</td></tr>\n"
        if fopt.filterAcceptsVarArgs():
            html_opt += "<tr><th>(...)</th></tr>"
            html_opt += ("<tr><td class=\"indent\" width=\"" + table_width_px_str + "\">This filter accepts "
                         "additional positional arguments (see doc below)</td></tr>")
        if fopt.filterAcceptsVarKwargs():
            html_opt += "<tr><th>(...=...)</th></tr>"
            html_opt += ("<tr><td class=\"indent\" width=\"" + table_width_px_str + "\">This filter accepts "
                         "additional named/keyword arguments (see doc below)</td></tr>")
        html_opt += "</table>"
        html_opt += """
<p>Pass options with the syntax <code>-s</code><span class="code-meta">OptionName</span><code>="</code><span class="code-meta">option value</span><code>"</code> or <code>-d</code><span class="code-meta">OptionName[</span><code>=True</code><span class="code-meta">|</span><code>False</code><span class="code-meta">]</span>. The form <code>-sXXX</code> is for passing strings (which must be quoted if comprising spaces or special characters), and the form <code>-dXXX</code> is for specifying boolean ON/OFF switches.</p>
"""
        html_doc += "<h2><a name=\"a-filter-doc\"></a>Filter Documentation:</h2>\n\n"
        html_doc += ("<div style=\"white-space: pre-wrap\">"
                     + htmlescape(filtinfo.fclass.getHelpText())
                     + "</div>\n\n")
    elif hasattr(filtinfo.fmodule, 'format_help'):
        html_doc += ("<div style=\"white-space: pre-wrap\">"
                     + htmlescape(filtinfo.fmodule.format_help())
                     + "</div>\n\n")
    else:
        html_doc += "<p style=\"font-style\">" + htmlescape(filtinfo.fclass.getHelpText()) + "</p>\n\n"
        # html += "<p style=\"font-style\">(no additional help available)</p>"
    if html_opt and html_doc:
        html += '<p><b>Contents:</b></p>'
        # Fixed: the TOC must link to the anchors actually defined above
        # ("a-filter-options", "a-filter-doc"); previously it linked to the
        # nonexistent "#a-filter-opt" and was missing a closing </a>.
        html += '<ul><li><a href="#a-filter-options">Filter Options</a></li>'
        html += '<li><a href="#a-filter-doc">Filter Documentation</a></li></ul>\n'
    html += html_opt
    html += html_doc
    return html
def setFilterPackageError(self, errmsg):
    """Show *errmsg* in the info label, styled as an error (dark red)."""
    text = "<span style=\"color: #800000\">{}</span>".format(htmlescape(errmsg))
    self.ui.lblInfo.setText(text)
def trees(form):
    """Return visualization of parse trees in search results.

    Generator yielding HTML fragments: a header, then per-text tree
    drawings (or a frequency breakdown when 'breakdown' is in *form*).
    """
    gotresults = False
    engine = form.get('engine', 'tgrep2')
    # map corpus base name (extension stripped) -> full file path
    filenames = {EXTRE.sub('', os.path.basename(a)): a
                 for a in CORPORA[engine].files}
    # map file path -> text index, restricted to the user's selection
    selected = {filenames[TEXTS[n]]: n for n in selectedtexts(form)}
    start, end = getslice(form.get('slice'))
    # NB: we do not hide function or morphology tags when exporting
    url = 'trees?' + url_encode(dict(export='csv', **form), separator=b';')
    # header: query (truncated to 128 chars for display) + export links
    yield ('<pre>Query: %s\n'
           'Trees (showing up to %d per text; '
           'export: <a href="%s">plain</a>, '
           '<a href="%s">with line numbers</a>):\n' % (
               form['query'] if len(form['query']) < 128
               else form['query'][:128] + '...',
               TREELIMIT, url, url + ';linenos=1'))
    try:
        tmp = CORPORA[engine].trees(form['query'], selected, start, end,
                                    maxresults=TREELIMIT,
                                    nomorph='nomorph' in form,
                                    nofunc='nofunc' in form)
    except Exception as err:
        # report only the last line of the engine's error message
        yield '<span class=r>%s</span>' % htmlescape(str(err).splitlines()[-1])
        return
    # results are grouped per file; assumes tmp is sorted by filename
    # (required by groupby) -- TODO confirm against CORPORA[engine].trees
    for n, (filename, results) in enumerate(groupby(tmp, itemgetter(0))):
        textno = selected[filename]
        text = TEXTS[textno]
        if 'breakdown' in form:
            # count the largest highlighted subtree of each match
            breakdown = Counter(DiscTree(
                max(high, key=lambda x: len(x.leaves())
                    if isinstance(x, Tree) else 1).freeze(), sent)
                for _, _, _, sent, high in results if high)
            yield '\n%s\n' % text
            for match, cnt in breakdown.most_common():
                gotresults = True
                yield 'count: %5d\n%s\n\n' % (
                    cnt, DrawTree(match, match.sent).text(
                        unicodelines=True, html=True, funcsep='-'))
            continue
        for m, (_filename, sentno, tree, sent, high) in enumerate(results):
            if m == 0:
                # first match for this text: open a collapsible span
                gotresults = True
                yield ("==> %s: [<a href=\"javascript: toggle('n%d'); \">"
                       "toggle</a>]\n<span id=n%d>" % (text, n + 1, n + 1))
            # per-sentence browse/context links
            link = ('<a href="browse?text=%d;sent=%d%s%s">browse</a>'
                    '|<a href="browsesents?%s">context</a>' % (
                        textno, sentno,
                        ';nofunc' if 'nofunc' in form else '',
                        ';nomorph' if 'nomorph' in form else '',
                        url_encode(dict(text=textno, sent=sentno,
                                        query=form['query'], engine=engine),
                                   separator=b';')))
            try:
                treerepr = DrawTree(tree, sent, highlight=high).text(
                    unicodelines=True, html=True, funcsep='-')
            except ValueError as err:
                # drawing failed; show the raw tree and sentence instead
                line = "#%s \nERROR: %s\n%s\n%s\n" % (
                    sentno, err, tree, sent)
            else:
                line = "#%s [%s]\n%s\n" % (sentno, link, treerepr)
            yield line
        yield "</span>"
    yield '</pre>' if gotresults else "No matches."
def annotate_color(self, f, v):
    """Render feature name *f* as an HTML fragment colored by coefficient sign.

    Parameters
    ----------
    f : str
        Feature name; it is HTML-escaped before being embedded.
    v : number
        Coefficient value. Negative values render in red (feature votes
        against the label); non-negative values render in bold black
        (feature votes towards the label).

    Returns
    -------
    str
        HTML fragment wrapping the escaped feature name.
    """
    # BUGFIX: removed the dead local `color = 'black'`, which was never used.
    if v < 0:
        return '<span style="color:red">{0}</span>'.format(htmlescape(f))
    return '<b>{0}</b>'.format(htmlescape(f))
def significant_features(self):
    """Build an HTML table listing the most significant features per label.

    For every label, up to 100 features are selected from the coefficient
    matrix, mapped back to their names, trimmed of prefixes and colored by
    the sign of their contribution.
    """
    data = self._data
    annotator = TextAnnotator(data['settings'].unifier)
    names = data['feature_names']
    weights = data['coef']
    parts = [
        '<h3>Significant features by labels</h3>\n',
        '<p>Below is a list with at most 100 most significant features for each label, that are used in the classification process.</p>',
        '<p>Features written in <b>black</b> and <span style="color:red">red</span> denote features that are respectively contributing',
        ' towards and against assigning the particular class label. ',
        'Both are equally important, but they should be interpreted differently, when debugging the classifier.</p>',
        '<table style="border: 1px solid black">',
    ]
    for idx, label in enumerate(data['labels']):
        raw = get_sig_features(idx, weights, 100)
        named = [(names[featidx], value) for featidx, value in raw]
        trimmed = annotator.trim_feature_prefixes(named)
        rendered = ', '.join([annotator.annotate_color(f, v) for f, v in trimmed])
        parts.append('<tr><td style="border-bottom: 1px solid black">{0}</td><td style="border-bottom: 1px solid black">{1}</td></tr>'.format(htmlescape(label), rendered))
    parts.append('</table>')
    return ''.join(parts)
def __init__(self, source, proposal):
    """Store a line-by-line diff between *source* and the HTML-escaped *proposal*.

    Both texts are split with keepends=True, so the diff entries retain
    their trailing newlines.
    """
    escaped = htmlescape(proposal)
    self.diff = list(Differ().compare(source.splitlines(1),
                                      escaped.splitlines(1)))
def counts(form, doexport=False):
    """Produce graphs and tables for a set of queries.

    Queries should be given one per line, optionally prefixed
    by a name and a normalization query::

        [name: ][normquery<tab>]query

    returns one graph for each query, and an overview with totals (optionally
    per category, if the first letters of each corpus name form a small set);
    """
    # TODO: option to arrange graphs by text instead of by query
    engine = form.get('engine', 'tgrep2')
    # map corpus base name (extension stripped) -> full file path
    filenames = {EXTRE.sub('', os.path.basename(a)): a
                 for a in CORPORA[engine].files}
    # map file path -> text index, restricted to the user's selection
    selected = {filenames[TEXTS[n]]: n for n in selectedtexts(form)}
    start, end = getslice(form.get('slice'))
    target = METADATA[form['target']] if form.get('target') else None
    target2 = METADATA[form['target2']] if form.get('target2') else None
    if not doexport:
        url = 'counts?' + url_encode(dict(export='csv', **form), separator=b';')
        yield ('Counts from queries '
               '(<a href="%s">export to CSV</a>):\n' % url)
    # Combined results of all queries on each file
    combined = defaultdict(int)
    index = [TEXTS[n] for n in selected.values()]
    df = pandas.DataFrame(index=index)
    queries = querydict(form['query'])
    if not doexport:
        # table of contents linking to the per-query anchors below
        yield '<ol>%s</ol>\n' % '\n'.join(
            '<li><a href="#q%d">%s</a>' % (n, query)
            for n, query in enumerate(
                list(queries) + ['Combined results', 'Overview'], 1))
    # one pass per query, plus a final synthetic 'Combined results' pass
    # (query is None) that reports the accumulated totals
    for n, (name, (normquery, query)) in enumerate(
            list(queries.items()) + [('Combined results', ('', None))], 1):
        cnts = Counter()
        sumtotal = 0
        relfreq = {}
        resultsindices = None
        if query is None:
            # synthetic combined pass; skip it when there was only one query
            if len(df.columns) == 1:
                break
            results = combined
            legend = '%sLegend:\t%s\n' % (64 * ' ', '\t'.join(
                '\n<font color=%s>%s</font>' % (
                    COLORS.get(n, 'black'), query)
                for n, query in enumerate(queries)))
        else:
            legend = ''
            # per-query normalization overrides the form-wide one
            normquery = normquery or form.get('normquery')
            if normquery:
                norm = 'query'
                normresults = CORPORA[engine].counts(
                    normquery, selected, start, end)
            else:
                norm = form.get('norm', 'sents')
            try:
                results = CORPORA[engine].counts(
                    query, selected, start, end, indices=False)
            except Exception as err:
                # report only the last line of the engine's error message
                yield '<span class=r>%s</span>' % htmlescape(
                    str(err).splitlines()[-1])
                return
            # fetch match positions (for the barcode plot) only when the
            # result set is small enough
            if len(results) <= 32 and all(
                    results[filename] < INDICESMAXRESULTS
                    for filename in results):
                resultsindices = CORPORA[engine].counts(
                    query, selected, start, end, indices=True)
        if not doexport:
            # per-query section header with a collapsible per-text listing
            yield ('<a name=q%d><h3>%s</h3></a>\n<tt>%s</tt> '
                   '[<a href="javascript: toggle(\'n%d\'); ">'
                   'toggle results per text</a>]\n'
                   '<div id=n%d style="display: none;"><pre>\n' % (
                       n, name,
                       htmlescape(query) if query is not None else legend,
                       n, n))
        COLWIDTH = min(40, max(map(len, TEXTS)) + 2)
        for filename, cnt in sorted(results.items()):
            if query is None:
                cnt = combined[filename]
            else:
                combined[filename] += cnt
            textno = selected[filename]
            text = TEXTS[textno]
            cnts[text] = cnt
            # choose the normalization denominator
            if norm == 'consts':
                total = CORPUSINFO[engine][textno].numnodes
            elif norm == 'words':
                total = CORPUSINFO[engine][textno].numwords
            elif norm == 'sents':
                total = CORPUSINFO[engine][textno].len
            elif norm == 'query':
                # `or 1` guards against a zero denominator
                total = normresults[filename] or 1
            else:
                raise ValueError
            relfreq[text] = 100.0 * cnt / total
            sumtotal += total
            if not doexport:
                out = ('%s (<a href="browsesents?%s">browse</a>) '
                       '%5d %5.2f %%' % (
                           text.ljust(COLWIDTH)[:COLWIDTH],
                           url_encode(dict(text=textno, sent=1,
                                           query=query or form['query'],
                                           engine=engine),
                                      separator=b';'),
                           cnt, relfreq[text]))
                barcode = ''
                if resultsindices is not None:
                    barcode = dispplot(
                        resultsindices[filename],
                        start or 1, end or CORPUSINFO[engine][textno].len)
                # gray out texts without matches
                if cnt:
                    yield out + barcode + '\n'
                else:
                    yield '<span style="color: gray; ">%s%s</span>\n' % (
                        out, barcode)
        # record relative frequencies; the combined pass is excluded from
        # the export
        if not doexport or query is not None:
            df[name] = pandas.Series(relfreq)
        if not doexport:
            yield ('%s %5d %5.2f %%\n\n' % (
                'TOTAL'.ljust(COLWIDTH),
                sum(cnts.values()),
                (100.0 * sum(cnts.values()) / sumtotal)
                if sumtotal else float('nan')))
            yield '</pre></div>'
            # NOTE(review): max() raises on an empty cnts -- presumably at
            # least one text is always selected; verify against callers
            if max(cnts.values()) == 0:
                continue
            elif form.get('slice'):
                # show absolute counts when all texts have been limited to same
                # number of sentences
                yield plot(cnts, max(cnts.values()),
                           'Absolute counts of \'%s\'' % name, unit='matches',
                           target=target, target2=target2)
            else:
                yield plot(relfreq, max(relfreq.values()),
                           'Relative frequency of \'%s\'; norm=%s' % (
                               name, norm),
                           unit='%', target=target, target2=target2)
    if doexport:
        if form.get('export') == 'json':
            yield json.dumps(df.to_dict(), indent=2)
        else:
            yield df.to_csv(None)
    else:
        def fmt(x):
            """Compact float repr."""
            return '%g' % round(x, 3)
        yield '<h3><a name=q%d>Overview of patterns</a></h3>\n' % (
            len(queries) + 2)
        # collate stats
        if form.get('target'):
            keys = METADATA[form['target']]
        else:
            # derive a category per text from the prefix of its name
            keys = pandas.Series(
                [key.split('_')[0] if '_' in key else key[0]
                 for key in df.index], index=df.index)
        keyset = keys.unique()
        if len(keyset) * len(queries) <= 30:
            # small enough: break the overview down per category
            overview = OrderedDict(
                ('%s_%s' % (cat, query),
                 df[query].loc[keys == cat].mean() or 0)
                for query in df.columns for cat in keyset)
            df['category'] = keys
            yield '<pre>\n%s\n</pre>' % (
                df.groupby('category').describe().to_string(
                    float_format=fmt))
        else:
            overview = OrderedDict((query, df[query].mean())
                                   for query in df.columns)
            yield '<pre>\n%s\n</pre>' % df.describe().to_string(
                float_format=fmt)
        yield plot(overview, max(overview.values()),
                   'Relative frequencies of patterns'
                   '(count / num_%s * 100)' % norm,
                   unit='%', dosort=False, target=target, target2=target2)