def render_GET(self, request):
    new_username = '******' + str(random.randint(10000, 99999))
    self._log.new_box(new_username)
    request.setHeader('Content-Type', 'text/html; charset=utf-8')
    return """<!DOCTYPE html>
<html>
<head>
  <title>Gustafo</title>
  <link rel="stylesheet" type="text/css" href="/style.css">
</head>
<body>
  <div id="column">
    <h1>Gustafo</h1>
    <div id="chatRecord"></div>
    <div id="inputLine">
      <form action="/" method="POST">
        <input type="hidden" name="user" value="%s" size="10">
        > <span id="userName">%s</span>:
        <input type="text" name="m" size="30" x-webkit-speech>
        <input type="submit" value="Send">
      </form>
    </div>
  </div>
  <script type="text/javascript" src="/jquery-1.9.1.js"></script>
  <script type="text/javascript" src="/chat.js"></script>
</body>
</html>
""" % (htmlescape(new_username), htmlescape(new_username))
def reply_qq(sender_id, sender, msg):
    global f_log
    msg = msg.strip()
    f_log.write("<p>" + htmlescape(sender) + " : " + htmlescape(msg) + "</p>")
    f_log.flush()
    if msg == "醒醒,醒醒":  # trigger phrase: "wake up, wake up"
        if sender != "":
            return sender + ",来啦来啦~"  # "<sender>, coming, coming~"
        else:
            return "来啦来啦~"  # "coming, coming~"
    else:
        return ""
def _format_fragment(self, text, fragment, seen):
    tagname = self.tagname
    attrs = self.attrs
    htmlclass = " ".join((self.classname, self.termclass))
    output = []
    index = fragment.startchar
    for t in fragment.matches:
        if t.startchar > index:
            output.append(text[index:t.startchar])
        ttxt = htmlescape(text[t.startchar:t.endchar])
        if t.matched:
            if t.text in seen:
                termnum = seen[t.text]
            else:
                termnum = len(seen)
                seen[t.text] = termnum
            ttxt = '<%s %s class="%s%s">%s</%s>' % (
                    tagname, attrs, htmlclass, termnum, ttxt, tagname)
        output.append(ttxt)
        index = t.endchar
    return "".join(output)
def _format_fragment(self, text, fragment, seen):
    htmlclass = " ".join((self.classname, self.termclass))
    output = []
    index = fragment.startchar
    for t in fragment.matches:
        if t.startchar > index:
            output.append(text[index:t.startchar])
        ttxt = htmlescape(text[t.startchar:t.endchar])
        if t.matched:
            if t.text in seen:
                termnum = seen[t.text]
            else:
                termnum = len(seen) % self.maxclasses
                seen[t.text] = termnum
            ttxt = self.template % {"tag": self.tagname,
                                    "q": self.attrquote,
                                    "cls": htmlclass,
                                    "t": ttxt,
                                    "tn": termnum}
        output.append(ttxt)
        index = t.endchar
    if index < fragment.endchar:
        output.append(text[index:fragment.endchar])
    return "".join(output)
def _format_fragment(self, text, fragment, seen):
    tagname = self.tagname
    attrs = self.attrs
    htmlclass = " ".join((self.classname, self.termclass))
    output = []
    index = fragment.startchar
    for t in fragment.matches:
        if t.startchar > index:
            output.append(text[index:t.startchar])
        ttxt = htmlescape(text[t.startchar:t.endchar])
        if t.matched:
            if t.text in seen:
                termnum = seen[t.text]
            else:
                termnum = len(seen)
                seen[t.text] = termnum
            ttxt = '<%s %s class="%s%s">%s</%s>' % (
                    tagname, attrs, htmlclass, termnum, ttxt, tagname)
        output.append(ttxt)
        index = t.endchar
    return "".join(output)
def _format_fragment(self, text, fragment, seen):
    htmlclass = " ".join((self.classname, self.termclass))
    output = []
    index = fragment.startchar
    for t in fragment.matches:
        if t.startchar > index:
            output.append(text[index:t.startchar])
        ttxt = htmlescape(text[t.startchar:t.endchar])
        if t.matched:
            if t.text in seen:
                termnum = seen[t.text]
            else:
                termnum = len(seen) % self.maxclasses
                seen[t.text] = termnum
            ttxt = self.template % {"tag": self.tagname, "q": self.attrquote,
                                    "cls": htmlclass, "t": ttxt,
                                    "tn": termnum}
        output.append(ttxt)
        index = t.endchar
    if index < fragment.endchar:
        output.append(text[index:fragment.endchar])
    return "".join(output)
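# Hedged usage sketch (not from the original sources): the template-based
# _format_fragment variants above substitute into a "%(tag)s"-style template
# for every matched token. DemoFormatter below is a self-contained stand-in;
# its template string, class names and attribute values are illustrative
# assumptions, not the library's actual definitions.
from html import escape as htmlescape


class DemoFormatter(object):
    tagname = "strong"
    attrquote = '"'
    classname = "match"
    termclass = "term"
    maxclasses = 5
    template = '<%(tag)s class=%(q)s%(cls)s%(tn)d%(q)s>%(t)s</%(tag)s>'

    def format_match(self, matched_text, termnum):
        # Same substitution the loops above perform for each matched token.
        htmlclass = " ".join((self.classname, self.termclass))
        return self.template % {"tag": self.tagname, "q": self.attrquote,
                                "cls": htmlclass,
                                "t": htmlescape(matched_text),
                                "tn": termnum % self.maxclasses}


print(DemoFormatter().format_match("cats & dogs", 0))
# -> <strong class="match term0">cats &amp; dogs</strong>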
def render(self, context):
    # protect against UA-less requests
    try:
        ua_string = context['request'].META['HTTP_USER_AGENT']
    except KeyError:
        return '[未知]'  # "[unknown]"

    ua_dict = user_agent_parser.Parse(ua_string)
    dev, d_os, ua = (
        ua_dict['device'],
        ua_dict['os'],
        ua_dict['user_agent'],
    )

    result = []
    if dev.get('is_spider', False):
        result.append('[爬虫]')  # "[spider]"
    if dev.get('is_mobile', False):
        result.append('[便携]')  # "[mobile]"
    if dev['family'] is not None:
        result.append('[%s]' % dev['family'])

    os_family = d_os['family']
    os_maj, os_min = d_os['major'], d_os['minor']
    if os_family != 'Other':
        if os_family == 'Android':
            result.append('[安卓')  # "[Android"
            if os_maj is not None:
                result.append(unicode(os_maj))
            if os_min is not None:
                result.append('.%s' % (os_min, ))
            result.append(']')
        else:
            result.append('[%s]' % os_family)

    ua_family = ua['family']
    ua_version = [ua['major'], ua['minor'], ua['patch'], ]
    if ua_family != 'Other':
        result.append(' ')
        result.append(ua_family)
        result.append(' ')
        result.append('.'.join(unicode(i) for i in ua_version[:2]))
        if ua_version[2] is not None:
            result.append('.%s' % (ua_version[2], ))

    result_str = u''.join(result)

    # I just want to fetch some info, so...
    ##request_uri = context['request'].META['REQUEST_URI']
    ##with open('D:\\UAStringNode-result.log', 'a+') as fp:
    ##    fp.write('%s: %s\n' % (
    ##        request_uri,
    ##        str(ua),
    ##    ))

    return htmlescape(result_str)
def version_node_factory(s):
    ver = htmlescape(s if s else 'unknown')

    class _XueVersionNode(template.Node):
        def render(self, context):
            return ver

    return _XueVersionNode
def sents(form, dobrackets=False):
    """Return search results as terminals or in bracket notation."""
    gotresults = False
    engine = form.get('engine', 'tgrep2')
    filenames = {EXTRE.sub('', os.path.basename(a)): a
            for a in CORPORA[engine].files}
    selected = {filenames[TEXTS[n]]: n for n in selectedtexts(form)}
    start, end = getslice(form.get('slice'))
    url = '%s?%s' % ('trees' if dobrackets else 'sents',
            url_encode(dict(export='csv', **form), separator=b';'))
    yield ('<pre>Query: %s\n'
            'Sentences (showing up to %d per text; '
            'export: <a href="%s">plain</a>, '
            '<a href="%s">with line numbers</a>):\n' % (
                form['query'] if len(form['query']) < 128
                else form['query'][:128] + '...',
                SENTLIMIT, url, url + ';linenos=1'))
    for n, (filename, results) in enumerate(groupby(sorted(
            CORPORA[engine].sents(form['query'], selected, start, end,
                maxresults=SENTLIMIT, brackets=dobrackets),
            key=itemgetter(0)), itemgetter(0))):
        textno = selected[filename]
        text = TEXTS[textno]
        if 'breakdown' in form:
            if dobrackets:
                breakdown = Counter(high for _, _, _, high, _ in results)
            else:
                breakdown = Counter(re.sub(
                        ' {2,}', ' ... ',
                        ' '.join(char if n in high1 or n in high2 else ' '
                            for n, char in enumerate(sent)))
                        for _, _, sent, high1, high2 in results)
            yield '\n%s\n' % text
            for match, cnt in breakdown.most_common():
                gotresults = True
                yield '%5d %s\n' % (cnt, match)
            continue
        for m, (filename, sentno, sent, high1, high2) in enumerate(results):
            if m == 0:
                gotresults = True
                yield ("\n%s: [<a href=\"javascript: toggle('n%d'); \">"
                        "toggle</a>] <ol id=n%d>" % (text, n, n))
            link = ('<a href="/browse?text=%d;sent=%s%s%s">tree</a>'
                    '|<a href="/browsesents?text=%d;sent=%s;highlight=%s">'
                    'context</a>' % (textno, sentno,
                        ';nofunc' if 'nofunc' in form else '',
                        ';nomorph' if 'nomorph' in form else '',
                        textno, sentno, sentno))
            if dobrackets:
                sent = htmlescape(sent.replace(" )", " -NONE-)"))
                out = sent.replace(high1, "<span class=r>%s</span>" % high1)
            else:
                out = applyhighlight(sent, high1, high2)
            yield "<li>#%s [%s] %s\n" % (str(sentno).rjust(6), link, out)
        yield "</ol>"
    yield '</pre>' if gotresults else 'No matches.'
def plot(data, total, title, width=800.0, unit='', dosort=True,
        target=None, target2=None):
    """A HTML bar plot given a dictionary and max value."""
    if len(data) > 30 and target is not None:
        df = pandas.DataFrame(index=data)
        df[title] = pandas.Series(data, index=df.index)
        df[target.name] = target.ix[df.index]
        if target2 is not None:
            df[target2.name] = target2.ix[df.index]
        if target.dtype == numpy.number:
            if target2 is None:
                seaborn.jointplot(target.name, title, data=df, kind='reg')
            else:
                seaborn.lmplot(target.name, title, data=df,
                        hue=target2.name)
        else:  # X-axis is categorical
            df.sort_values(by=target.name, inplace=True)
            if target2 is None:
                seaborn.barplot(target.name, title, data=df)
            else:
                seaborn.barplot(target.name, title, data=df,
                        hue=target2.name)
        fig = plt.gcf()
        fig.autofmt_xdate()
        # Convert to D3, SVG, javascript etc.
        # import mpld3
        # result = mpld3.fig_to_html(plt.gcf(), template_type='general',
        #         use_http=True)
        # Convert to PNG
        figfile = io.BytesIO()
        plt.savefig(figfile, format='png')
        result = '<div><img src="data:image/png;base64, %s"/></div>' % (
                base64.b64encode(figfile.getvalue()).decode('utf8'))
        plt.clf()
        return result

    result = ['<div class=barplot>',
            ('<text style="font-family: sans-serif; font-size: 16px; ">'
                '%s</text>' % title)]
    if target is not None:
        data = OrderedDict([(key, data[key])
                for key in target.sort_values().index if key in data])
    keys = {key.split('_')[0] if '_' in key else key[0] for key in data}
    color = {}
    if len(keys) <= 5:
        color.update(zip(keys, range(1, 6)))
    keys = list(data)
    if dosort:
        keys.sort(key=data.get, reverse=True)
    for key in keys:
        result.append('<br><div style="width:%dpx;" class=b%d></div>'
                '<span>%s: %g %s</span>' % (
                    int(round(width * data[key] / total)) if data[key] else 0,
                    color.get(key.split('_')[0] if '_' in key else key[0], 1)
                        if data[key] else 0,
                    htmlescape(key), data[key], unit,))
    result.append('</div>\n')
    return '\n'.join(result)
def x_matFormatAsList(value, tagname):
    lines = value.content.splitlines()
    start_tag, end_tag = tagname.join(('<', '>')), tagname.join(('</', '>'))
    tags = (start_tag, end_tag, )
    # Must escape here as we're generating tags!
    result = (htmlescape(ln).join(tags) for ln in lines)
    return mark_safe('\n'.join(result))
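# Hedged sketch (illustrative, not part of the original module): the same
# escape-then-wrap pattern as x_matFormatAsList, minus Django's mark_safe,
# to show the expected output for input containing HTML-special characters.
# The name format_as_list and the sample input are assumptions for this demo.
from html import escape as htmlescape


def format_as_list(content, tagname):
    tags = (tagname.join(('<', '>')), tagname.join(('</', '>')))
    return '\n'.join(htmlescape(ln).join(tags) for ln in content.splitlines())


print(format_as_list("a < b\nfish & chips", "li"))
# -> <li>a &lt; b</li>
#    <li>fish &amp; chips</li>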
def post_status_to_twitter(status):
    logging.info("Attempting to tweet: " + status)
    content = oauth2_request(
        'https://api.twitter.com/1.1/statuses/update.json',
        app_token,
        app_token_secret,
        http_method="POST",
        post_body="status=" + htmlescape(status),
    )
    if content[0]['status'] != '200':
        logging.warning("Tweet unsuccessful. Error message: " + str(content))
        return (False, content)
    logging.info("Tweet successful.")
    return (True, None)
def deansi(text):
    text = htmlescape(text)
    blocks = text.split("\033")
    state = set(), None, None
    ansiBlocks = blocks[:1]
    for block in blocks[1:]:
        attributeCodes, plain = ansiAttributes(block)
        for code in attributeCodes:
            state = ansiState(code, *state)
        classes = stateToClasses(*state)
        ansiBlocks.append(
                (("<span class='%s'>" % classes) + plain + "</span>")
                if classes else plain)
    text = "".join(ansiBlocks)
    return text
def _text(self, text):
    return htmlescape(text)
def trees(form):
    """Return visualization of parse trees in search results."""
    gotresults = False
    engine = form.get('engine', 'tgrep2')
    filenames = {EXTRE.sub('', os.path.basename(a)): a
            for a in CORPORA[engine].files}
    selected = {filenames[TEXTS[n]]: n for n in selectedtexts(form)}
    start, end = getslice(form.get('slice'))
    # NB: we do not hide function or morphology tags when exporting
    url = 'trees?' + url_encode(dict(export='csv', **form), separator=b';')
    yield ('<pre>Query: %s\n'
            'Trees (showing up to %d per text; '
            'export: <a href="%s">plain</a>, '
            '<a href="%s">with line numbers</a>):\n' % (
                form['query'] if len(form['query']) < 128
                else form['query'][:128] + '...',
                TREELIMIT, url, url + ';linenos=1'))
    try:
        tmp = CORPORA[engine].trees(form['query'], selected, start, end,
                maxresults=TREELIMIT, nomorph='nomorph' in form,
                nofunc='nofunc' in form)
    except Exception as err:
        yield '<span class=r>%s</span>' % htmlescape(
                str(err).splitlines()[-1])
        return
    for n, (filename, results) in enumerate(groupby(tmp, itemgetter(0))):
        textno = selected[filename]
        text = TEXTS[textno]
        if 'breakdown' in form:
            breakdown = Counter(DiscTree(
                    max(high, key=lambda x: len(x.leaves())
                        if isinstance(x, Tree) else 1).freeze(), sent)
                    for _, _, _, sent, high in results if high)
            yield '\n%s\n' % text
            for match, cnt in breakdown.most_common():
                gotresults = True
                yield 'count: %5d\n%s\n\n' % (
                        cnt, DrawTree(match, match.sent).text(
                            unicodelines=True, html=True))
            continue
        for m, (filename, sentno, tree, sent, high) in enumerate(results):
            if m == 0:
                gotresults = True
                yield ("==> %s: [<a href=\"javascript: toggle('n%d'); \">"
                        "toggle</a>]\n<span id=n%d>" % (text, n + 1, n + 1))
            link = ('<a href="browse?text=%d;sent=%d%s%s">browse</a>'
                    '|<a href="browsesents?%s">context</a>' % (
                        textno, sentno,
                        ';nofunc' if 'nofunc' in form else '',
                        ';nomorph' if 'nomorph' in form else '',
                        url_encode(dict(text=textno, sent=sentno,
                            query=form['query'], engine=engine),
                            separator=b';')))
            try:
                treerepr = DrawTree(tree, sent, highlight=high).text(
                        unicodelines=True, html=True)
            except ValueError as err:
                line = "#%s \nERROR: %s\n%s\n%s\n" % (
                        sentno, err, tree, sent)
            else:
                line = "#%s [%s]\n%s\n" % (sentno, link, treerepr)
            yield line
        yield "</span>"
    yield '</pre>' if gotresults else "No matches."
def fragmentsinresults(form, doexport=False):
    """Extract recurring fragments from search results."""
    engine = form.get('engine', 'tgrep2')
    if engine not in ('tgrep2', 'xpath', 'frag'):
        yield "Only applicable to treebanks."
        return
    gotresults = False
    filenames = {EXTRE.sub('', os.path.basename(a)): a
            for a in CORPORA[engine].files}
    selected = {filenames[TEXTS[n]]: n for n in selectedtexts(form)}
    start, end = getslice(form.get('slice'))
    uniquetrees = set()
    if not doexport:
        url = 'fragments?' + url_encode(dict(export='csv', **form),
                separator=b';')
        yield ('<pre>Query: %s\n'
                'Fragments (showing up to %d fragments '
                'in the first %d search results from selected texts;\n'
                'ordered by (freq ** 0.5 * numwords ** 2) '
                '<a href="%s">Export</a>):\n'
                % (form['query'] if len(form['query']) < 128
                    else form['query'][:128] + '...',
                    FRAGLIMIT, SENTLIMIT, url))
    disc = engine != 'tgrep2'
    if disc:
        fragments.PARAMS.update(disc=True, fmt='discbracket')
    else:
        fragments.PARAMS.update(disc=False, fmt='bracket')
    for n, (_, _, treestr, _) in enumerate(CORPORA[engine].sents(
            form['query'], selected, start, end,
            maxresults=SENTLIMIT, brackets=True)):
        if n == 0:
            gotresults = True
        if engine == 'tgrep2':
            line = treestr.replace(" )", " -NONE-)") + '\n'
        elif engine == 'xpath':
            item = treebank.alpinotree(
                    ElementTree.fromstring(treestr.encode('utf8')))
            line = '%s\t%s\n' % (str(item.tree), ' '.join(item.sent))
        elif engine == 'frag':
            line = treestr + '\n'
        else:
            raise ValueError
        uniquetrees.add(line.encode('utf8'))
    if not gotresults and not doexport:
        yield "No matches."
        return
    # TODO: get counts from whole text (preload)
    with tempfile.NamedTemporaryFile(delete=True) as tmp:
        tmp.writelines(uniquetrees)
        tmp.flush()
        results, approxcounts = fragments.regular(
                [tmp.name], 1, None, 'utf8')
    if disc:
        results = nlargest(FRAGLIMIT, zip(results, approxcounts),
                key=lambda ff: sum(1 for a in ff[0][1] if a) ** 2
                    * ff[1] ** 0.5)
    else:
        results = nlargest(FRAGLIMIT, zip(results, approxcounts),
                key=lambda ff: sum(1 for _ in
                    re.finditer(r'[^ ()]\)', ff[0])) ** 2 * ff[1] ** 0.5)
    gotresults = False
    if not doexport:
        yield "<ol>"
    for tree, freq in results:
        gotresults = True
        if disc:
            tree, sent = tree
            sent = ' '.join(a or '' for a in sent)
        if doexport:
            if disc:
                yield '%s\t%s\t%s\n' % (tree, sent, freq)
            else:
                yield '%s\t%s\n' % (tree, freq)
        else:
            if disc:
                link = "<a href='/draw?tree=%s;sent=%s'>draw</a>" % (
                        quote(tree.encode('utf8')),
                        quote(sent.encode('utf8')))
                sent = GETLEAVES.sub(' <font color=red>\\1</font>',
                        htmlescape(' ' + sent + ' '))
                tree = htmlescape(tree) + ' ' + sent
            else:
                link = "<a href='/draw?tree=%s'>draw</a>" % (
                        quote(tree.encode('utf8')))
                tree = GETLEAVES.sub(' <font color=red>\\1</font>',
                        htmlescape(tree))
            tree = GETFRONTIERNTS.sub('(<font color=blue>\\1</font> )',
                    tree)
            yield "<li>freq=%3d [%s] %s" % (freq, link, tree)
    if not doexport:
        yield "</ol>"
        if gotresults:
            yield '</pre>'
        else:
            yield "No fragments with freq > %d & nodes > %d." % (
                    MINNODES, MINFREQ)
def counts(form, doexport=False):
    """Produce graphs and tables for a set of queries.

    Queries should be given one per line, optionally prefixed by a name and
    a normalization query::

        [name: ][normquery<tab>]query

    Returns one graph for each query, and an overview with totals
    (optionally per category, if the first letters of each corpus name form
    a small set)."""
    # TODO: option to arrange graphs by text instead of by query
    norm = form.get('norm', 'sents')
    engine = form.get('engine', 'tgrep2')
    filenames = {EXTRE.sub('', os.path.basename(a)): a
            for a in CORPORA[engine].files}
    selected = {filenames[TEXTS[n]]: n for n in selectedtexts(form)}
    start, end = getslice(form.get('slice'))
    target = METADATA[form['target']] if form.get('target') else None
    target2 = METADATA[form['target2']] if form.get('target2') else None
    if not doexport:
        url = 'counts?' + url_encode(dict(export='csv', **form),
                separator=b';')
        yield ('Counts from queries '
                '(<a href="%s">export to CSV</a>):\n' % url)
    if norm == 'query':
        normresults = CORPORA[engine].counts(
                form['normquery'], selected)
    # Combined results of all queries on each file
    combined = defaultdict(int)
    index = [TEXTS[n] for n in selected.values()]
    df = pandas.DataFrame(index=index)
    queries = querydict(form['query'])
    if not doexport:
        yield '<ol>%s</ol>\n' % '\n'.join(
                '<li><a href="#q%d">%s</a>' % (n, query)
                for n, query in enumerate(list(queries)
                    + ['Combined results', 'Overview'], 1))
    for n, (name, (normquery, query)) in enumerate(
            list(queries.items())
            + [('Combined results', ('', None))], 1):
        cnts = Counter()
        sumtotal = 0
        relfreq = {}
        resultsindices = None
        if query is None:
            if len(df.columns) == 1:
                break
            results = combined
            legend = '%sLegend:\t%s\n' % (64 * ' ', '\t'.join(
                    '\n<font color=%s>%s</font>' % (
                        COLORS.get(n, 'black'), query)
                    for n, query in enumerate(queries)))
        else:
            legend = ''
            if normquery:
                norm = 'query'
                normresults = CORPORA[engine].counts(
                        normquery, selected, start, end)
            else:
                norm = form.get('norm', 'sents')
            results = CORPORA[engine].counts(
                    query, selected, start, end, indices=False)
            if len(results) <= 32 and all(
                    results[filename] < INDICESMAXRESULTS
                    for filename in results):
                resultsindices = CORPORA[engine].counts(
                        query, selected, start, end, indices=True)
        if not doexport:
            yield ('<a name=q%d><h3>%s</h3></a>\n<tt>%s</tt> '
                    '[<a href="javascript: toggle(\'n%d\'); ">'
                    'toggle results per text</a>]\n'
                    '<div id=n%d style="display: none;"><pre>\n' % (
                        n, name,
                        htmlescape(query) if query is not None else legend,
                        n, n))
        COLWIDTH = min(40, max(map(len, TEXTS)) + 2)
        for filename, cnt in sorted(results.items()):
            if query is None:
                cnt = combined[filename]
            else:
                combined[filename] += cnt
            textno = selected[filename]
            text = TEXTS[textno]
            cnts[text] = cnt
            if norm == 'consts':
                total = CORPUSINFO[engine][textno].numnodes
            elif norm == 'words':
                total = CORPUSINFO[engine][textno].numwords
            elif norm == 'sents':
                total = CORPUSINFO[engine][textno].len
            elif norm == 'query':
                total = normresults[filename] or 1
            else:
                raise ValueError
            relfreq[text] = 100.0 * cnt / total
            sumtotal += total
            if not doexport:
                out = ('%s (<a href="browsesents?%s">browse</a>) '
                        '%5d %5.2f %%' % (
                            text.ljust(COLWIDTH)[:COLWIDTH],
                            url_encode(dict(text=textno, sent=1,
                                query=query or form['query'],
                                engine=engine), separator=b';'),
                            cnt, relfreq[text]))
                barcode = ''
                if resultsindices is not None:
                    barcode = dispplot(resultsindices[filename],
                            start or 1,
                            end or CORPUSINFO[engine][textno].len)
                if cnt:
                    yield out + barcode + '\n'
                else:
                    yield '<span style="color: gray; ">%s%s</span>\n' % (
                            out, barcode)
        if not doexport or query is not None:
            df[name] = pandas.Series(relfreq)
        if not doexport:
            yield ('%s %5d %5.2f %%\n\n' % (
                    'TOTAL'.ljust(COLWIDTH),
                    sum(cnts.values()),
                    100.0 * sum(cnts.values()) / sumtotal))
            yield '</pre></div>'
            if max(cnts.values()) == 0:
                continue
            elif form.get('slice'):
                # show absolute counts when all texts have been limited
                # to the same number of sentences
                yield plot(cnts, max(cnts.values()),
                        'Absolute counts of \'%s\'' % name,
                        unit='matches', target=target, target2=target2)
            else:
                yield plot(relfreq, max(relfreq.values()),
                        'Relative frequency of \'%s\'; norm=%s'
                        % (name, norm),
                        unit='%', target=target, target2=target2)
    if doexport:
        if form.get('export') == 'json':
            yield json.dumps(df.to_dict(), indent=2)
        else:
            yield df.to_csv(None)
    else:
        fmt = lambda x: '%g' % round(x, 1)
        yield '<h3><a name=q%d>Overview of patterns</a></h3>\n' % (
                len(queries) + 2)
        # collate stats
        if form.get('target'):
            keys = METADATA[form['target']]
        else:
            keys = pandas.Series([key.split('_')[0] if '_' in key
                    else key[0] for key in df.index], index=df.index)
        keyset = keys.unique()
        if len(keyset) <= 5:
            overview = OrderedDict(
                    ('%s_%s' % (cat, query),
                        df[query].ix[keys == cat].mean())
                    for query in df.columns for cat in keyset)
            df['category'] = keys
            yield '<pre>\n%s\n</pre>' % (
                    df.groupby('category').describe().to_string(
                        float_format=fmt))
        else:
            overview = OrderedDict((query, df[query].mean())
                    for query in df.columns)
            yield '<pre>\n%s\n</pre>' % df.describe().to_string(
                    float_format=fmt)
        yield plot(overview, max(overview.values()),
                'Relative frequencies of patterns '
                '(count / num_%s * 100)' % norm,
                unit='%', dosort=False, target=target, target2=target2)