def description(self):
    """Return an HTML excerpt of this item's text with query matches in bold.

    The text comes from the first source present in ``self.info``: the
    description of the first entry of ``urls`` (looked up through
    ``api.get_url_info``), else ``message``, else the ``content`` of the last
    entry of ``version``.  The text is split on ``'.'`` and sentences that
    match ``self.query`` (compiled as a case-insensitive regular expression)
    are collected, each match replaced by ``<b>query</b>``.

    Returns ``''`` when no text is available; otherwise the collected
    sentences joined by a separator, followed by ``'...'``.
    """
    # Default to None so a record with none of the known keys yields ''
    # instead of failing on an unbound local.
    text = None
    if 'urls' in self.info:
        urls = self.info.get('urls')
        if urls:
            text = api.get_url_info(urls[0]).description
    elif 'message' in self.info:
        text = self.info.get('message')
    elif 'version' in self.info:
        versions = self.info.get('version')
        if versions:
            text = versions[-1].get('content')

    if not text:
        return ''

    # NOTE(review): self.query is compiled as a regex and is also used as the
    # replacement template, so special characters in it are interpreted by
    # the re module -- confirm callers pass plain words.
    query = api.re.compile(self.query, api.re.IGNORECASE)
    highlighted = '<b>%s</b>' % self.query

    excerpts = []
    sep_char = '... '
    for sentence in text.split('.'):
        if query.findall(sentence):
            excerpts.append(query.sub(highlighted, sentence.strip()))
        else:
            # A non-matching sentence was skipped, so mark the gap.
            sep_char = '... '

        if len(excerpts) == 1:
            sep_char = '. '
        elif len(excerpts) > 2:
            # Three matching sentences are enough for an excerpt.
            break

    return sep_char.join(excerpts) + '...'
def urls(self):
    """Return ``api.get_url_info`` results for each entry of ``self.info['urls']``.

    Returns an empty list when the key is missing or its value is empty.
    """
    # ``or []`` covers both a missing key and a present-but-empty/None value.
    return [api.get_url_info(url) for url in self.info.get('urls') or []]
def urls(self):
    """Return ``api.get_url_info`` results for every URL found in ``self.message``.

    The URLs are whatever ``api.extract_urls`` returns for the message, in
    that order.
    """
    collected = []
    for link in api.extract_urls(self.message):
        collected.append(api.get_url_info(link))
    return collected
def urls(self):
    """Return ``api.get_url_info`` results for each entry of ``self.info['urls']``.

    Every lookup is passed ``db_name=self.db_name``.  Returns an empty list
    when the key is missing or its value is empty.
    """
    # ``or []`` covers both a missing key and a present-but-empty/None value.
    return [api.get_url_info(url, db_name=self.db_name)
            for url in self.info.get('urls') or []]
def urls(self):
    """Return ``api.get_url_info`` results for every URL found in ``self.message``.

    The URLs are whatever ``api.extract_urls`` returns for the message, in
    that order; every lookup is passed ``db_name=self.db_name``.
    """
    collected = []
    for link in api.extract_urls(self.message):
        collected.append(api.get_url_info(link, db_name=self.db_name))
    return collected
def flavored_markdown(text):
    """Render user-supplied Markdown ``text`` to sanitized HTML.

    Fragments that the Markdown conversion must not alter (reference-style
    links, URLs, emoticons/symbols, ``@[name](id)`` mentions,
    ``#[name](id)`` hashtags and words containing several underscores) are
    replaced by their MD5 hex digest before conversion.  Afterwards the
    digests are turned into links/images, except inside ``<code>`` elements,
    where the original text is put back.  The HTML is passed through
    ``sanitize_html``, links without the ``overlay`` class get
    ``target="_blank"``, and the result is stored in the ``filters`` cache
    namespace under a key built from ``hash(text)``.

    Returns the HTML string, or ``''`` when sanitizing leaves nothing but
    whitespace.
    """
    key = '%s:flavored_markdown' % hash(text)
    html = cache.get(key, namespace="filters")
    if html:
        return html

    def _token(fragment):
        # Placeholder that stands in for a protected fragment.
        return md5(fragment).hexdigest()

    # Pad with spaces so the ' %s' patterns below can also match a URL or
    # symbol sitting at the very start of the text.
    text = unescape(' ' + text + ' ')

    # extract Reference-style links
    reference_urls = [found[0] for found in REFERENCE_URL_REGEX.findall(text)]
    for reference in reference_urls:
        text = text.replace(reference, _token(reference))

    # extract urls (longest first, so a URL that is a prefix of another one
    # does not clobber the longer match)
    urls = [found[0] for found in URL_REGEX.findall(text) if found]
    urls.sort(key=len, reverse=True)
    for url in urls:
        for pattern in ['%s)', ' %s', '\n%s', '\r\n%s', '%s\n', '%s\r\n']:
            if pattern % url in text:
                text = text.replace(pattern % url, pattern % _token(url))
                break

    # extract emoticons and symbols
    symbols = list(EMOTICONS.keys()) + list(SYMBOLS.keys())
    symbols.sort(key=len, reverse=True)
    for symbol in symbols:
        for pattern in [' %s', ' %s. ', ' %s.\n', ' %s.\r\n',
                        '\n%s', '\r\n%s', '%s\n', '%s\r\n']:
            if pattern % symbol in text:
                text = text.replace(pattern % symbol, pattern % _token(symbol))
                break

    # extract mentions
    mentions = re.findall(r'(@\[.*?\))', text)
    for mention in mentions:
        text = text.replace(mention, _token(mention))

    # extract hashtags
    hashtags = re.findall(r'(#\[.*?\))', text)
    for hashtag in hashtags:
        text = text.replace(hashtag, _token(hashtag))

    # extract underscores words - prevent foo_bar_baz from ending up with an
    # italic word in the middle
    words_with_underscores = [
        word
        for word in re.findall(r'((?! {4}|\t)\w+_\w+_\w[\w_]*)', text)
        if not word.startswith('_')
    ]
    for word in words_with_underscores:
        text = text.replace(word, _token(word))

    # treats newlines in paragraph-like content as real line breaks
    literal_br = '8b0f0ea73162b7552dda3c149b6c045d'  # shields literal <br> tags
    text = text.strip().replace('<br>', literal_br)
    # normalize \r\n and \n to <br>
    text = text.strip().replace('\r\n', '<br>').replace('\n', '<br>')
    # Two trailing spaces before the newline are what Markdown treats as a
    # hard line break; a single space would be rendered as a plain space.
    text = text.strip().replace('<br>', '  \n')
    text = text.strip().replace('||  \n', '||\n')  # undo if wiki-tables
    text = text.strip().replace(literal_br, '<br>')

    # restore reference_urls so Markdown itself resolves them
    for reference in reference_urls:
        text = text.replace(_token(reference), reference)

    # convert text to html
    html = markdown(text, extras=[
        "wiki-tables", "cuddled-lists", "fenced-code-blocks", "header-ids",
        "code-friendly", "pyshell", "footnotes"
    ])

    # extract code-blocks; multi-line html is joined into one line first so
    # the non-greedy regex can span a whole <code> element
    html = html.replace('\n', '<br/>')
    code_blocks = re.findall('(<code>.*?</code>)', html)
    for block in code_blocks:
        html = html.replace(block, _token(block))

    # Show emoticons and symbols
    for symbol in symbols:
        if symbol in SYMBOLS:
            rendered = SYMBOLS[symbol]
        else:
            rendered = EMOTICONS[symbol].replace(
                "<img src", "<img class='emoticon' src")
        html = html.replace(_token(symbol), rendered)

    # Autolinks urls, mentions, hashtags
    for url in urls:
        title = api.get_url_info(url).title
        label = url[:40] + '...' if len(url) > 40 else url
        html = html.replace(
            _token(url),
            '<a href="%s" target="_blank" title="%s">%s</a>'
            % (url, title, label))

    tag_parts = r'\[(?P<name>.+)\]\((?P<id>.*)\)'
    mention_re = re.compile('@' + tag_parts)
    hashtag_re = re.compile('#' + tag_parts)

    for mention in mentions:
        matched = mention_re.match(mention)
        if matched is None:
            # Malformed fragment such as "@[abc)": leave its placeholder in
            # place; the restore loop below puts the raw text back.
            continue
        user = matched.groupdict()
        user['id'] = user['id'].split(':', 1)[-1]
        html = html.replace(
            _token(mention),
            '<a href="#!/user/%s" class="overlay"><span class="tag">%s</span></a>'
            % (user.get('id'), user.get('name')))

    for hashtag in hashtags:
        matched = hashtag_re.match(hashtag)
        if matched is None:
            continue  # malformed; restored verbatim below
        tag = matched.groupdict()
        tag['id'] = tag['id'].split(':', 1)[-1]
        html = html.replace(
            _token(hashtag),
            '<a href="?hashtag=%s" class="overlay"><span class="tag">%s</span></a>'
            % (tag.get('id'), tag.get('name')))

    # Restore code blocks
    for block in code_blocks:
        html = html.replace(_token(block), block)

    # restore urls, mentions, emoticons and hashtag in code blocks: any
    # placeholder still present was hidden inside a code block (or skipped
    # above) and goes back to its original text
    for url in urls:
        html = html.replace(_token(url), url)
    for mention in mentions:
        html = html.replace(_token(mention), mention)
    for hashtag in hashtags:
        html = html.replace(_token(hashtag), hashtag)
    for symbol in symbols:
        html = html.replace(_token(symbol), symbol)

    # restore words with underscores
    for word in words_with_underscores:
        html = html.replace(_token(word), word)

    # restore \n
    html = html.replace('<br/>', '\n')

    # xss protection
    html = sanitize_html(html)
    if not html or html.isspace():
        return ''

    # add target="_blank" to all a tags (except in-app overlay links)
    html = PyQuery(html)
    html('a:not(.overlay)').attr('target', '_blank')
    html = str(html)
    html = html.replace('<br/>', '<br>')

    cache.set(key, html, namespace="filters")
    return html
def flavored_markdown(text):
    """Render user-supplied Markdown ``text`` to sanitized HTML.

    Fragments that the Markdown conversion must not alter (reference-style
    links, URLs, emoticons/symbols, ``@[name](id)`` mentions,
    ``#[name](id)`` hashtags and words containing several underscores) are
    replaced by their MD5 hex digest before conversion.  Afterwards the
    digests are turned into links/images, except inside ``<code>`` elements,
    where the original text is put back.  The HTML is passed through
    ``sanitize_html``, links without the ``overlay`` class get
    ``target="_blank"``, and the result is stored in the ``filters`` cache
    namespace under a key built from ``hash(text)``.

    Returns the HTML string, or ``''`` when sanitizing leaves nothing but
    whitespace.
    """
    key = '%s:flavored_markdown' % hash(text)
    html = cache.get(key, namespace="filters")
    if html:
        return html

    def _token(fragment):
        # Placeholder that stands in for a protected fragment.
        return md5(fragment).hexdigest()

    # Pad with spaces so the ' %s' patterns below can also match a URL or
    # symbol sitting at the very start of the text.
    text = unescape(' ' + text + ' ')

    # extract Reference-style links
    reference_urls = [found[0] for found in REFERENCE_URL_REGEX.findall(text)]
    for reference in reference_urls:
        text = text.replace(reference, _token(reference))

    # extract urls (longest first, so a URL that is a prefix of another one
    # does not clobber the longer match)
    urls = [found[0] for found in URL_REGEX.findall(text) if found]
    urls.sort(key=len, reverse=True)
    for url in urls:
        for pattern in ['%s)', ' %s', '\n%s', '\r\n%s', '%s\n', '%s\r\n']:
            if pattern % url in text:
                text = text.replace(pattern % url, pattern % _token(url))
                break

    # extract emoticons and symbols
    symbols = list(EMOTICONS.keys()) + list(SYMBOLS.keys())
    symbols.sort(key=len, reverse=True)
    for symbol in symbols:
        for pattern in [' %s', ' %s. ', ' %s.\n', ' %s.\r\n',
                        '\n%s', '\r\n%s', '%s\n', '%s\r\n']:
            if pattern % symbol in text:
                text = text.replace(pattern % symbol, pattern % _token(symbol))
                break

    # extract mentions
    mentions = re.findall(r'(@\[.*?\))', text)
    for mention in mentions:
        text = text.replace(mention, _token(mention))

    # extract hashtags
    hashtags = re.findall(r'(#\[.*?\))', text)
    for hashtag in hashtags:
        text = text.replace(hashtag, _token(hashtag))

    # extract underscores words - prevent foo_bar_baz from ending up with an
    # italic word in the middle
    words_with_underscores = [
        word
        for word in re.findall(r'((?! {4}|\t)\w+_\w+_\w[\w_]*)', text)
        if not word.startswith('_')
    ]
    for word in words_with_underscores:
        text = text.replace(word, _token(word))

    # treats newlines in paragraph-like content as real line breaks
    literal_br = '8b0f0ea73162b7552dda3c149b6c045d'  # shields literal <br> tags
    text = text.strip().replace('<br>', literal_br)
    # normalize \r\n and \n to <br>
    text = text.strip().replace('\r\n', '<br>').replace('\n', '<br>')
    # Two trailing spaces before the newline are what Markdown treats as a
    # hard line break; a single space would be rendered as a plain space.
    text = text.strip().replace('<br>', '  \n')
    text = text.strip().replace('||  \n', '||\n')  # undo if wiki-tables
    text = text.strip().replace(literal_br, '<br>')

    # restore reference_urls so Markdown itself resolves them
    for reference in reference_urls:
        text = text.replace(_token(reference), reference)

    # convert text to html
    html = markdown(text, extras=[
        "wiki-tables", "cuddled-lists", "fenced-code-blocks", "header-ids",
        "code-friendly", "pyshell", "footnotes"
    ])

    # extract code-blocks; multi-line html is joined into one line first so
    # the non-greedy regex can span a whole <code> element
    html = html.replace('\n', '<br/>')
    code_blocks = re.findall('(<code>.*?</code>)', html)
    for block in code_blocks:
        html = html.replace(block, _token(block))

    # Show emoticons and symbols
    for symbol in symbols:
        if symbol in SYMBOLS:
            rendered = SYMBOLS[symbol]
        else:
            rendered = EMOTICONS[symbol].replace(
                "<img src", "<img class='emoticon' src")
        html = html.replace(_token(symbol), rendered)

    # Autolinks urls, mentions, hashtags
    for url in urls:
        title = api.get_url_info(url).title
        label = url[:40] + '...' if len(url) > 40 else url
        html = html.replace(
            _token(url),
            '<a href="%s" target="_blank" title="%s">%s</a>'
            % (url, title, label))

    tag_parts = r'\[(?P<name>.+)\]\((?P<id>.*)\)'
    mention_re = re.compile('@' + tag_parts)
    hashtag_re = re.compile('#' + tag_parts)

    for mention in mentions:
        matched = mention_re.match(mention)
        if matched is None:
            # Malformed fragment such as "@[abc)": leave its placeholder in
            # place; the restore loop below puts the raw text back.
            continue
        user = matched.groupdict()
        user['id'] = user['id'].split(':', 1)[-1]
        html = html.replace(
            _token(mention),
            '<a href="#!/user/%s" class="overlay"><span class="tag">%s</span></a>'
            % (user.get('id'), user.get('name')))

    for hashtag in hashtags:
        matched = hashtag_re.match(hashtag)
        if matched is None:
            continue  # malformed; restored verbatim below
        tag = matched.groupdict()
        tag['id'] = tag['id'].split(':', 1)[-1]
        html = html.replace(
            _token(hashtag),
            '<a href="?hashtag=%s" class="overlay"><span class="tag">%s</span></a>'
            % (tag.get('id'), tag.get('name')))

    # Restore code blocks
    for block in code_blocks:
        html = html.replace(_token(block), block)

    # restore urls, mentions, emoticons and hashtag in code blocks: any
    # placeholder still present was hidden inside a code block (or skipped
    # above) and goes back to its original text
    for url in urls:
        html = html.replace(_token(url), url)
    for mention in mentions:
        html = html.replace(_token(mention), mention)
    for hashtag in hashtags:
        html = html.replace(_token(hashtag), hashtag)
    for symbol in symbols:
        html = html.replace(_token(symbol), symbol)

    # restore words with underscores
    for word in words_with_underscores:
        html = html.replace(_token(word), word)

    # restore \n
    html = html.replace('<br/>', '\n')

    # xss protection
    html = sanitize_html(html)
    if not html or html.isspace():
        return ''

    # add target="_blank" to all a tags (except in-app overlay links)
    html = PyQuery(html)
    html('a:not(.overlay)').attr('target', '_blank')
    html = str(html)
    html = html.replace('<br/>', '<br>')

    cache.set(key, html, namespace="filters")
    return html
def autolink(text):
    """Turn URLs and ``@[name](kind:id)`` mentions in ``text`` into HTML links.

    * Falsy input is returned unchanged.
    * Text matching ``EMAIL_RE`` is resolved to a user through ``api`` and
      returned as a single link to that user's page.
    * Otherwise every URL reported by ``api.extract_urls`` becomes an
      ``<a target="_blank">`` element (labels of URLs longer than 60
      characters are truncated), and every mention found by ``MENTIONS_RE``
      becomes a link to the topic, user or group it names.

    The result of the last case is stored in the ``filters`` cache namespace
    under a key built from ``hash(text)``.
    """
    if not text:
        return text

    key = '%s:autolink' % hash(text)
    out = cache.get(key, namespace="filters")
    if out:
        return out

    if re.match(EMAIL_RE, text):
        user_id = api.get_user_id_from_email_address(text)
        user = api.get_user_info(user_id)
        return '<a href="/user/%s" class="async">%s</a>' % (user.id, user.name)

    # The trailing space lets the '%s ' template match a URL at the very end.
    s = str(text + ' ')  # convert unicode to string
    s = s.replace('\r\n', '\n')

    # Longest first, so a URL that is a prefix of another one does not
    # clobber the longer match.
    urls = list(set(api.extract_urls(s)))
    urls.sort(key=len, reverse=True)

    anchor = '<a href="%s" target="_blank" title="%s">%s</a>'
    templates = ['%s ', ' %s', '\n%s', '%s\n', '%s.', '%s,']

    # Pass 1: insert anchors that contain MD5 placeholders instead of the
    # URL itself, so later replacements cannot match inside earlier anchors.
    for url in urls:
        hash_string = md5(url).hexdigest()
        info = api.get_url_info(url)
        # Without a title the placeholder is used, i.e. the URL itself once
        # pass 2 has run.
        title = info.title if info.title else hash_string

        if not url.startswith('http'):
            s = s.replace(
                url,
                '<a href="http://%s/" target="_blank" title="%s">%s</a>'
                % (hash_string, title, hash_string))
            continue

        if len(url) > 60:
            label = md5(url[:60] + '...').hexdigest()
        else:
            label = hash_string
        link = anchor % (hash_string, title, label)
        for template in templates:
            if template % url in s:
                s = s.replace(template % url, template % link)
                break

    # Pass 2: swap the placeholders for the real URL / truncated label.
    for url in urls:
        s = s.replace(md5(url).hexdigest(), url)
        if len(url) > 60 and url.startswith('http'):
            s = s.replace(md5(url[:60] + '...').hexdigest(), url[:60] + '...')

    mention_re = re.compile(r'@\[(?P<name>.+)\]\((?P<id>.*)\)')
    for mention in MENTIONS_RE.findall(s):
        matched = mention_re.match(mention)
        if matched is None:
            # Not in @[name](id) form: leave the text as it is.
            continue
        target = matched.groupdict()
        target['id'] = target['id'].split(':', 1)[-1]

        if '](topic:' in mention:
            # TODO: update topic name?
            link = '<a href="/chat/topic/%s" class="chat">%s</a>' % (
                target.get('id'), target.get('name'))
        elif '](user:' in mention:
            link = ('<a href="/user/%s" class="async">'
                    '<span class="tag">%s</span></a>') % (
                target.get('id'), target.get('name'))
        else:
            link = ('<a href="/group/%s" class="async">'
                    '<span class="tag">%s</span></a>') % (
                target.get('id'), target.get('name'))
        s = s.replace(mention, link)

    # NOTE: the original carried commented-out code that linked #[name](id)
    # hashtags; hashtags are left untouched here, as before.

    cache.set(key, s, namespace="filters")
    return s
def autolink(text):
    """Turn URLs and ``@[name](kind:id)`` mentions in ``text`` into HTML links.

    * Falsy input is returned unchanged.
    * Text matching ``EMAIL_RE`` is resolved to a user through ``api`` and
      returned as a single link to that user's page.
    * Otherwise every URL reported by ``api.extract_urls`` becomes an
      ``<a target="_blank">`` element (labels of URLs longer than 60
      characters are truncated), and every mention found by ``MENTIONS_RE``
      becomes a link to the topic, user chat or group it names.

    The result of the last case is stored in the ``filters`` cache namespace
    under a key built from ``hash(text)``.
    """
    if not text:
        return text

    key = '%s:autolink' % hash(text)
    out = cache.get(key, namespace="filters")
    if out:
        return out

    if re.match(EMAIL_RE, text):
        user_id = api.get_user_id_from_email_address(text)
        user = api.get_user_info(user_id)
        return '<a href="/user/%s" class="async">%s</a>' % (user.id, user.name)

    # The trailing space lets the '%s ' template match a URL at the very end.
    s = str(text + ' ')  # convert unicode to string
    s = s.replace('\r\n', '\n')

    # Longest first, so a URL that is a prefix of another one does not
    # clobber the longer match.
    urls = list(set(api.extract_urls(s)))
    urls.sort(key=len, reverse=True)

    anchor = '<a href="%s" target="_blank" title="%s">%s</a>'
    templates = ['%s ', ' %s', '\n%s', '%s\n', '%s.', '%s,']

    # Pass 1: insert anchors that contain MD5 placeholders instead of the
    # URL itself, so later replacements cannot match inside earlier anchors.
    for url in urls:
        hash_string = md5(url).hexdigest()
        info = api.get_url_info(url)
        # Without a title the placeholder is used, i.e. the URL itself once
        # pass 2 has run.
        title = info.title if info.title else hash_string

        if not url.startswith('http'):
            s = s.replace(
                url,
                '<a href="http://%s/" target="_blank" title="%s">%s</a>'
                % (hash_string, title, hash_string))
            continue

        if len(url) > 60:
            label = md5(url[:60] + '...').hexdigest()
        else:
            label = hash_string
        link = anchor % (hash_string, title, label)
        for template in templates:
            if template % url in s:
                s = s.replace(template % url, template % link)
                break

    # Pass 2: swap the placeholders for the real URL / truncated label.
    for url in urls:
        s = s.replace(md5(url).hexdigest(), url)
        if len(url) > 60 and url.startswith('http'):
            s = s.replace(md5(url[:60] + '...').hexdigest(), url[:60] + '...')

    mention_re = re.compile(r'@\[(?P<name>.+)\]\((?P<id>.*)\)')
    for mention in MENTIONS_RE.findall(s):
        matched = mention_re.match(mention)
        if matched is None:
            # Not in @[name](id) form: leave the text as it is.
            continue
        target = matched.groupdict()
        target['id'] = target['id'].split(':', 1)[-1]

        if '](topic:' in mention:
            # TODO: update topic name?
            link = '<a href="/chat/topic/%s" class="chat">%s</a>' % (
                target.get('id'), target.get('name'))
        elif '](user:' in mention:
            link = ('<a href="/chat/user/%s" class="chat">'
                    '<span class="tag">%s</span></a>') % (
                target.get('id'), target.get('name'))
        else:
            link = ('<a href="/group/%s" class="async">'
                    '<span class="tag">%s</span></a>') % (
                target.get('id'), target.get('name'))
        s = s.replace(mention, link)

    # NOTE: the original carried commented-out code that linked #[name](id)
    # hashtags; hashtags are left untouched here, as before.

    cache.set(key, s, namespace="filters")
    return s