def genre(self, page, subpage, genres, type):
    genre_ids = [58, 69, 57, 59, 84, 86, 60, 79, 77, 93, 89, 82, 71, 66, 95,
                 88, 75, 85, 83, 90, 63, 94, 72, 73, 67, 87, 78, 61, 70, 91,
                 92, 64, 96, 68, 62, 65, 76, 80, 74, 81, 98, 97]
    genre_titles = ['Action', 'Adventure', 'Cars', 'Comedy', 'Dementia',
                    'Demons', 'Drama', 'Ecchi', 'Fantasy', 'Game', 'Harem',
                    'Historical', 'Horror', 'Josei', 'Kids', 'Magic',
                    'Martial Arts', 'Mecha', 'Military', 'Music', 'Mystery',
                    'Parody', 'Police', 'Psychological', 'Romance', 'Samurai',
                    'School', 'Sci-Fi', 'Seinen', 'Shoujo', 'Shoujo Ai',
                    'Shounen', 'Shounen Ai', 'Slice of Life', 'Space',
                    'Sports', 'Super Power', 'Supernatural', 'Thriller',
                    'Vampire', 'Yaoi', 'Yuri']
    if genres is None:
        genres = xbmcgui.Dialog().multiselect("Genre", genre_titles)
    else:
        genres = json.loads(genres)
    list = []
    for i in genres:
        list.append(genre_ids[int(i)])
    list = cache.get(scraper().genreScrape, 24, list, page, subpage, type)
    subpage, page = self.subpagination(subpage, page)
    self.list_builder(list)
    self.addDirectoryItem('Next', 'genreSearch', page=page, genres=genres,
                          subpage=subpage, type=type)
    self.createDirectory(sort=False)
def remove_empty_lines(html):
    key = '%s:remove_empty_lines' % hash(html)
    out = cache.get(key, namespace="filters")
    if out:
        return out
    if '</' in html:
        html = html.strip().replace('\n', '')
        soup = BeautifulSoup(html)
        lines = []
        for element in soup.contents:
            if isinstance(element, Tag):
                if element.text:
                    lines.append(str(element).strip())
                elif 'br' in str(element):
                    lines.append('\n')
            elif isinstance(element, NavigableString):
                lines.append(str(element).strip())
        out = ''.join(lines).strip()
        while '\n\n' in out:
            out = out.replace('\n\n', '\n')
    else:
        out = '\n'.join([line for line in html.split('\n') if line.strip()])
    cache.set(key, out, namespace="filters")
    return out
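# The text filters in this collection (remove_empty_lines, to_text,
# unmunge, description, ...) all repeat the same memoization pattern:
# derive a key from hash() of the input, look it up in a "filters"
# namespace, and only compute on a miss. A minimal generic sketch of that
# pattern, assuming the cache.get/cache.set signatures used here (the
# decorator name `cached_filter` is hypothetical, not from the source):
def cached_filter(name):
    def wrapper(func):
        def inner(text):
            key = '%s:%s' % (hash(text), name)
            out = cache.get(key, namespace="filters")
            if out:
                return out
            out = func(text)
            cache.set(key, out, namespace="filters")
            return out
        return inner
    return wrapper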
def get_trend_echos(url_echos, report):
    """ Get Echos trend """
    if not url_echos:
        return report
    url = url_echos.replace('/action-', '/recommandations-action-')
    content = cache.get(url)
    if content:
        soup = BeautifulSoup(content, 'html.parser')
        for i in soup.find_all('div', 'tendance hausse'):
            if 'court terme' in i.text:
                report['echos']['short term'] = 'Hausse'
            if 'moyen terme' in i.text:
                report['echos']['mid term'] = 'Hausse'
        for i in soup.find_all('div', 'tendance egal'):
            if 'court terme' in i.text:
                report['echos']['short term'] = 'Neutre'
            if 'moyen terme' in i.text:
                report['echos']['mid term'] = 'Neutre'
        for i in soup.find_all('div', 'tendance baisse'):
            if 'court terme' in i.text:
                report['echos']['short term'] = 'Baisse'
            if 'moyen terme' in i.text:
                report['echos']['mid term'] = 'Baisse'
    return report
def searchNew(self):
    control.busy()
    t = control.lang(32010).encode('utf-8')
    k = control.keyboard('', t)
    k.doModal()
    q = k.getText()
    if q is None or q == '':
        return
    try:
        from sqlite3 import dbapi2 as database
    except:
        from pysqlite2 import dbapi2 as database
    dbcon = database.connect(control.searchFile)
    dbcur = dbcon.cursor()
    dbcur.execute("INSERT INTO search VALUES (?,?)", (None, q))
    dbcon.commit()
    dbcur.close()
    list = cache.get(scraper().search, 24, q)
    self.list_builder(list)
    control.idle()
    self.createDirectory(sort=False)
def fetch_location_dict(area_id):
    key = GROCERY_LOCATION_KEY + u'{}'.format(area_id)
    location_dict = cache.get(key)
    if not location_dict:
        location_url = config.LOCATIONURL + str(area_id) + '/'
        headers = {'Authorization': config.TOKEN}
        response = make_api_call(location_url, headers=headers)
        try:
            data_list = json.loads(response.text)
        except Exception as e:
            logger.exception(e)
            return False, None, u'Unable to fetch area details'
        if not data_list:
            return False, None, u'Area Does not exist'
        data = data_list[0]
        location_dict = dict()
        location_dict['area'] = data.get('areaid')
        location_dict['country'] = [data.get('countryid')]
        location_dict['state'] = [data.get('stateid')]
        location_dict['city'] = [data.get('cityid')]
        location_dict['zone'] = [data.get('zoneid')]
        cache.set(key, location_dict, ex=GROCERY_CACHE_TTL)
    return True, location_dict, None
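# A minimal usage sketch for fetch_location_dict above; it returns an
# (ok, data, error) triple. The area id and logging calls here are
# illustrative only, not taken from the source:
ok, location, error = fetch_location_dict(area_id=42)
if not ok:
    logger.warning('area lookup failed: %s', error)
else:
    logger.info('resolved city ids: %s', location['city'])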
def get_trend_frtn(url_frtn, report):
    """ Get FRTN trend """
    if not url_frtn:
        return report, None
    market = int(url_frtn.split('-')[-1])
    isin = url_frtn.split('-')[-2]
    trend_url = common.decode_rot('uggcf://obhefr.sbegharb.se/ncv/inyhr/geraqf/NPGVBAF/SGA') + \
        '{market:06d}{isin}'.format(market=market, isin=isin)
    content = cache.get(trend_url)
    if content and content != 'null':
        try:
            json_content = json.loads(content)
            mapping = {
                'POSITIVE': 'Hausse',
                'NEUTRE': 'Neutre',
                'NEGATIVE': 'Baisse',
            }
            if 'opinionCT' in json_content and json_content['opinionCT'] in mapping:
                report['frtn']['short term'] = mapping[json_content['opinionCT']]
            if 'opinionMT' in json_content and json_content['opinionMT'] in mapping:
                report['frtn']['mid term'] = mapping[json_content['opinionMT']]
        except json.decoder.JSONDecodeError:
            pass
    return report, isin
def check_resolver(self):
    try:
        r = cache.get(
            client.request, 1,
            base64.b64decode(
                'aHR0cHM6Ly9yYXcuZ2l0aHVidXNlcmNvbnRlbnQuY29tL3hpYmFsYmExMC9zY3JpcHQubW9kdWxlLmFkdWx0cmVzb2x2ZXIvbWFzdGVyL2xpYi9hZHVsdHJlc29sdmVyL3Jlc29sdmVyLnB5'
            ))
        if len(r) > 1:
            with open(self.resolverFile, 'r') as f:
                compfile = f.read()
            if 'import' in r:
                if compfile == r:
                    log_utils.log('Resolver checked and up to date!',
                                  log_utils.LOGNOTICE)
                    pass
                else:
                    with open(self.resolverFile, 'w') as f:
                        f.write(r)
                    log_utils.log('Resolver updated!', log_utils.LOGNOTICE)
                    kodi.notify(msg='Resolver Updated.', duration=1250,
                                sound=True)
    except Exception as e:
        log_utils.log(
            'Error checking for resolver update :: Error: %s' % str(e),
            log_utils.LOGERROR)
def _render(info, post_type, owner, viewport, mode=None, **kwargs):
    owner_id = 'public' if (not owner or not owner.id) else owner.id
    if post_type in ['note', 'feed', 'file']:
        if mode:
            key = '%s:%s' % (viewport, mode)
        else:
            key = viewport
        if (owner and owner.id
                and owner.id != info.last_action.owner.id
                and owner.id not in info.read_receipt_ids
                and viewport != "discover"):
            status = 'unread'
        elif viewport == 'news_feed' and owner.id and owner.id in info.pinned_by:
            status = 'pinned'
        elif viewport == 'news_feed' and owner.id and owner.id in info.archived_by:
            status = 'archived'
        else:
            status = None
        if status:
            key = key + ':' + status
        key += ':%s:%s' % (post_type, owner_id)
        namespace = info.id
    else:
        key = post_type
        namespace = owner_id

    html = cache.get(key, namespace)
    hit = False
    if not html:
        if post_type == 'note':
            html = NOTE_TEMPLATE.render(note=info, owner=owner,
                                        view=viewport, mode=mode, **kwargs)
        elif post_type == 'file':
            html = FILE_TEMPLATE.render(file=info, owner=owner,
                                        view=viewport, mode=mode, **kwargs)
        else:
            html = FEED_TEMPLATE.render(feed=info, owner=owner,
                                        view=viewport, mode=mode, **kwargs)
        cache.set(key, html, 86400, namespace)
    else:
        hit = True

    html = html.replace(
        '<li id="post',
        '<li data-key="%s" data-namespace="%s" data-cache-status="%s" id="post'
        % (key, namespace, "HIT" if hit else "MISS"))
    return html
def searchOld(self, q):
    list = cache.get(scraper().search, 24, q)
    self.list_builder(list)
    self.createDirectory(sort=False)
    return
def episodeList(self, url, slug):
    list = cache.get(scraper().episodeList, 24, url)
    for item in list:
        self.addDirectoryItem(item['meta']['title'], "playItem",
                              url=item['url'], type=item['type'], slug=slug,
                              is_folder=False, playable=True,
                              meta=item['meta'], art=item['art'])
    self.createDirectory(sort=False)
def _get_saved_list():
    try:
        return store.get(_saved_list_key)
    except KeyError:
        pass
    try:
        # backward compatible (try cache)
        return cache.get(_saved_list_key)
    except KeyError:
        return []
def get_potential(url_brsrm, url_frtn, cours):
    """ Returns the 3-month potential """
    report = dict()
    report['brsrm'] = dict()
    report['brsrm']['value'] = None
    report['brsrm']['percentage'] = 0
    report['frtn'] = dict()
    report['frtn']['value'] = None
    report['frtn']['percentage'] = 0
    if url_brsrm:
        content = cache.get(url_brsrm)
        if content:
            soup = BeautifulSoup(content, 'html.parser')
            for i in soup.find_all('p'):
                if 'Objectif de cours' in i.text:
                    value = i.find('span', 'u-text-bold')
                    if not value:
                        return report
                    report['brsrm']['value'] = common.clean_data(
                        value.text, json_load=False).split()[0]
                    if cours:
                        val = float(cours['cotation']['valorisation'].replace(
                            ',', '.').split()[0])
                        report['brsrm']['percentage'] = round(
                            (float(report['brsrm']['value']) / val - 1) * 100, 1)
    if url_frtn:
        market = int(url_frtn.split('-')[-1])
        isin = url_frtn.split('-')[-2]
        avis_url = common.decode_rot('uggcf://obhefr.sbegharb.se/ncv/inyhr/nivf/SGA') + \
            '{market:06d}{isin}'.format(market=market, isin=isin)
        content = cache.get(avis_url)
        if content:
            try:
                json_content = json.loads(content)
                report['frtn']['value'] = json_content['consensus']['objectif']
                report['frtn']['percentage'] = round(
                    float(json_content['consensus']['potentiel']) * 100, 1)
            except json.decoder.JSONDecodeError:
                pass
    return report
def playItem(self, slug, url):
    control.busy()
    resolve_dialog = xbmcgui.DialogProgress()
    link_list = cache.get(scraper().scrapeLinks, 24, slug, url)
    control.idle()
    if len(link_list) == 0:
        dialog = xbmcgui.Dialog()
        dialog.notification('Anime Incursion', 'No Links Available',
                            xbmcgui.NOTIFICATION_INFO, 5000)
    else:
        resolve_dialog.create('Anime Incursion', '')
        resolve_dialog.update(0)
        link_list = sorted(link_list, key=lambda x: (x['quality']),
                           reverse=True)
        link_total = len(link_list)
        progress = 0
        path = ''
        for i in link_list:
            # if resolve_dialog.iscanceled() == True:
            #     return
            progress += 1
            resolve_dialog.update(
                int((100 / float(link_total)) * progress),
                str(progress) + ' | [B]Host: ' + i['name'].upper() +
                "[/B] | [B]Resolution: " + str(i['quality']) + "p[/B]")
            try:
                if i['direct'] == False:
                    import resolveurl
                    path = resolveurl.resolve(i['url']).encode('utf-8')
                    break
                else:
                    path = i['url']
                    break
            except:
                continue
        if path != '':
            play_item = xbmcgui.ListItem(path=path)
            print('INFO - ' + str(sys.argv[1]))
            xbmcplugin.setResolvedUrl(int(sys.argv[1]), True,
                                      listitem=play_item)
        else:
            dialog = xbmcgui.Dialog()
            dialog.notification('Anime Incursion', 'Unable to Resolve Links',
                                xbmcgui.NOTIFICATION_INFO, 5000)
def showList(self, page, type, subpage, order='score_desc'):
    list = cache.get(scraper().filterScrape, 24, page, type, order, subpage)
    subpage, page = self.subpagination(subpage, page)
    self.list_builder(list)
    self.addDirectoryItem('Next', 'showList', page=page, type=type,
                          order=order, subpage=subpage)
    self.createDirectory(sort=False)
def to_text(html):
    try:
        html = unicode(html)
    except UnicodeDecodeError:
        pass
    key = '%s:to_text' % hash(html)
    out = cache.get(key, namespace="filters")
    if not out:
        out = api.remove_html_tags(html)
        cache.set(key, out, namespace="filters")
    return out
def get_cours(isin, mic, disable_cache=False):
    """ Returns core info from isin """
    url = common.decode_rot('uggcf://yrfrpubf-obhefr-sb-pqa.jyo.nj.ngbf.arg') + \
        common.decode_rot('/fgernzvat/pbhef/trgPbhef?') + \
        'code={}&place={}&codif=ISIN'.format(isin, mic)
    content = cache.get(url, verify=False, disable_cache=disable_cache)
    cours = None
    if content:
        cours = common.clean_data(content)
    return cours
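# Several of the scrapers in this collection obfuscate their endpoint URLs
# and only decode them at call time via common.decode_rot ('uggcf://...'
# becomes 'https://...'). A minimal sketch of what that helper presumably
# does, assuming plain ROT13 (the helper itself is not shown here):
import codecs

def decode_rot(value):
    # ROT13 only touches ASCII letters, so digits, dots, slashes and
    # format placeholders in the obfuscated URL pass through unchanged.
    return codecs.decode(value, 'rot13')

# decode_rot('uggcf://yrfrpubf-obhefr-sb-pqa.jyo.nj.ngbf.arg')
# -> 'https://lesechos-bourse-fo-cdn.wlb.aw.atos.net'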
def lines_truncate(text, lines_count=5):
    key = '%s:lines_truncate' % hash(text)
    out = cache.get(key, namespace="filters")
    # if out:
    #     return out
    raw = text
    text = _normalize_newlines(text)

    # remove blank lines
    lines = [line for line in text.split('\n') if line.strip()]
    # text = '\n'.join(lines)

    images = re.compile('<img.*?>', re.IGNORECASE).findall(text)
    for i in images:
        text = text.replace(i, md5(i).hexdigest())

    links = re.compile('<a.*?</a>', re.IGNORECASE).findall(text)
    for i in links:
        text = text.replace(i, md5(i).hexdigest())

    text = text.replace('<br/>', '<br>')
    text = text.replace('<br>', '8b0f0ea73162b7552dda3c149b6c045d')
    # md5('<br>').hexdigest() = '8b0f0ea73162b7552dda3c149b6c045d'
    text = text.strip().replace('\n', '<br>')

    words_per_line = 15
    longest_line = max(lines[:lines_count], key=len) if len(lines) != 0 else None
    if longest_line and len(longest_line.split()) > words_per_line:
        lines = textwrap.wrap(text)
    else:
        lines = [line for line in text.split('<br>') if line.strip()]

    # skip blank lines (and blank quote lines)
    if len([line for line in lines if line.strip() and line.strip() != '>']) >= lines_count:
        blank_lines = len([line for line in lines if line.strip() in ['', '>']])
        out = ' '.join(lines[:lines_count + blank_lines])
    else:
        out = text

    if len(out) < len(text):
        text = ' '.join(text[:len(out)].split()[0:-1]).rstrip('.') + '...'

    if len(text) / float(len(raw)) > 0.7:
        # if only a little text would be cut off, just show everything
        # instead of truncating
        text = raw

    out = text.replace('<br>', '\n')
    out = out.replace('8b0f0ea73162b7552dda3c149b6c045d', '<br>')
    for i in images:
        out = out.replace(md5(i).hexdigest(), i)
    for i in links:
        out = out.replace(md5(i).hexdigest(), i)

    cache.set(key, out, namespace="filters")
    return out
def _convert_to_text(html):
    try:
        html = unicode(html)
    except UnicodeDecodeError:
        pass
    key = '%s:convert_to_text' % hash(html)
    out = cache.get(key, namespace="filters")
    if not out:
        html = fix_unclosed_tags(html)
        out = api.remove_html_tags(html)
        cache.set(key, out, namespace="filters")
    return out
def unmunge(html):
    """Clean up Word HTML"""
    if 'mso' in html:  # remove Outlook HTML style
        key = '%s:unmunge' % hash(html)
        out = cache.get(key, namespace="filters")
        if not out:
            html = re.sub(re.compile('p"mso.*?"'), 'p', html)
            html = re.sub(re.compile('( style=".*?")'), '', html)
            out = unmungeHtml(html.decode('utf-8'))
            cache.set(key, out, namespace="filters")
        return out
    return html
def test_data_cache(data_cache):
    cache = data_cache
    # make sure the new cache has been initialized and is empty;
    # at this point it should only contain: {"data": {}, "last_update": None}
    assert len(cache.cache.keys()) == 2
    cache.save()
    # add stuff to the cache
    cache.add("test_item", "something")
    # and test retrieval of stuff
    assert cache.get("test_item") == "something"
    assert cache.is_known("test_item")
    assert type(cache.get_timestamp()) == datetime.datetime
def get_url_echos(isin, mic):
    """ Return Echos URL """
    url = common.decode_rot('uggcf://yrfrpubf-obhefr-sb-pqa.jyo.nj.ngbf.arg') + \
        common.decode_rot('/fgernzvat/pbhef/oybpf/trgUrnqreSvpur?') + \
        'code={}&place={}&codif=ISIN'.format(isin, mic)
    content = cache.get(url, verify=False)
    if not content:
        return None
    header_fiche = common.clean_data(content)
    url_echos = common.clean_url(
        header_fiche['headerFiche']['tweetHeaderFiche'])
    return url_echos
def get(isin, years=3):
    """ Get `years` years of history for this ISIN (3 by default) """
    url = common.decode_rot(
        'uggcf://yrfrpubf-obhefr-sb-pqa.jyo.nj.ngbf.arg/SQF/uvfgbel.kzy?' +
        'ragvgl=rpubf&ivrj=NYY&pbqvsvpngvba=VFVA&rkpunatr=KCNE&' +
        'nqqQnlYnfgCevpr=snyfr&nqwhfgrq=gehr&onfr100=snyfr&' +
        'frffJvguAbDhbg=snyfr&crevbq={}L&tenahynevgl=&aoFrff=&'.format(years) +
        'vafgeGbPzc=haqrsvarq&vaqvpngbeYvfg=&pbzchgrIne=gehr&' +
        'bhgchg=pfiUvfgb&') + 'code={}'.format(isin)
    content = cache.get(url, verify=False)
    if content:
        return content.split('\n')
    return ''
def get_url_brsrm(isin):
    """ Return Brsrm URL """
    base_url = common.decode_rot('uggcf://jjj.obhefbenzn.pbz')
    search_path = common.decode_rot('/erpurepur/nwnk?dhrel=')
    content = cache.get(base_url + search_path + isin)
    if not content:
        return None
    soup = BeautifulSoup(content, 'html.parser')
    if soup.find('a', 'search__list-link') is None \
            or 'href' not in soup.find('a', 'search__list-link').attrs:
        return None
    path = soup.find('a', 'search__list-link')['href']
    return base_url + path
def get_url_frtn(isin):
    """ Return Frtn URL """
    base_url = 'https://bourse.fortuneo.fr/api/search?term={}'.format(isin)
    content = cache.get(base_url)
    if not content:
        return None
    try:
        json_content = json.loads(content)
    except json.decoder.JSONDecodeError:
        return None
    try:
        url = json_content['searchResults']['market']['arkea']['items'][0]['url']
    except (KeyError, IndexError):
        return None
    return url
def decorated_function(*args, **kwargs):
    session_id = session.get("session_id")
    user_id = api.get_user_id(session_id)
    if user_id and request.method in ["GET", "OPTIONS"]:
        if request.query_string:
            key = "%s: %s %s?%s" % (user_id, request.method,
                                    request.path, request.query_string)
        else:
            key = "%s: %s %s" % (user_id, request.method, request.path)
        rv = cache.get(key)
        if not rv:
            rv = f(*args, **kwargs)
            cache.set(key, rv)
        return rv
    elif user_id and request.method == "POST":
        key = "%s:*" % user_id
        cache.clear(key)
    return f(*args, **kwargs)
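# decorated_function above is the inner wrapper of a per-user response
# cache: GET/OPTIONS responses are cached per user, path and query string,
# while a POST clears that user's keys. A minimal sketch of the enclosing
# decorator it implies, for a Flask-style view (the name `cached_view` is
# hypothetical, not taken from the source):
from functools import wraps

def cached_view(f):
    @wraps(f)
    def decorated_function(*args, **kwargs):
        # ... caching logic exactly as in decorated_function above,
        # closing over the wrapped view f ...
        return f(*args, **kwargs)
    return decorated_function

# Usage sketch:
# @app.route('/news_feed')
# @cached_view
# def news_feed():
#     ...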
def get_dividend_brsrm(url_brsrm, report):
    """ Get dividend from BRSRM """
    if not url_brsrm:
        return report
    content = cache.get(url_brsrm)
    if not content:
        return report
    soup = BeautifulSoup(content, 'html.parser')
    for div_relative in soup.find_all('div', 'u-relative'):
        if 'Rendement' not in div_relative.text:
            continue
        if len(div_relative.find_all('td')) < 7:
            continue
        report['brsrm']['percent'] = float(
            common.clean_data(div_relative.find_all('td')[6].text,
                              json_load=False).split()[0].split('%')[0])
    return report
def compute_benefices(report):
    """ Get the necessary information and return an approximation of the profit development """
    indice = '1eCCK5'
    count = 1
    continue_req = True
    while continue_req:
        url = common.decode_rot('uggcf://jjj.obhefbenzn.pbz/obhefr/npgvbaf/' +
                                'cnyznerf/qvivqraqrf/cntr-{}?'.format(count) +
                                'znexrg={}&inevngvba=6'.format(indice))
        content = cache.get(url)
        continue_req = content != ''
        if continue_req:
            profit = parse_profit(BeautifulSoup(content, 'html.parser'),
                                  report)
            if profit != 0:
                return profit
        count += 1
    return None
def description(html):
    try:
        html = unicode(html)
    except UnicodeDecodeError:
        pass
    key = '%s:description' % hash(html)
    out = cache.get(key, namespace="filters")
    if out:
        return out
    if '</' in html:
        plain_text = _convert_to_text(html)
    else:
        plain_text = html
    lines = []
    for line in plain_text.split('\n'):
        if '(' in line or ')' in line:
            continue
        elif '[' in line or ']' in line:
            continue
        elif '/' in line:
            continue
        elif ';' in line:
            continue
        elif ' ' in line \
                and len(line) > 15 \
                and line.count('.') < 2 \
                and 'dear' not in line.lower() \
                and 'hi' not in line.lower() \
                and 'unsubscribe' not in line.lower():
            lines.append(clean(line))
        else:
            continue
    lines.sort(key=len)
    if lines:
        out = lines[-1].rstrip('.') + '...'
    else:
        out = '...'
    cache.set(key, out, namespace="filters")
    return out
def sanitize_html(value):
    '''
    https://stackoverflow.com/questions/16861/sanitising-user-input-using-python
    '''
    if '</' not in value:  # not HTML
        return value

    key = '%s:sanitize_html' % hash(value)
    out = cache.get(key, namespace="filters")
    if out:
        return out

    base_url = None
    rjs = r'[\s]*(&#x.{1,7})?'.join(list('javascript:'))
    rvb = r'[\s]*(&#x.{1,7})?'.join(list('vbscript:'))
    re_scripts = re.compile('(%s)|(%s)' % (rjs, rvb), re.IGNORECASE)
    # validTags = 'p i strong b u a h1 h2 h3 h4 pre br img ul ol li blockquote em code hr'.split()
    validTags = ('a abbr b blockquote code del ins dd dl dt em h2 h3 h4 i img '
                 'kbd li ol p pre s small sup sub strong strike table tbody '
                 'th tr td ul br hr div span').split()
    validAttrs = 'src width height alt title class href'.split()
    urlAttrs = 'href title'.split()  # Attributes which should have a URL
    soup = BeautifulSoup(value.decode('utf-8'))
    for comment in soup.findAll(text=lambda text: isinstance(text, Comment)):
        # Get rid of comments
        comment.extract()
    for tag in soup.findAll(True):
        if tag.name not in validTags:
            tag.hidden = True
        attrs = tag.attrs
        tag.attrs = []
        for attr, val in attrs:
            if attr in validAttrs:
                val = re_scripts.sub('', val)  # Remove scripts (vbs & js)
                if attr in urlAttrs:
                    val = urljoin(base_url, val)  # Calculate the absolute url
                tag.attrs.append((attr, val))
    out = soup.renderContents().decode('utf8')
    cache.set(key, out, namespace="filters")
    return out
def fix_unclosed_tags(html):
    if not html:
        return html
    try:
        html = unicode(html)
    except UnicodeDecodeError:
        pass
    try:
        key = '%s:fix_unclosed_tags' % hash(html)
        out = cache.get(key, namespace="filters")
        if out:
            return out
        h = lxml.html.fromstring(html)
        out = lxml.html.tostring(h)
        cache.set(key, out, namespace="filters")
        return out
    except Exception:
        return ''
def get_dividend_frtn(url_frtn, report):
    """ Get dividend from FRTN """
    if not url_frtn:
        return report
    market = int(url_frtn.split('-')[-1])
    isin = url_frtn.split('-')[-2]
    avis_url = common.decode_rot('uggcf://obhefr.sbegharb.se/ncv/inyhr/nivf/SGA') + \
        '{market:06d}{isin}'.format(market=market, isin=isin)
    content = cache.get(avis_url)
    if not content:
        return report
    try:
        json_content = json.loads(content)
        if len(json_content['consensus']['listeAnnee']) > 1:
            report['frtn']['percent'] = round(
                float(json_content['consensus']['listeAnnee'][1]['rendement'])
                * 100, 2)
    except json.decoder.JSONDecodeError:
        pass
    return report
def autolink(text):
    if not text:
        return text

    key = '%s:autolink' % hash(text)
    out = cache.get(key, namespace="filters")
    if out:
        return out

    if re.match(EMAIL_RE, text):
        email = text
        user_id = api.get_user_id_from_email_address(email)
        user = api.get_user_info(user_id)
        return '<a href="/user/%s" class="async">%s</a>' % (user.id, user.name)

    s = text or ''
    s += ' '
    s = str(s)  # convert unicode to string
    s = s.replace('\r\n', '\n')

    urls = api.extract_urls(s)
    urls = list(set(urls))
    urls.sort(key=len, reverse=True)
    for url in urls:
        hash_string = md5(url).hexdigest()
        info = api.get_url_info(url)
        if not url.startswith('http'):
            s = s.replace(url,
                          '<a href="http://%s/" target="_blank" title="%s">%s</a>'
                          % (hash_string,
                             info.title if info.title else hash_string,
                             hash_string))
        elif len(url) > 60:
            u = url[:60]
            for template in ['%s ', ' %s', '\n%s', '%s\n', '%s.', '%s,']:
                if template % url in s:
                    s = s.replace(template % url,
                                  template % ('<a href="%s" target="_blank" title="%s">%s</a>'
                                              % (hash_string,
                                                 info.title if info.title else hash_string,
                                                 md5(u + '...').hexdigest())))
                    break
        else:
            for template in ['%s ', ' %s', '\n%s', '%s\n', '%s.', '%s,']:
                if template % url in s:
                    s = s.replace(template % url,
                                  template % ('<a href="%s" target="_blank" title="%s">%s</a>'
                                              % (hash_string,
                                                 info.title if info.title else hash_string,
                                                 hash_string)))
                    break

    for url in urls:
        s = s.replace(md5(url).hexdigest(), url)
        if len(url) > 60 and url.startswith('http'):
            s = s.replace(md5(url[:60] + '...').hexdigest(), url[:60] + '...')

    mentions = MENTIONS_RE.findall(s)
    if mentions:
        for mention in mentions:
            if '](topic:' in mention:
                topic = re.compile('@\[(?P<name>.+)\]\((?P<id>.*)\)').match(mention).groupdict()
                topic['id'] = topic['id'].split(':', 1)[-1]
                # TODO: update topic name?
                s = s.replace(mention,
                              '<a href="/chat/topic/%s" class="chat">%s</a>'
                              % (topic.get('id'), topic.get('name')))
            elif '](user:' in mention:
                user = re.compile('@\[(?P<name>.+)\]\((?P<id>.*)\)').match(mention).groupdict()
                user['id'] = user['id'].split(':', 1)[-1]
                s = s.replace(mention,
                              '<a href="/chat/user/%s" class="chat"><span class="tag">%s</span></a>'
                              % (user.get('id'), user.get('name')))
            else:
                group = re.compile('@\[(?P<name>.+)\]\((?P<id>.*)\)').match(mention).groupdict()
                group['id'] = group['id'].split(':', 1)[-1]
                s = s.replace(mention,
                              '<a href="/group/%s" class="async"><span class="tag">%s</span></a>'
                              % (group.get('id'), group.get('name')))

    # hashtags = re.compile('(#\[.*?\))').findall(s)
    # if hashtags:
    #     for hashtag in hashtags:
    #         tag = re.compile('#\[(?P<name>.+)\]\((?P<id>.*)\)').match(hashtag).groupdict()
    #         tag['id'] = tag['id'].split(':', 1)[-1]
    #         s = s.replace(hashtag,
    #                       '<a href="?hashtag=%s" class="overlay"><span class="tag">%s</span></a>' % (tag.get('id'), tag.get('name')))

    cache.set(key, s, namespace="filters")
    return s
def flavored_markdown(text):
    key = '%s:flavored_markdown' % hash(text)
    html = cache.get(key, namespace="filters")
    if html:
        return html

    text = ' ' + text + ' '
    text = unescape(text)

    # extract Reference-style links
    reference_urls = REFERENCE_URL_REGEX.findall(text)
    reference_urls = [i[0] for i in reference_urls]
    for i in reference_urls:
        text = text.replace(i, md5(i).hexdigest())

    # extract urls
    urls = URL_REGEX.findall(text)
    urls = [i[0] for i in urls if i]
    urls.sort(key=len, reverse=True)
    for url in urls:
        for pattern in ['%s)', ' %s', '\n%s', '\r\n%s', '%s\n', '%s\r\n']:
            if pattern % url in text:
                text = text.replace(pattern % url,
                                    pattern % md5(url).hexdigest())
                break

    # extract emoticons and symbols
    symbols = EMOTICONS.keys()
    symbols.extend(SYMBOLS.keys())
    symbols.sort(key=len, reverse=True)
    for symbol in symbols:
        for pattern in [' %s', ' %s. ', ' %s.\n', ' %s.\r\n',
                        '\n%s', '\r\n%s', '%s\n', '%s\r\n']:
            if pattern % symbol in text:
                text = text.replace(pattern % symbol,
                                    pattern % md5(symbol).hexdigest())
                break

    # extract mentions
    mentions = re.findall('(@\[.*?\))', text)
    if mentions:
        for mention in mentions:
            text = text.replace(mention, md5(mention).hexdigest())

    # extract hashtags
    hashtags = re.findall('(#\[.*?\))', text)
    if hashtags:
        for hashtag in hashtags:
            text = text.replace(hashtag, md5(hashtag).hexdigest())

    # extract words with underscores - prevent foo_bar_baz from ending up
    # with an italic word in the middle
    words_with_underscores = [w for w in
                              re.findall('((?! {4}|\t)\w+_\w+_\w[\w_]*)', text)
                              if not w.startswith('_')]
    for word in words_with_underscores:
        text = text.replace(word, md5(word).hexdigest())

    # treats newlines in paragraph-like content as real line breaks
    text = text.strip().replace('<br>', '8b0f0ea73162b7552dda3c149b6c045d')
    text = text.strip().replace('\r\n', '<br>').replace('\n', '<br>')  # normalize \r\n and \n to <br>
    text = text.strip().replace('<br>', ' \n')  # treats newlines
    text = text.strip().replace('|| \n', '||\n')  # undo if wiki-tables
    text = text.strip().replace('8b0f0ea73162b7552dda3c149b6c045d', '<br>')

    # restore reference_urls
    for i in reference_urls:
        text = text.replace(md5(i).hexdigest(), i)

    # convert text to html
    html = markdown(text, extras=["wiki-tables", "cuddled-lists",
                                  "fenced-code-blocks", "header-ids",
                                  "code-friendly", "pyshell", "footnotes"])
    # print html

    # extract code-blocks
    html = html.replace('\n', '<br/>')  # convert multi-lines to single-lines for regex matching
    code_blocks = re.findall('(<code>.*?</code>)', html)
    for block in code_blocks:
        html = html.replace(block, md5(block).hexdigest())

    # Show emoticons and symbols
    for symbol in symbols:
        if SYMBOLS.has_key(symbol):
            html = html.replace(md5(symbol).hexdigest(), SYMBOLS[symbol])
        else:
            html = html.replace(md5(symbol).hexdigest(),
                                EMOTICONS[symbol].replace("<img src", "<img class='emoticon' src"))

    # Autolinks urls, mentions, hashtags, turn youtube links to embed code
    for url in urls:
        title = api.get_url_info(url).title
        hash_string = md5(url).hexdigest()
        if len(url) > 40:
            html = html.replace(hash_string,
                                '<a href="%s" target="_blank" title="%s">%s</a>'
                                % (url, title, url[:40] + '...'))
        else:
            html = html.replace(hash_string,
                                '<a href="%s" target="_blank" title="%s">%s</a>'
                                % (url, title, url))

    for mention in mentions:
        hash_string = md5(mention).hexdigest()
        user = re.compile('@\[(?P<name>.+)\]\((?P<id>.*)\)').match(mention).groupdict()
        user['id'] = user['id'].split(':', 1)[-1]
        html = html.replace(hash_string,
                            '<a href="#!/user/%s" class="overlay"><span class="tag">%s</span></a>'
                            % (user.get('id'), user.get('name')))

    for hashtag in hashtags:
        hash_string = md5(hashtag).hexdigest()
        tag = re.compile('#\[(?P<name>.+)\]\((?P<id>.*)\)').match(hashtag).groupdict()
        tag['id'] = tag['id'].split(':', 1)[-1]
        html = html.replace(hash_string,
                            '<a href="?hashtag=%s" class="overlay"><span class="tag">%s</span></a>'
                            % (tag.get('id'), tag.get('name')))

    # Restore code blocks
    for block in code_blocks:
        html = html.replace(md5(block).hexdigest(), block)

    # restore urls, mentions, emoticons and hashtags in code blocks
    for url in urls:
        html = html.replace(md5(url).hexdigest(), url)
    for mention in mentions:
        html = html.replace(md5(mention).hexdigest(), mention)
    for hashtag in hashtags:
        html = html.replace(md5(hashtag).hexdigest(), hashtag)
    for symbol in symbols:
        html = html.replace(md5(symbol).hexdigest(), symbol)

    # restore words with underscores
    for word in words_with_underscores:
        html = html.replace(md5(word).hexdigest(), word)

    # restore \n
    html = html.replace('<br/>', '\n')

    # xss protection
    html = sanitize_html(html)
    if not html or html.isspace():
        return ''

    # add target="_blank" to all a tags
    html = PyQuery(html)
    html('a:not(.overlay)').attr('target', '_blank')
    html = str(html)
    html = html.replace('<br/>', '<br>')

    cache.set(key, html, namespace="filters")
    return html