def fetch_location_dict(area_id):
    key = GROCERY_LOCATION_KEY + u'{}'.format(area_id)
    location_dict = cache.get(key)
    if not location_dict:
        location_url = config.LOCATIONURL + str(area_id) + '/'
        headers = {'Authorization': config.TOKEN}
        response = make_api_call(location_url, headers=headers)
        try:
            data_list = json.loads(response.text)
        except Exception as e:
            logger.exception(e)
            return False, None, u'Unable to fetch area details'
        if not data_list:
            return False, None, u'Area does not exist'
        data = data_list[0]
        location_dict = dict()
        location_dict['area'] = data.get('areaid')
        location_dict['country'] = [data.get('countryid')]
        location_dict['state'] = [data.get('stateid')]
        location_dict['city'] = [data.get('cityid')]
        location_dict['zone'] = [data.get('zoneid')]
        cache.set(key, location_dict, ex=GROCERY_CACHE_TTL)
    return True, location_dict, None
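# `make_api_call` is external to these snippets; below is a minimal sketch of the
# contract the callers rely on, assuming it wraps `requests` (the wrapper shape and
# the 5-second timeout are assumptions, not the project's actual implementation):
import requests

def make_api_call(url, method='GET', headers=None, body=None):
    # returns the raw response object; callers json.loads(response.text)
    # themselves and treat parse failures as fetch errors
    return requests.request(method, url, headers=headers, json=body, timeout=5)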
def remove_empty_lines(html):
    key = '%s:remove_empty_lines' % hash(html)
    out = cache.get(key, namespace="filters")
    if out:
        return out
    if '</' in html:
        html = html.strip().replace('\n', '')
        soup = BeautifulSoup(html)
        lines = []
        for element in soup.contents:
            if isinstance(element, Tag):
                if element.text:
                    lines.append(str(element).strip())
                elif 'br' in str(element):
                    lines.append('\n')
            elif isinstance(element, NavigableString):
                lines.append(str(element).strip())
        out = ''.join(lines).strip()
        while '\n\n' in out:
            out = out.replace('\n\n', '\n')
    else:
        out = '\n'.join([line for line in html.split('\n') if line.strip()])
    cache.set(key, out, namespace="filters")
    return out
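# Worked example, under the BeautifulSoup 3 semantics assumed above:
#   remove_empty_lines('<p>a</p><br/><p></p><p>b</p>')
# keeps the non-empty tags, maps the <br/> to a newline and drops the empty
# <p></p>, yielding '<p>a</p>\n<p>b</p>'.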
def _render(info, post_type, owner, viewport, mode=None, **kwargs):
    owner_id = 'public' if (not owner or not owner.id) else owner.id
    if post_type in ['note', 'feed', 'file']:
        if mode:
            key = '%s:%s' % (viewport, mode)
        else:
            key = viewport
        if (owner and owner.id
                and owner.id != info.last_action.owner.id
                and owner.id not in info.read_receipt_ids
                and viewport != "discover"):
            status = 'unread'
        elif viewport == 'news_feed' and owner.id and owner.id in info.pinned_by:
            status = 'pinned'
        elif viewport == 'news_feed' and owner.id and owner.id in info.archived_by:
            status = 'archived'
        else:
            status = None
        if status:
            key = key + ':' + status
        key += ':%s:%s' % (post_type, owner_id)
        namespace = info.id
    else:
        key = post_type
        namespace = owner_id
    html = cache.get(key, namespace)
    hit = False
    if not html:
        if post_type == 'note':
            html = NOTE_TEMPLATE.render(note=info, owner=owner,
                                        view=viewport, mode=mode, **kwargs)
        elif post_type == 'file':
            html = FILE_TEMPLATE.render(file=info, owner=owner,
                                        view=viewport, mode=mode, **kwargs)
        else:
            html = FEED_TEMPLATE.render(feed=info, owner=owner,
                                        view=viewport, mode=mode, **kwargs)
        cache.set(key, html, 86400, namespace)
    else:
        hit = True
    html = html.replace(
        '<li id="post',
        '<li data-key="%s" data-namespace="%s" data-cache-status="%s" id="post'
        % (key, namespace, "HIT" if hit else "MISS"))
    return html
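# Illustration of the key/namespace scheme composed above (values hypothetical):
# a pinned note rendered on the news feed for owner 7 yields
#   key = 'news_feed:pinned:note:7', namespace = info.id
# so all cached renderings of one post can be invalidated together via its namespace.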
def to_text(html):
    try:
        html = unicode(html)
    except UnicodeDecodeError:
        pass
    key = '%s:to_text' % hash(html)
    out = cache.get(key, namespace="filters")
    if not out:
        out = api.remove_html_tags(html)
        cache.set(key, out, namespace="filters")
    return out
def unmunge(html):
    """Clean up Word HTML"""
    if 'mso' in html:  # remove outlook html style
        key = '%s:unmunge' % hash(html)
        out = cache.get(key, namespace="filters")
        if not out:
            html = re.sub(re.compile('p"mso.*?"'), 'p', html)
            html = re.sub(re.compile('( style=".*?")'), '', html)
            out = unmungeHtml(html.decode('utf-8'))
            cache.set(key, out, namespace="filters")
        return out
    return html
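# A quick illustration of the second regex above: inline style attributes are
# dropped wholesale before `unmungeHtml` (an external helper) runs, e.g.
#   re.sub(re.compile('( style=".*?")'), '', '<p style="margin:0">x</p>')  ->  '<p>x</p>'
# The first regex appears intended to strip Word's p"mso..." class markers back to bare p.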
def _convert_to_text(html):
    try:
        html = unicode(html)
    except UnicodeDecodeError:
        pass
    key = '%s:convert_to_text' % hash(html)
    out = cache.get(key, namespace="filters")
    if not out:
        html = fix_unclosed_tags(html)
        out = api.remove_html_tags(html)
        cache.set(key, out, namespace="filters")
    return out
def lines_truncate(text, lines_count=5):
    key = '%s:lines_truncate' % hash(text)
    out = cache.get(key, namespace="filters")
    # if out:
    #     return out
    raw = text
    text = _normalize_newlines(text)
    # remove blank lines
    lines = [line for line in text.split('\n') if line.strip()]
    # text = '\n'.join(lines)
    images = re.compile('<img.*?>', re.IGNORECASE).findall(text)
    for i in images:
        text = text.replace(i, md5(i).hexdigest())
    links = re.compile('<a.*?</a>', re.IGNORECASE).findall(text)
    for i in links:
        text = text.replace(i, md5(i).hexdigest())
    text = text.replace('<br/>', '<br>')
    text = text.replace('<br>', '8b0f0ea73162b7552dda3c149b6c045d')  # md5('<br>').hexdigest()
    text = text.strip().replace('\n', '<br>')
    words_per_line = 15
    longest_line = max(lines[:lines_count], key=len) if len(lines) != 0 else None
    if longest_line and len(longest_line.split()) > words_per_line:
        lines = textwrap.wrap(text)
    else:
        lines = [line for line in text.split('<br>') if line.strip()]
    # skip blank lines (and blank quote lines)
    if len([line for line in lines if line.strip() and line.strip() != '>']) >= lines_count:
        blank_lines = len([line for line in lines if line.strip() in ['', '>']])
        out = ' '.join(lines[:lines_count + blank_lines])
    else:
        out = text
    if len(out) < len(text):
        text = ' '.join(text[:len(out)].split()[0:-1]).rstrip('.') + '...'
        if len(text) / float(len(raw)) > 0.7:
            # if most of the text would survive anyway, show it all instead of truncating
            text = raw
    out = text.replace('<br>', '\n')
    out = out.replace('8b0f0ea73162b7552dda3c149b6c045d', '<br>')
    for i in images:
        out = out.replace(md5(i).hexdigest(), i)
    for i in links:
        out = out.replace(md5(i).hexdigest(), i)
    cache.set(key, out, namespace="filters")
    return out
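# lines_truncate (and flavored_markdown/autolink below) all rely on the same trick:
# swap fragile fragments for their md5 hex digests so the intermediate string
# surgery cannot corrupt them, then swap them back. A minimal sketch of the pattern
# (Python 2, assuming `md5` is hashlib.md5 as in the snippets above):
from hashlib import md5

def protect(text, fragments):
    # replace each fragment with its digest placeholder
    for f in fragments:
        text = text.replace(f, md5(f).hexdigest())
    return text

def restore(text, fragments):
    # undo protect(): put the original fragments back
    for f in fragments:
        text = text.replace(md5(f).hexdigest(), f)
    return text

# e.g. restore(protect(s, tags), tags) == s for any list of substrings `tags`.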
def decorated_function(*args, **kwargs):
    session_id = session.get("session_id")
    user_id = api.get_user_id(session_id)
    if user_id and request.method in ["GET", "OPTIONS"]:
        if request.query_string:
            key = "%s: %s %s?%s" % (user_id, request.method, request.path, request.query_string)
        else:
            key = "%s: %s %s" % (user_id, request.method, request.path)
        rv = cache.get(key)
        if not rv:
            rv = f(*args, **kwargs)
            cache.set(key, rv)
        return rv
    elif user_id and request.method == "POST":
        key = "%s:*" % user_id
        cache.clear(key)
    return f(*args, **kwargs)
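# decorated_function above is the inner closure of a per-user response cache;
# a sketch of the enclosing decorator (the name `cached` is hypothetical):
from functools import wraps

def cached(f):
    @wraps(f)
    def decorated_function(*args, **kwargs):
        # as above: GET/OPTIONS responses are cached per "user_id: METHOD path?qs",
        # and any POST clears the user's whole "user_id:*" keyspace
        return f(*args, **kwargs)
    return decorated_function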
def start_testing():
    start_testing_args = {
        'test': fields.Bool(location='json', required=True),
        'seconds': fields.Int(location='json', required=False, missing=3600)
    }
    try:
        args = parser.parse(start_testing_args, request)
    except werkzeug.exceptions.UnprocessableEntity as e:
        return handle_unprocessable_entity(e)
    cache.set(KAFTATESTINGKEY, args['test'], ex=args['seconds'])
    if not args['test']:
        CouponsKafkaProducer.destroy_instance()
    else:
        CouponsKafkaProducer.create_kafka_producer()
    rv = {'success': True}
    return rv
def description(html):
    try:
        html = unicode(html)
    except UnicodeDecodeError:
        pass
    key = '%s:description' % hash(html)
    out = cache.get(key, namespace="filters")
    if out:
        return out
    if '</' in html:
        plain_text = _convert_to_text(html)
    else:
        plain_text = html
    lines = []
    for line in plain_text.split('\n'):
        if '(' in line or ')' in line:
            continue
        elif '[' in line or ']' in line:
            continue
        elif '/' in line:
            continue
        elif ';' in line:
            continue
        elif (' ' in line
                and len(line) > 15
                and line.count('.') < 2
                and 'dear' not in line.lower()
                and 'hi' not in line.lower()
                and 'unsubscribe' not in line.lower()):
            lines.append(clean(line))
        else:
            continue
    lines.sort(key=len)
    if lines:
        out = lines[-1].rstrip('.') + '...'
    else:
        out = '...'
    cache.set(key, out, namespace="filters")
    return out
def sanitize_html(value):
    '''
    https://stackoverflow.com/questions/16861/sanitising-user-input-using-python
    '''
    if '</' not in value:  # not HTML
        return value
    key = '%s:sanitize_html' % hash(value)
    out = cache.get(key, namespace="filters")
    if out:
        return out
    base_url = None
    rjs = r'[\s]*(&#x.{1,7})?'.join(list('javascript:'))
    rvb = r'[\s]*(&#x.{1,7})?'.join(list('vbscript:'))
    re_scripts = re.compile('(%s)|(%s)' % (rjs, rvb), re.IGNORECASE)
    # validTags = 'p i strong b u a h1 h2 h3 h4 pre br img ul ol li blockquote em code hr'.split()
    validTags = ('a abbr b blockquote code del ins dd dl dt em h2 h3 h4 i img kbd li ol p pre '
                 's small sup sub strong strike table tbody th tr td ul br hr div span').split()
    validAttrs = 'src width height alt title class href'.split()
    urlAttrs = 'href title'.split()  # attributes which should have a URL
    soup = BeautifulSoup(value.decode('utf-8'))
    for comment in soup.findAll(text=lambda text: isinstance(text, Comment)):
        # get rid of comments
        comment.extract()
    for tag in soup.findAll(True):
        if tag.name not in validTags:
            tag.hidden = True
        attrs = tag.attrs
        tag.attrs = []
        for attr, val in attrs:
            if attr in validAttrs:
                val = re_scripts.sub('', val)  # remove scripts (vbs & js)
                if attr in urlAttrs:
                    val = urljoin(base_url, val)  # calculate the absolute url
                tag.attrs.append((attr, val))
    out = soup.renderContents().decode('utf8')
    cache.set(key, out, namespace="filters")
    return out
def fix_unclosed_tags(html):
    if not html:
        return html
    try:
        html = unicode(html)
    except UnicodeDecodeError:
        pass
    try:
        key = '%s:fix_unclosed_tags' % hash(html)
        out = cache.get(key, namespace="filters")
        if out:
            return out
        h = lxml.html.fromstring(html)
        out = lxml.html.tostring(h)
        cache.set(key, out, namespace="filters")
        return out
    except Exception:
        return ''
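# Usage example: lxml parses the fragment and re-serializes it with tags closed,
#   fix_unclosed_tags('<p>hello <b>world')  ->  '<p>hello <b>world</b></p>'
# (exact serialization may differ slightly across lxml versions).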
def flavored_markdown(text):
    key = '%s:flavored_markdown' % hash(text)
    html = cache.get(key, namespace="filters")
    if html:
        return html

    text = ' ' + text + ' '
    text = unescape(text)

    # extract reference-style links
    reference_urls = REFERENCE_URL_REGEX.findall(text)
    reference_urls = [i[0] for i in reference_urls]
    for i in reference_urls:
        text = text.replace(i, md5(i).hexdigest())

    # extract urls
    urls = URL_REGEX.findall(text)
    urls = [i[0] for i in urls if i]
    urls.sort(key=len, reverse=True)
    for url in urls:
        for pattern in ['%s)', ' %s', '\n%s', '\r\n%s', '%s\n', '%s\r\n']:
            if pattern % url in text:
                text = text.replace(pattern % url, pattern % md5(url).hexdigest())
                break

    # extract emoticons and symbols
    symbols = EMOTICONS.keys()
    symbols.extend(SYMBOLS.keys())
    symbols.sort(key=len, reverse=True)
    for symbol in symbols:
        for pattern in [' %s', ' %s. ', ' %s.\n', ' %s.\r\n',
                        '\n%s', '\r\n%s', '%s\n', '%s\r\n']:
            if pattern % symbol in text:
                text = text.replace(pattern % symbol, pattern % md5(symbol).hexdigest())
                break

    # extract mentions
    mentions = re.findall('(@\[.*?\))', text)
    if mentions:
        for mention in mentions:
            text = text.replace(mention, md5(mention).hexdigest())

    # extract hashtags
    hashtags = re.findall('(#\[.*?\))', text)
    if hashtags:
        for hashtag in hashtags:
            text = text.replace(hashtag, md5(hashtag).hexdigest())

    # extract underscored words - prevent foo_bar_baz from ending up with an
    # italic word in the middle
    words_with_underscores = [w for w in
                              re.findall('((?! {4}|\t)\w+_\w+_\w[\w_]*)', text)
                              if not w.startswith('_')]
    for word in words_with_underscores:
        text = text.replace(word, md5(word).hexdigest())

    # treat newlines in paragraph-like content as real line breaks
    text = text.strip().replace('<br>', '8b0f0ea73162b7552dda3c149b6c045d')
    text = text.strip().replace('\r\n', '<br>').replace('\n', '<br>')  # normalize \r\n and \n to <br>
    text = text.strip().replace('<br>', ' \n')  # treats newlines
    text = text.strip().replace('|| \n', '||\n')  # undo if wiki-tables
    text = text.strip().replace('8b0f0ea73162b7552dda3c149b6c045d', '<br>')

    # restore reference_urls
    for i in reference_urls:
        text = text.replace(md5(i).hexdigest(), i)

    # convert text to html
    html = markdown(text, extras=["wiki-tables", "cuddled-lists", "fenced-code-blocks",
                                  "header-ids", "code-friendly", "pyshell", "footnotes"])
    # print html

    # extract code-blocks
    html = html.replace('\n', '<br/>')  # convert multi-lines to single-lines for regex matching
    code_blocks = re.findall('(<code>.*?</code>)', html)
    for block in code_blocks:
        html = html.replace(block, md5(block).hexdigest())

    # show emoticons and symbols
    for symbol in symbols:
        if SYMBOLS.has_key(symbol):
            html = html.replace(md5(symbol).hexdigest(), SYMBOLS[symbol])
        else:
            html = html.replace(md5(symbol).hexdigest(),
                                EMOTICONS[symbol].replace("<img src", "<img class='emoticon' src"))

    # autolink urls, mentions, hashtags; turn youtube links into embed code
    for url in urls:
        title = api.get_url_info(url).title
        hash_string = md5(url).hexdigest()
        if len(url) > 40:
            html = html.replace(hash_string,
                                '<a href="%s" target="_blank" title="%s">%s</a>'
                                % (url, title, url[:40] + '...'))
        else:
            html = html.replace(hash_string,
                                '<a href="%s" target="_blank" title="%s">%s</a>'
                                % (url, title, url))
    for mention in mentions:
        hash_string = md5(mention).hexdigest()
        user = re.compile('@\[(?P<name>.+)\]\((?P<id>.*)\)').match(mention).groupdict()
        user['id'] = user['id'].split(':', 1)[-1]
        html = html.replace(hash_string,
                            '<a href="#!/user/%s" class="overlay"><span class="tag">%s</span></a>'
                            % (user.get('id'), user.get('name')))
    for hashtag in hashtags:
        hash_string = md5(hashtag).hexdigest()
        tag = re.compile('#\[(?P<name>.+)\]\((?P<id>.*)\)').match(hashtag).groupdict()
        tag['id'] = tag['id'].split(':', 1)[-1]
        html = html.replace(hash_string,
                            '<a href="?hashtag=%s" class="overlay"><span class="tag">%s</span></a>'
                            % (tag.get('id'), tag.get('name')))

    # restore code blocks
    for block in code_blocks:
        html = html.replace(md5(block).hexdigest(), block)

    # restore urls, mentions, emoticons and hashtags in code blocks
    for url in urls:
        html = html.replace(md5(url).hexdigest(), url)
    for mention in mentions:
        html = html.replace(md5(mention).hexdigest(), mention)
    for hashtag in hashtags:
        html = html.replace(md5(hashtag).hexdigest(), hashtag)
    for symbol in symbols:
        html = html.replace(md5(symbol).hexdigest(), symbol)

    # restore words with underscores
    for word in words_with_underscores:
        html = html.replace(md5(word).hexdigest(), word)

    # restore \n
    html = html.replace('<br/>', '\n')

    # xss protection
    html = sanitize_html(html)
    if not html or html.isspace():
        return ''

    # add target="_blank" to all a tags
    html = PyQuery(html)
    html('a:not(.overlay)').attr('target', '_blank')
    html = str(html)
    html = html.replace('<br/>', '<br>')

    cache.set(key, html, namespace="filters")
    return html
def fetch_items(subscription_id_list, item_map):
    # item_map maps a subscription id to a list of dicts of item ids and their
    # respective quantities. We cache subscription id -> subscription dict and
    # fetch the rest from the api, setting the cache for them. While iterating
    # over subscription ids, build the list of verification item dicts, create
    # the item and add it to the final resultant list.
    item_list = list()
    to_fetch_subscription_list = list()
    for subscription_id in subscription_id_list:
        key = GROCERY_ITEM_KEY + u'{}'.format(subscription_id)
        subscription_dict = cache.get(key)
        if subscription_dict:
            for item in item_map.get(subscription_id):
                item_id = item.get('item_id')
                quantity = item.get('quantity')
                item_dict = dict()
                item_dict['brand'] = subscription_dict.get('brandid')
                item_dict['category'] = [subscription_dict.get('categoryid')]
                item_dict['product'] = [subscription_dict.get('productid')]
                item_dict['seller'] = subscription_dict.get('sellerid')
                item_dict['storefront'] = subscription_dict.get('storefront_id')
                item_dict['variant'] = subscription_dict.get('variantid')
                item_dict['price'] = subscription_dict.get('offerprice')
                item_dict['quantity'] = quantity
                item_dict['subscription_id'] = subscription_id
                item_dict['item_id'] = item_id
                item_obj = VerificationItemData(**item_dict)
                item_list.append(item_obj)
        else:
            to_fetch_subscription_list.append(subscription_id)
    if to_fetch_subscription_list:
        subscription_id_list_str = ','.join(
            u'{}'.format(v) for v in to_fetch_subscription_list)
        item_url = config.SUBSCRIPTIONURL + subscription_id_list_str
        headers = {'Authorization': config.TOKEN}
        response = make_api_call(item_url, headers=headers)
        try:
            data_list = json.loads(response.text)
        except Exception as e:
            logger.exception(e)
            return False, None, u'Unable to fetch Items'
        if not isinstance(data_list, list) or len(data_list) != len(to_fetch_subscription_list):
            return False, None, u'Invalid Item ids provided'
        for data in data_list:
            key = GROCERY_ITEM_KEY + u'{}'.format(data.get('itemid'))
            cache.set(key, data, ex=GROCERY_CACHE_TTL)
            for item in item_map.get(u'{}'.format(data.get('itemid'))):
                item_id = item.get('item_id')
                quantity = item.get('quantity')
                item_dict = dict()
                item_dict['brand'] = data.get('brandid')
                item_dict['category'] = [data.get('categoryid')]
                item_dict['product'] = [data.get('productid')]
                item_dict['seller'] = data.get('sellerid')
                item_dict['storefront'] = data.get('storefront_id')
                item_dict['variant'] = data.get('variantid')
                item_dict['price'] = data.get('offerprice')
                item_dict['quantity'] = quantity
                item_dict['subscription_id'] = data.get('itemid')
                item_dict['item_id'] = item_id
                item_obj = VerificationItemData(**item_dict)
                item_list.append(item_obj)
    return True, item_list, None
def autolink(text):
    if not text:
        return text
    key = '%s:autolink' % hash(text)
    out = cache.get(key, namespace="filters")
    if out:
        return out
    if re.match(EMAIL_RE, text):
        email = text
        user_id = api.get_user_id_from_email_address(email)
        user = api.get_user_info(user_id)
        return '<a href="/user/%s" class="async">%s</a>' % (user.id, user.name)
    s = text or ''
    s += ' '
    s = str(s)  # convert unicode to string
    s = s.replace('\r\n', '\n')
    urls = api.extract_urls(s)
    urls = list(set(urls))
    urls.sort(key=len, reverse=True)
    for url in urls:
        hash_string = md5(url).hexdigest()
        info = api.get_url_info(url)
        if not url.startswith('http'):
            s = s.replace(url,
                          '<a href="http://%s/" target="_blank" title="%s">%s</a>'
                          % (hash_string,
                             info.title if info.title else hash_string,
                             hash_string))
        elif len(url) > 60:
            u = url[:60]
            for template in ['%s ', ' %s', '\n%s', '%s\n', '%s.', '%s,']:
                if template % url in s:
                    s = s.replace(template % url,
                                  template % ('<a href="%s" target="_blank" title="%s">%s</a>'
                                              % (hash_string,
                                                 info.title if info.title else hash_string,
                                                 md5(u + '...').hexdigest())))
                    break
        else:
            for template in ['%s ', ' %s', '\n%s', '%s\n', '%s.', '%s,']:
                if template % url in s:
                    s = s.replace(template % url,
                                  template % ('<a href="%s" target="_blank" title="%s">%s</a>'
                                              % (hash_string,
                                                 info.title if info.title else hash_string,
                                                 hash_string)))
                    break
    for url in urls:
        s = s.replace(md5(url).hexdigest(), url)
        if len(url) > 60 and url.startswith('http'):
            s = s.replace(md5(url[:60] + '...').hexdigest(), url[:60] + '...')
    mentions = MENTIONS_RE.findall(s)
    if mentions:
        for mention in mentions:
            if '](topic:' in mention:
                topic = re.compile('@\[(?P<name>.+)\]\((?P<id>.*)\)').match(mention).groupdict()
                topic['id'] = topic['id'].split(':', 1)[-1]
                # TODO: update topic name?
                s = s.replace(mention,
                              '<a href="/chat/topic/%s" class="chat">%s</a>'
                              % (topic.get('id'), topic.get('name')))
            elif '](user:' in mention:
                user = re.compile('@\[(?P<name>.+)\]\((?P<id>.*)\)').match(mention).groupdict()
                user['id'] = user['id'].split(':', 1)[-1]
                s = s.replace(mention,
                              '<a href="/chat/user/%s" class="chat"><span class="tag">%s</span></a>'
                              % (user.get('id'), user.get('name')))
            else:
                group = re.compile('@\[(?P<name>.+)\]\((?P<id>.*)\)').match(mention).groupdict()
                group['id'] = group['id'].split(':', 1)[-1]
                s = s.replace(mention,
                              '<a href="/group/%s" class="async"><span class="tag">%s</span></a>'
                              % (group.get('id'), group.get('name')))
    # hashtags = re.compile('(#\[.*?\))').findall(s)
    # if hashtags:
    #     for hashtag in hashtags:
    #         tag = re.compile('#\[(?P<name>.+)\]\((?P<id>.*)\)').match(hashtag).groupdict()
    #         tag['id'] = tag['id'].split(':', 1)[-1]
    #         s = s.replace(hashtag,
    #                       '<a href="?hashtag=%s" class="overlay"><span class="tag">%s</span></a>' % (tag.get('id'), tag.get('name')))
    cache.set(key, s, namespace="filters")
    return s
def fetch_items(subscription_id_list, item_map):
    # item_map maps a subscription id to a list of dicts of item ids and their
    # respective quantities. We cache subscription id -> subscription dict and
    # fetch the rest from the api, setting the cache for them. While iterating
    # over subscription ids, build the list of verification item dicts, create
    # the item and add it to the final resultant list.
    item_list = list()
    to_fetch_subscription_list = list()
    for subscription_id in subscription_id_list:
        key = GROCERY_ITEM_KEY + u'{}'.format(subscription_id)
        subscription_dict = cache.get(key)
        if subscription_dict:
            for item in item_map.get(subscription_id):
                item_id = item.get('item_id')
                quantity = item.get('quantity')
                item_dict = copy.deepcopy(subscription_dict)
                item_dict['quantity'] = quantity
                item_dict['subscription_id'] = subscription_id
                item_dict['item_id'] = item_id
                item_obj = VerificationItemData(**item_dict)
                item_list.append(item_obj)
        else:
            to_fetch_subscription_list.append(subscription_id)
    if to_fetch_subscription_list:
        to_fetch_subscription_list = [
            int(to_fetch_item_id) for to_fetch_item_id in to_fetch_subscription_list
        ]
        body = {
            "query": {
                "type": ["grocery"],
                "filters": {
                    "id": to_fetch_subscription_list
                },
                "select": [
                    "sellerId", "variantId", "productId",
                    "categories", "storeFronts", "brandId"
                ]
            },
            "count": len(to_fetch_subscription_list),
            "offset": 0
        }
        headers = config.SUBSCRIPTIONHEADERS
        response = make_api_call(config.SUBSCRIPTIONURL, method='POST',
                                 headers=headers, body=body)
        try:
            response_data = json.loads(response.text)
        except Exception as e:
            logger.exception(e)
            return False, None, u'Unable to fetch Items'
        try:
            count = response_data['results'][0]['items'][0]['count']
            if count != len(to_fetch_subscription_list):
                return False, None, u'Invalid Subscription Ids provided'
            raw_data_list = response_data['results'][0]['items'][0]['items']
        except Exception as e:
            logger.exception(e)
            logger.error(u'Invalid Response for items {} received {}'.format(
                to_fetch_subscription_list, response_data))
            return False, None, u'Unknown Error. Please contact tech support'
        for raw_data in raw_data_list:
            data = {
                'variant': raw_data['variantId'],
                'price': raw_data['offerPrice'],
                'brand': raw_data['brandId'],
                'product': [raw_data['productId']],
                'seller': raw_data['sellerId']
            }
            category_list = list()
            for category in raw_data['categories']:
                category_list.append(category['id'])
            data['category'] = category_list
            storefront_list = list()
            for storefront in raw_data['storeFronts']:
                storefront_list.append(storefront['id'])
            data['storefront'] = storefront_list
            key = GROCERY_ITEM_KEY + u'{}'.format(raw_data.get('id'))
            cache.set(key, data, ex=GROCERY_CACHE_TTL)
            for item in item_map.get(u'{}'.format(raw_data.get('id'))):
                item_id = item.get('item_id')
                quantity = item.get('quantity')
                item_dict = copy.deepcopy(data)
                item_dict['quantity'] = quantity
                item_dict['subscription_id'] = raw_data.get('id')
                item_dict['item_id'] = item_id
                item_obj = VerificationItemData(**item_dict)
                item_list.append(item_obj)
    return True, item_list, None
def fetch_location_dict(id):
    key = GROCERY_LOCATION_KEY + u'{}'.format(id)
    location_dict = cache.get(key)
    if not location_dict:
        location_url = config.LOCATIONURL + str(id)
        response = make_api_call(location_url)
        try:
            raw_data = json.loads(response.text)
        except Exception as e:
            logger.exception(e)
            return False, None, u'Unable to fetch details for geo id={}'.format(id)
        if not raw_data.get('locations'):
            return False, None, u'geo id={} does not exist'.format(id)
        locations = raw_data.get('locations')
        data = None
        for location in locations:
            if 'tags' in location and location['tags']:
                if 'grocery' in location['tags']:
                    data = location
                    break
        if not data and not (('tags' in locations[0]) and locations[0]['tags']
                             and ('grocery' not in locations[0]['tags'])):
            data = locations[0]
        if not data or not data['types']:
            return False, None, u'{} is not a valid geo Id'.format(id)
        geo_types_ordered = ['area', 'pincode', 'zone', 'city', 'state', 'country']
        id_types = data['types']
        id_type = None
        for geo_type in geo_types_ordered:
            if geo_type in id_types:
                id_type = geo_type
                break
        if not id_type:
            return False, None, u'{} is not a valid geo Id'.format(id)
        location_dict = {
            'area': list(),
            'state': list(),
            'city': list(),
            'pincode': list(),
            'zone': list(),
            'country': list()
        }
        for container in data.get('containers'):
            for geo_type in geo_types_ordered:
                if geo_type in container['types']:
                    location_dict[geo_type].append(container['gid'])
        location_dict[id_type].append(id)
        if not location_dict['country']:
            # TODO remove this, once geo service starts returning country also
            location_dict['country'].append(1)
        cache.set(key, location_dict, ex=GROCERY_CACHE_TTL)
    return True, location_dict, None
def autolink(text):
    if not text:
        return text
    key = '%s:autolink' % hash(text)
    out = cache.get(key, namespace="filters")
    if out:
        return out
    if re.match(EMAIL_RE, text):
        email = text
        user_id = api.get_user_id_from_email_address(email)
        user = api.get_user_info(user_id)
        return '<a href="/user/%s" class="async">%s</a>' % (user.id, user.name)
    s = text or ''
    s += ' '
    s = str(s)  # convert unicode to string
    s = s.replace('\r\n', '\n')
    urls = api.extract_urls(s)
    urls = list(set(urls))
    urls.sort(key=len, reverse=True)
    for url in urls:
        hash_string = md5(url).hexdigest()
        info = api.get_url_info(url)
        if not url.startswith('http'):
            s = s.replace(url,
                          '<a href="http://%s/" target="_blank" title="%s">%s</a>'
                          % (hash_string,
                             info.title if info.title else hash_string,
                             hash_string))
        elif len(url) > 60:
            u = url[:60]
            for template in ['%s ', ' %s', '\n%s', '%s\n', '%s.', '%s,']:
                if template % url in s:
                    s = s.replace(template % url,
                                  template % ('<a href="%s" target="_blank" title="%s">%s</a>'
                                              % (hash_string,
                                                 info.title if info.title else hash_string,
                                                 md5(u + '...').hexdigest())))
                    break
        else:
            for template in ['%s ', ' %s', '\n%s', '%s\n', '%s.', '%s,']:
                if template % url in s:
                    s = s.replace(template % url,
                                  template % ('<a href="%s" target="_blank" title="%s">%s</a>'
                                              % (hash_string,
                                                 info.title if info.title else hash_string,
                                                 hash_string)))
                    break
    for url in urls:
        s = s.replace(md5(url).hexdigest(), url)
        if len(url) > 60 and url.startswith('http'):
            s = s.replace(md5(url[:60] + '...').hexdigest(), url[:60] + '...')
    mentions = MENTIONS_RE.findall(s)
    if mentions:
        for mention in mentions:
            if '](topic:' in mention:
                topic = re.compile('@\[(?P<name>.+)\]\((?P<id>.*)\)').match(mention).groupdict()
                topic['id'] = topic['id'].split(':', 1)[-1]
                # TODO: update topic name?
                s = s.replace(mention,
                              '<a href="/chat/topic/%s" class="chat">%s</a>'
                              % (topic.get('id'), topic.get('name')))
            elif '](user:' in mention:
                user = re.compile('@\[(?P<name>.+)\]\((?P<id>.*)\)').match(mention).groupdict()
                user['id'] = user['id'].split(':', 1)[-1]
                s = s.replace(mention,
                              '<a href="/user/%s" class="async"><span class="tag">%s</span></a>'
                              % (user.get('id'), user.get('name')))
            else:
                group = re.compile('@\[(?P<name>.+)\]\((?P<id>.*)\)').match(mention).groupdict()
                group['id'] = group['id'].split(':', 1)[-1]
                s = s.replace(mention,
                              '<a href="/group/%s" class="async"><span class="tag">%s</span></a>'
                              % (group.get('id'), group.get('name')))
    # hashtags = re.compile('(#\[.*?\))').findall(s)
    # if hashtags:
    #     for hashtag in hashtags:
    #         tag = re.compile('#\[(?P<name>.+)\]\((?P<id>.*)\)').match(hashtag).groupdict()
    #         tag['id'] = tag['id'].split(':', 1)[-1]
    #         s = s.replace(hashtag,
    #                       '<a href="?hashtag=%s" class="overlay"><span class="tag">%s</span></a>' % (tag.get('id'), tag.get('name')))
    cache.set(key, s, namespace="filters")
    return s
def _render(info, post_type, owner, viewport, mode=None, **kwargs):
    if post_type == 'comment':
        return COMMENT_TEMPLATE.render(comment=info, owner=owner,
                                       viewport=viewport, mode=mode, **kwargs)
    owner_id = 'public' if (not owner or not owner.id) else owner.id
    if post_type in ['note', 'feed', 'file']:
        if mode:
            key = '%s:%s' % (viewport, mode)
        else:
            key = viewport
        if (owner and owner.id
                and owner.id != info.last_action.owner.id
                and owner.id not in info.read_receipt_ids
                and viewport != "discover"):
            status = 'unread'
        elif viewport == 'news_feed' and owner.id and owner.id in info.pinned_by:
            status = 'pinned'
        elif viewport == 'news_feed' and owner.id and owner.id in info.archived_by:
            status = 'archived'
        else:
            status = None
        if status:
            key = key + ':' + status
        key += ':%s:%s' % (post_type, owner_id)
        if kwargs.get('group'):
            key += ':%s' % kwargs.get('group').id
        namespace = info.id
    else:
        key = post_type
        namespace = owner_id
    html = cache.get(key, namespace)
    hit = False
    if not html:
        if post_type == 'note':
            html = NOTE_TEMPLATE.render(note=info, owner=owner,
                                        view=viewport, mode=mode, **kwargs)
        elif post_type == 'file':
            html = FILE_TEMPLATE.render(file=info, owner=owner,
                                        view=viewport, mode=mode, **kwargs)
        else:
            html = FEED_TEMPLATE.render(feed=info, owner=owner,
                                        view=viewport, mode=mode, **kwargs)
        cache.set(key, html, 86400, namespace)
    else:
        hit = True
    html = html.replace(
        '<li id="post',
        '<li data-key="%s" data-namespace="%s" data-cache-status="%s" id="post'
        % (key, namespace, "HIT" if hit else "MISS"))
    return html