Example #1
def fetch_location_dict(area_id):
    key = GROCERY_LOCATION_KEY + u'{}'.format(area_id)
    location_dict = cache.get(key)
    if not location_dict:
        location_url = config.LOCATIONURL + str(area_id) + '/'
        headers = {'Authorization': config.TOKEN}
        response = make_api_call(location_url, headers=headers)

        try:
            data_list = json.loads(response.text)
        except Exception as e:
            logger.exception(e)
            return False, None, u'Unable to fetch area details'

        if not data_list:
            return False, None, u'Area Does not exist'

        data = data_list[0]
        location_dict = dict()
        location_dict['area'] = data.get('areaid')
        location_dict['country'] = [data.get('countryid')]
        location_dict['state'] = [data.get('stateid')]
        location_dict['city'] = [data.get('cityid')]
        location_dict['zone'] = [data.get('zoneid')]

        cache.set(key, location_dict, ex=GROCERY_CACHE_TTL)

    return True, location_dict, None
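A minimal caller sketch for the (success, data, error) triple this helper returns; the area id and the logging call here are illustrative, not from the original:

# Hypothetical usage of fetch_location_dict; 42 is an arbitrary area id.
success, location_dict, error = fetch_location_dict(42)
if not success:
    logger.error(u'area lookup failed: %s', error)
else:
    # Repeat calls within GROCERY_CACHE_TTL are served from cache.
    print(location_dict['city'])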
Example #2
def remove_empty_lines(html):
    key = '%s:remove_empty_lines' % hash(html)
    out = cache.get(key, namespace="filters")
    if out:
        return out

    if '</' in html:
        html = html.strip().replace('\n', '')
        soup = BeautifulSoup(html)
        lines = []
        for element in soup.contents:
            if isinstance(element, Tag):
                if element.text:
                    lines.append(str(element).strip())
                elif 'br' in str(element):
                    lines.append('\n')
            elif isinstance(element, NavigableString):
                lines.append(str(element).strip())
        out = ''.join(lines).strip()
        while '\n\n' in out:
            out = out.replace('\n\n', '\n')
    else:
        out = '\n'.join([line for line in html.split('\n') if line.strip()])
    cache.set(key, out, namespace="filters")
    return out
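A quick sketch of the two branches above, assuming the same cache object; the HTML result is approximate since it depends on how BeautifulSoup re-renders the tags:

# Plain text: blank lines are dropped, newlines kept.
remove_empty_lines('a\n\n\nb')              # -> 'a\nb'
# HTML: tags are re-rendered with blank lines squeezed out (approximate).
remove_empty_lines('<p>a</p>\n\n<p>b</p>')  # -> '<p>a</p><p>b</p>'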
Example #3
def remove_empty_lines(html):
  key = '%s:remove_empty_lines' % hash(html)
  out = cache.get(key, namespace="filters")
  if out:
    return out
  
  if '</' in html:
    html = html.strip().replace('\n', '')
    soup = BeautifulSoup(html)
    lines = []
    for element in soup.contents:
      if isinstance(element, Tag):
        if element.text:
          lines.append(str(element).strip())
        elif 'br' in str(element):
          lines.append('\n')
      elif isinstance(element, NavigableString):
        lines.append(str(element).strip())
    out = ''.join(lines).strip()
    while '\n\n' in out:
      out = out.replace('\n\n', '\n')
  else:
    out = '\n'.join([line for line in html.split('\n') if line.strip()])
  cache.set(key, out, namespace="filters")
  return out
Example #4
def _render(info, post_type, owner, viewport, mode=None, **kwargs):  
  owner_id = 'public' if (not owner or not owner.id) else owner.id
  
  if post_type in ['note', 'feed', 'file']:
    if mode:
      key = '%s:%s' % (viewport, mode)
    else:
      key = viewport
      
    if (owner and 
        owner.id and 
        owner.id != info.last_action.owner.id and 
        owner.id not in info.read_receipt_ids and 
        viewport != "discover"):
      status = 'unread'
    elif viewport == 'news_feed' and owner.id and owner.id in info.pinned_by:
      status = 'pinned'
    elif viewport == 'news_feed' and owner.id and owner.id in info.archived_by:
      status = 'archived'
    else:
      status = None
      
    if status:
      key = key + ':' + status
      
    key += ':%s:%s' % (post_type, owner_id)
    namespace = info.id
    
  else:
    key = post_type
    namespace = owner_id
    
  html = cache.get(key, namespace)
  hit = False
  if not html:
    if post_type == 'note':
      html = NOTE_TEMPLATE.render(note=info, 
                                  owner=owner, 
                                  view=viewport, 
                                  mode=mode, **kwargs)
    elif post_type == 'file':
      html = FILE_TEMPLATE.render(file=info, 
                                  owner=owner, 
                                  view=viewport, 
                                  mode=mode, **kwargs)
    
    else:
      html = FEED_TEMPLATE.render(feed=info, 
                                  owner=owner, 
                                  view=viewport, 
                                  mode=mode, **kwargs)
    cache.set(key, html, 86400, namespace)
  else:
    hit = True

  html = html.replace('<li id="post', '<li data-key="%s" data-namespace="%s" data-cache-status="%s" id="post' % (key, namespace, "HIT" if hit else "MISS"))
    
  return html
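For reference, a sketch of the cache key layout this function composes; the concrete values below are invented for illustration:

# Key layout: viewport[:mode][:status]:post_type:owner_id. For an unread
# note owned by user 7 in the news_feed viewport (no mode) this yields:
key = 'news_feed:unread:note:7'
namespace = info.id  # one namespace per post, so deleting it invalidates
                     # every cached per-viewport rendering of that post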
Example #5
def to_text(html):
    try:
        html = unicode(html)
    except UnicodeDecodeError:
        pass
    key = '%s:to_text' % hash(html)
    out = cache.get(key, namespace="filters")
    if not out:
        out = api.remove_html_tags(html)
        cache.set(key, out, namespace="filters")
    return out
Example #6
def to_text(html):
  try:
    html = unicode(html)
  except UnicodeDecodeError:
    pass
  key = '%s:to_text' % hash(html)
  out = cache.get(key, namespace="filters")
  if not out:
    out = api.remove_html_tags(html)
    cache.set(key, out, namespace="filters")
  return out
Example #7
def unmunge(html):
  """Clean up Word HTML"""
  if 'mso' in html: # remove outlook html style
    key = '%s:unmunge' % hash(html)
    out = cache.get(key, namespace="filters")
    if not out:
      html = re.sub(re.compile('p"mso.*?"'), 'p', html)
      html = re.sub(re.compile('( style=".*?")'), '', html)
      out = unmungeHtml(html.decode('utf-8'))
      cache.set(key, out, namespace="filters")
    return out
  return html
Example #8
def _convert_to_text(html):
  try:
    html = unicode(html)
  except UnicodeDecodeError:
    pass
  key = '%s:convert_to_text' % hash(html)
  out = cache.get(key, namespace="filters")
  if not out:
    html = fix_unclosed_tags(html)
    out = api.remove_html_tags(html)
    cache.set(key, out, namespace="filters")
  return out
Example #9
def unmunge(html):
    """Clean up Word HTML"""
    if 'mso' in html:  # remove outlook html style
        key = '%s:unmunge' % hash(html)
        out = cache.get(key, namespace="filters")
        if not out:
            html = re.sub(re.compile('p"mso.*?"'), 'p', html)
            html = re.sub(re.compile('( style=".*?")'), '', html)
            out = unmungeHtml(html.decode('utf-8'))
            cache.set(key, out, namespace="filters")
        return out
    return html
Example #10
def lines_truncate(text, lines_count=5):
  
  key = '%s:lines_truncate' % hash(text)
  out = cache.get(key, namespace="filters")
#  if out:
#    return out
  
  raw = text
  text = _normalize_newlines(text)
  
  # remove blank lines
  lines = [line for line in text.split('\n') if line.strip()]
#  text = '\n'.join(lines)
  
  images = re.compile('<img.*?>', re.IGNORECASE).findall(text)
  for i in images:
    text = text.replace(i, md5(i).hexdigest())
  
  links = re.compile('<a.*?</a>', re.IGNORECASE).findall(text)
  for i in links:
    text = text.replace(i, md5(i).hexdigest())
  
  text = text.replace('<br/>', '<br>')
  text = text.replace('<br>', '8b0f0ea73162b7552dda3c149b6c045d') # md5('<br>').hexdigest() = '8b0f0ea73162b7552dda3c149b6c045d'
  text = text.strip().replace('\n', '<br>')
  
  words_per_line = 15
  longest_line = max(lines[:lines_count], key=len) if len(lines) != 0 else None
  if longest_line and len(longest_line.split()) > words_per_line: 
    lines = textwrap.wrap(text)
  else:
    lines = [line for line in text.split('<br>') if line.strip()]
    
  # skip blank lines (and blank lines quote)
  if len([line for line in lines if line.strip() and line.strip() != '>']) >= lines_count:
    blank_lines = len([line for line in lines if line.strip() in ['', '>']])
    out = ' '.join(lines[:lines_count+blank_lines])
  else:
    out = text
    
  if len(out) < len(text):
    text = ' '.join(text[:len(out)].split()[0:-1]).rstrip('.') + '...'
    if len(text) / float(len(raw)) > 0.7:  # if most of the text survives, just show it all; not worth truncating
      text = raw
  
  out = text.replace('<br>', '\n')
  out = out.replace('8b0f0ea73162b7552dda3c149b6c045d', '<br>')
  for i in images:
    out = out.replace(md5(i).hexdigest(), i)
  for i in links:
    out = out.replace(md5(i).hexdigest(), i)
  cache.set(key, out, namespace="filters")
  return out  
Example #11
def _convert_to_text(html):
    try:
        html = unicode(html)
    except UnicodeDecodeError:
        pass
    key = '%s:convert_to_text' % hash(html)
    out = cache.get(key, namespace="filters")
    if not out:
        html = fix_unclosed_tags(html)
        out = api.remove_html_tags(html)
        cache.set(key, out, namespace="filters")
    return out
Example #12
    def decorated_function(*args, **kwargs):
        session_id = session.get("session_id")
        user_id = api.get_user_id(session_id)
        if user_id and request.method in ["GET", "OPTIONS"]:
            if request.query_string:
                key = "%s: %s %s?%s" % (user_id, request.method, request.path, request.query_string)
            else:
                key = "%s: %s %s" % (user_id, request.method, request.path)

            rv = cache.get(key)
            if not rv:
                rv = f(*args, **kwargs)
                cache.set(key, rv)
            return rv
        elif user_id and request.method == "POST":
            key = "%s:*" % user_id
            cache.clear(key)
        return f(*args, **kwargs)
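This (like its duplicates in Examples #14 and #21 below) shows only the inner function of a Flask view-caching decorator; a plausible enclosing definition (the decorator name is an assumption) would be:

from functools import wraps

def cache_view(f):
    # Hypothetical outer decorator wrapping the body shown above.
    @wraps(f)
    def decorated_function(*args, **kwargs):
        pass  # body as shown in Example #12
    return decorated_function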
Example #13
def start_testing():
    start_testing_args = {
        'test': fields.Bool(location='json', required=True),
        'seconds': fields.Int(location='json', required=False, missing=3600)
    }
    try:
        args = parser.parse(start_testing_args, request)
    except werkzeug.exceptions.UnprocessableEntity as e:
        return handle_unprocessable_entity(e)

    cache.set(KAFTATESTINGKEY, args['test'], ex=args['seconds'])

    if not args['test']:
        CouponsKafkaProducer.destroy_instance()
    else:
        CouponsKafkaProducer.create_kafka_producer()

    rv = {'success': True}
    return rv
Example #14
    def decorated_function(*args, **kwargs):
        session_id = session.get('session_id')
        user_id = api.get_user_id(session_id)
        if user_id and request.method in ['GET', 'OPTIONS']:
            if request.query_string:
                key = '%s: %s %s?%s' % (user_id, request.method, request.path,
                                        request.query_string)
            else:
                key = '%s: %s %s' % (user_id, request.method, request.path)

            rv = cache.get(key)
            if not rv:
                rv = f(*args, **kwargs)
                cache.set(key, rv)
            return rv
        elif user_id and request.method == 'POST':
            key = '%s:*' % user_id
            cache.clear(key)
        return f(*args, **kwargs)
Example #15
def description(html):
  try:
    html = unicode(html)
  except UnicodeDecodeError:
    pass
  key = '%s:description' % hash(html)
  out = cache.get(key, namespace="filters")
  if out:
    return out
  
  if '</' in html:
    plain_text = _convert_to_text(html)
  else:
    plain_text = html
  lines = []
  for line in plain_text.split('\n'):
    if '(' in line or ')' in line:
      continue
    elif '[' in line or ']' in line:
      continue
    elif '/' in line:
      continue
    elif ';' in line:
      continue
    elif ' ' in line \
      and len(line) > 15 \
      and line.count('.') < 2 \
      and 'dear' not in line.lower() \
      and 'hi' not in line.lower() \
      and 'unsubscribe' not in line.lower():
      lines.append(clean(line))
    else:
      continue
  
  lines.sort(key=len)
  if lines:
    out = lines[-1].rstrip('.') + '...'
  else:
    out = '...'
  cache.set(key, out, namespace="filters")
  return out
Example #16
def description(html):
    try:
        html = unicode(html)
    except UnicodeDecodeError:
        pass
    key = '%s:description' % hash(html)
    out = cache.get(key, namespace="filters")
    if out:
        return out

    if '</' in html:
        plain_text = _convert_to_text(html)
    else:
        plain_text = html
    lines = []
    for line in plain_text.split('\n'):
        if '(' in line or ')' in line:
            continue
        elif '[' in line or ']' in line:
            continue
        elif '/' in line:
            continue
        elif ';' in line:
            continue
        elif ' ' in line \
          and len(line) > 15 \
          and line.count('.') < 2 \
          and 'dear' not in line.lower() \
          and 'hi' not in line.lower() \
          and 'unsubscribe' not in line.lower():
            lines.append(clean(line))
        else:
            continue

    lines.sort(key=len)
    if lines:
        out = lines[-1].rstrip('.') + '...'
    else:
        out = '...'
    cache.set(key, out, namespace="filters")
    return out
Example #17
def sanitize_html(value):
    '''
  https://stackoverflow.com/questions/16861/sanitising-user-input-using-python
  '''
    if '</' not in value:  # not HTML
        return value

    key = '%s:sanitize_html' % hash(value)
    out = cache.get(key, namespace="filters")
    if out:
        return out

    base_url = None
    rjs = r'[\s]*(&#x.{1,7})?'.join(list('javascript:'))
    rvb = r'[\s]*(&#x.{1,7})?'.join(list('vbscript:'))
    re_scripts = re.compile('(%s)|(%s)' % (rjs, rvb), re.IGNORECASE)
    #  validTags = 'p i strong b u a h1 h2 h3 h4 pre br img ul ol li blockquote em code hr'.split()
    validTags = 'a abbr b blockquote code del ins dd dl dt em h2 h3 h4 i img kbd li ol p pre s small sup sub strong strike table tbody th tr td ul br hr div span'.split(
    )
    validAttrs = 'src width height alt title class href'.split()
    urlAttrs = 'href title'.split()  # Attributes which should have a URL

    soup = BeautifulSoup(value.decode('utf-8'))
    for comment in soup.findAll(text=lambda text: isinstance(text, Comment)):
        # Get rid of comments
        comment.extract()
    for tag in soup.findAll(True):
        if tag.name not in validTags:
            tag.hidden = True
        attrs = tag.attrs
        tag.attrs = []
        for attr, val in attrs:
            if attr in validAttrs:
                val = re_scripts.sub('', val)  # Remove scripts (vbs & js)
                if attr in urlAttrs:
                    val = urljoin(base_url, val)  # Calculate the absolute url
                tag.attrs.append((attr, val))

    out = soup.renderContents().decode('utf8')
    cache.set(key, out, namespace="filters")
    return out
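A brief sketch of what the whitelist buys you; the exact output depends on the BeautifulSoup version, so treat the rendered string as approximate:

# onclick is not in validAttrs and script is not in validTags, so both
# are stripped; hidden tags still render their text content.
sanitize_html('<p onclick="x()">hi</p><script>alert(1)</script>')
# -> approximately '<p>hi</p>alert(1)'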
Example #18
def fix_unclosed_tags(html):
  if not html:
    return html
  
  try:
    html = unicode(html)
  except UnicodeDecodeError:
    pass
  try:
    key = '%s:fix_unclosed_tags' % hash(html)
    out = cache.get(key, namespace="filters")
    if out:
      return out
  
    h = lxml.html.fromstring(html)
    out = lxml.html.tostring(h)
    
    cache.set(key, out, namespace="filters")
    return out
  except Exception:
    return ''
Example #19
def fix_unclosed_tags(html):
    if not html:
        return html

    try:
        html = unicode(html)
    except UnicodeDecodeError:
        pass
    try:
        key = '%s:fix_unclosed_tags' % hash(html)
        out = cache.get(key, namespace="filters")
        if out:
            return out

        h = lxml.html.fromstring(html)
        out = lxml.html.tostring(h)

        cache.set(key, out, namespace="filters")
        return out
    except Exception:
        return ''
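A usage sketch; lxml re-serializes the fragment with balanced tags, roughly as follows:

fix_unclosed_tags('<div><p>hello')  # -> approximately '<div><p>hello</p></div>'
fix_unclosed_tags('')               # falsy input is returned unchanged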
Example #20
def sanitize_html(value):
  '''
  https://stackoverflow.com/questions/16861/sanitising-user-input-using-python
  '''
  if '</' not in value: # not HTML
    return value
  
  key = '%s:sanitize_html' % hash(value)
  out = cache.get(key, namespace="filters")
  if out:
    return out
  
  base_url=None
  rjs = r'[\s]*(&#x.{1,7})?'.join(list('javascript:'))
  rvb = r'[\s]*(&#x.{1,7})?'.join(list('vbscript:'))
  re_scripts = re.compile('(%s)|(%s)' % (rjs, rvb), re.IGNORECASE)
#  validTags = 'p i strong b u a h1 h2 h3 h4 pre br img ul ol li blockquote em code hr'.split()
  validTags = 'a abbr b blockquote code del ins dd dl dt em h2 h3 h4 i img kbd li ol p pre s small sup sub strong strike table tbody th tr td ul br hr div span'.split()
  validAttrs = 'src width height alt title class href'.split()
  urlAttrs = 'href title'.split() # Attributes which should have a URL
  
  soup = BeautifulSoup(value.decode('utf-8'))
  for comment in soup.findAll(text=lambda text: isinstance(text, Comment)):
    # Get rid of comments
    comment.extract()
  for tag in soup.findAll(True):
    if tag.name not in validTags:
      tag.hidden = True
    attrs = tag.attrs
    tag.attrs = []
    for attr, val in attrs:
      if attr in validAttrs:
        val = re_scripts.sub('', val) # Remove scripts (vbs & js)
        if attr in urlAttrs:
          val = urljoin(base_url, val) # Calculate the absolute url
        tag.attrs.append((attr, val))

  out = soup.renderContents().decode('utf8')
  cache.set(key, out, namespace="filters")
  return out  
Example #21
def decorated_function(*args, **kwargs):
  session_id = session.get('session_id')
  user_id = api.get_user_id(session_id)
  if user_id and request.method in ['GET', 'OPTIONS']:
    if request.query_string:
      key = '%s: %s %s?%s' % (user_id,
                              request.method,
                              request.path,
                              request.query_string)
    else:
      key = '%s: %s %s' % (user_id,
                           request.method,
                           request.path)

    rv = cache.get(key)
    if not rv:
      rv = f(*args, **kwargs)
      cache.set(key, rv)
    return rv
  elif user_id and request.method == 'POST':
    key = '%s:*' % user_id
    cache.clear(key)
  return f(*args, **kwargs)
Example #22
def flavored_markdown(text):
    key = '%s:flavored_markdown' % hash(text)
    html = cache.get(key, namespace="filters")
    if html:
        return html

    text = ' ' + text + ' '
    text = unescape(text)

    # extract Reference-style links
    reference_urls = REFERENCE_URL_REGEX.findall(text)
    reference_urls = [i[0] for i in reference_urls]
    for i in reference_urls:
        text = text.replace(i, md5(i).hexdigest())

    # extract urls
    urls = URL_REGEX.findall(text)
    urls = [i[0] for i in urls if i]
    urls.sort(key=len, reverse=True)
    for url in urls:
        for pattern in ['%s)', ' %s', '\n%s', '\r\n%s', '%s\n', '%s\r\n']:
            if pattern % url in text:
                text = text.replace(pattern % url,
                                    pattern % md5(url).hexdigest())
                break

    # extract emoticons and symbols
    symbols = EMOTICONS.keys()
    symbols.extend(SYMBOLS.keys())
    symbols.sort(key=len, reverse=True)
    for symbol in symbols:
        for pattern in [
                ' %s', ' %s. ', ' %s.\n', ' %s.\r\n', '\n%s', '\r\n%s', '%s\n',
                '%s\r\n'
        ]:
            if pattern % symbol in text:
                text = text.replace(pattern % symbol,
                                    pattern % md5(symbol).hexdigest())
                break

    # extract mentions
    mentions = re.findall('(@\[.*?\))', text)
    if mentions:
        for mention in mentions:
            text = text.replace(mention, md5(mention).hexdigest())

    # extract hashtags
    hashtags = re.findall('(#\[.*?\))', text)
    if hashtags:
        for hashtag in hashtags:
            text = text.replace(hashtag, md5(hashtag).hexdigest())

    # extract underscores words - prevent foo_bar_baz from ending up with an italic word in the middle
    words_with_underscores = [w for w in \
                              re.findall('((?! {4}|\t)\w+_\w+_\w[\w_]*)', text) \
                              if not w.startswith('_')]

    for word in words_with_underscores:
        text = text.replace(word, md5(word).hexdigest())

    # treats newlines in paragraph-like content as real line breaks
    text = text.strip().replace('<br>', '8b0f0ea73162b7552dda3c149b6c045d')
    text = text.strip().replace('\r\n', '<br>').replace(
        '\n', '<br>')  # normalize \r\n and \n to <br>
    text = text.strip().replace('<br>', '  \n')  # treats newlines
    text = text.strip().replace('||  \n', '||\n')  # undo if wiki-tables
    text = text.strip().replace('8b0f0ea73162b7552dda3c149b6c045d', '<br>')

    # restore reference_urls
    for i in reference_urls:
        text = text.replace(md5(i).hexdigest(), i)

    # convert text to html
    html = markdown(text,
                    extras=[
                        "wiki-tables", "cuddled-lists", "fenced-code-blocks",
                        "header-ids", "code-friendly", "pyshell", "footnotes"
                    ])

    #  print html

    # extract code-blocks
    html = html.replace(
        '\n',
        '<br/>')  # convert multi-lines to single-lines for regex matching
    code_blocks = re.findall('(<code>.*?</code>)', html)
    for block in code_blocks:
        html = html.replace(block, md5(block).hexdigest())

    # Show emoticons and symbols
    for symbol in symbols:
        if SYMBOLS.has_key(symbol):
            html = html.replace(md5(symbol).hexdigest(), SYMBOLS[symbol])
        else:
            html = html.replace(
                md5(symbol).hexdigest(),
                EMOTICONS[symbol].replace("<img src",
                                          "<img class='emoticon' src"))

    # Autolinks urls, mentions, hashtags, turn youtube links to embed code
    for url in urls:
        title = api.get_url_info(url).title
        hash_string = md5(url).hexdigest()
        if len(url) > 40:
            html = html.replace(
                hash_string, '<a href="%s" target="_blank" title="%s">%s</a>' %
                (url, title, url[:40] + '...'))
        else:
            html = html.replace(
                hash_string, '<a href="%s" target="_blank" title="%s">%s</a>' %
                (url, title, url))

    for mention in mentions:
        hash_string = md5(mention).hexdigest()
        user = re.compile('@\[(?P<name>.+)\]\((?P<id>.*)\)').match(
            mention).groupdict()
        user['id'] = user['id'].split(':', 1)[-1]
        html = html.replace(
            hash_string,
            '<a href="#!/user/%s" class="overlay"><span class="tag">%s</span></a>'
            % (user.get('id'), user.get('name')))

    for hashtag in hashtags:
        hash_string = md5(hashtag).hexdigest()
        tag = re.compile('#\[(?P<name>.+)\]\((?P<id>.*)\)').match(
            hashtag).groupdict()
        tag['id'] = tag['id'].split(':', 1)[-1]
        html = html.replace(
            hash_string,
            '<a href="?hashtag=%s" class="overlay"><span class="tag">%s</span></a>'
            % (tag.get('id'), tag.get('name')))

    # Restore code blocks
    for block in code_blocks:
        html = html.replace(md5(block).hexdigest(), block)

    # restore urls, mentions, emoticons and hashtag in code blocks
    for url in urls:
        html = html.replace(md5(url).hexdigest(), url)
    for mention in mentions:
        html = html.replace(md5(mention).hexdigest(), mention)
    for hashtag in hashtags:
        html = html.replace(md5(hashtag).hexdigest(), hashtag)
    for symbol in symbols:
        html = html.replace(md5(symbol).hexdigest(), symbol)

    # restore words with underscores
    for word in words_with_underscores:
        html = html.replace(md5(word).hexdigest(), word)

    # restore \n
    html = html.replace('<br/>', '\n')

    # xss protection
    html = sanitize_html(html)

    if not html or html.isspace():
        return ''

    # add target="_blank" to all a tags
    html = PyQuery(html)
    html('a:not(.overlay)').attr('target', '_blank')
    html = str(html)
    html = html.replace('<br/>', '<br>')

    cache.set(key, html, namespace="filters")
    return html
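The protect/transform/restore pattern used throughout flavored_markdown can be distilled to a small sketch (Python 2-style md5 over str, as in the examples above; the helper names are invented):

from hashlib import md5

def protect(text, spans):
    # Swap fragile spans for their digests so the markdown pass
    # cannot mangle them.
    for s in spans:
        text = text.replace(s, md5(s).hexdigest())
    return text

def restore(text, spans):
    # Reverse the substitution after the transform has run.
    for s in spans:
        text = text.replace(md5(s).hexdigest(), s)
    return text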
Example #23
def fetch_items(subscription_id_list, item_map):
    # item_map maps each subscription id to a list of dicts holding item ids
    # and their respective quantities. Serve each subscription dict from the
    # cache where possible, fetch the rest from the API, and cache those.
    # While iterating over subscription ids, build the verification item
    # dicts and append each constructed item to the final result list.
    item_list = list()
    to_fetch_subscription_list = list()
    for subscription_id in subscription_id_list:
        key = GROCERY_ITEM_KEY + u'{}'.format(subscription_id)
        subscription_dict = cache.get(key)
        if subscription_dict:
            for item in item_map.get(subscription_id):
                item_id = item.get('item_id')
                quantity = item.get('quantity')
                item_dict = dict()
                item_dict['brand'] = subscription_dict.get('brandid')
                item_dict['category'] = [subscription_dict.get('categoryid')]
                item_dict['product'] = [subscription_dict.get('productid')]
                item_dict['seller'] = subscription_dict.get('sellerid')
                item_dict['storefront'] = subscription_dict.get(
                    'storefront_id')
                item_dict['variant'] = subscription_dict.get('variantid')
                item_dict['price'] = subscription_dict.get('offerprice')
                item_dict['quantity'] = quantity
                item_dict['subscription_id'] = subscription_id
                item_dict['item_id'] = item_id
                item_obj = VerificationItemData(**item_dict)
                item_list.append(item_obj)
        else:
            to_fetch_subscription_list.append(subscription_id)

    if to_fetch_subscription_list:
        subscription_id_list_str = ','.join(
            u'{}'.format(v) for v in to_fetch_subscription_list)

        item_url = config.SUBSCRIPTIONURL + subscription_id_list_str
        headers = {'Authorization': config.TOKEN}

        response = make_api_call(item_url, headers=headers)

        try:
            data_list = json.loads(response.text)
        except Exception as e:
            logger.exception(e)
            return False, None, u'Unable to fetch Items'

        if not isinstance(
                data_list,
                list) or len(data_list) != len(to_fetch_subscription_list):
            return False, None, u'Invalid Item ids provided'

        for data in data_list:
            key = GROCERY_ITEM_KEY + u'{}'.format(data.get('itemid'))
            cache.set(key, data, ex=GROCERY_CACHE_TTL)
            for item in item_map.get(u'{}'.format(data.get('itemid'))):
                item_id = item.get('item_id')
                quantity = item.get('quantity')
                item_dict = dict()
                item_dict['brand'] = data.get('brandid')
                item_dict['category'] = [data.get('categoryid')]
                item_dict['product'] = [data.get('productid')]
                item_dict['seller'] = data.get('sellerid')
                item_dict['storefront'] = data.get('storefront_id')
                item_dict['variant'] = data.get('variantid')
                item_dict['price'] = data.get('offerprice')
                item_dict['quantity'] = quantity
                item_dict['subscription_id'] = data.get('itemid')
                item_dict['item_id'] = item_id
                item_obj = VerificationItemData(**item_dict)
                item_list.append(item_obj)

    return True, item_list, None
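To make the comment at the top concrete, a hypothetical item_map and call for two subscriptions might look like this (ids invented):

item_map = {
    u'101': [{'item_id': 1, 'quantity': 2}],
    u'102': [{'item_id': 5, 'quantity': 1}, {'item_id': 6, 'quantity': 3}],
}
ok, items, error = fetch_items([u'101', u'102'], item_map)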
Example #24
def autolink(text):  
  if not text:
    return text
  
  key = '%s:autolink' % hash(text)
  out = cache.get(key, namespace="filters")
  if out:
    return out
  
  if re.match(EMAIL_RE, text):
    email = text 
    user_id = api.get_user_id_from_email_address(email)
    user = api.get_user_info(user_id)
    return '<a href="/user/%s" class="async">%s</a>' % (user.id, user.name)
    
  s = text or ''
  s += ' '
  s = str(s) # convert unicode to string
  s = s.replace('\r\n', '\n')

  
  urls = api.extract_urls(s)
  urls = list(set(urls))
  urls.sort(key=len, reverse=True)
  
  for url in urls:
    hash_string = md5(url).hexdigest()
    info = api.get_url_info(url)
    if not url.startswith('http'):
      s = s.replace(url, '<a href="http://%s/" target="_blank" title="%s">%s</a>' % (hash_string, info.title if info.title else hash_string, hash_string))
    
    elif len(url) > 60:
      u = url[:60]
        
      for template in ['%s ', ' %s', '\n%s', '%s\n', '%s.', '%s,']:
        if template % url in s:
          s = s.replace(template % url, 
                        template % ('<a href="%s" target="_blank" title="%s">%s</a>' % (hash_string, info.title if info.title else hash_string, md5(u + '...').hexdigest())))
          break
    else:
      for template in ['%s ', ' %s', '\n%s', '%s\n', '%s.', '%s,']:
        if template % url in s:
          s = s.replace(template % url, 
                        template % ('<a href="%s" target="_blank" title="%s">%s</a>' % (hash_string, info.title if info.title else hash_string, hash_string)))
          break
        
  for url in urls:
    s = s.replace(md5(url).hexdigest(), url)
    if len(url) > 60 and url.startswith('http'):
      s = s.replace(md5(url[:60] + '...').hexdigest(), url[:60] + '...')
      
  
  mentions = MENTIONS_RE.findall(s)
  if mentions:
    for mention in mentions:
      if '](topic:' in mention:
        topic = re.compile('@\[(?P<name>.+)\]\((?P<id>.*)\)').match(mention).groupdict()
        topic['id'] = topic['id'].split(':', 1)[-1]
        
        #TODO: update topic name?
        s = s.replace(mention, 
             '<a href="/chat/topic/%s" class="chat">%s</a>' % (topic.get('id'), topic.get('name')))
      elif '](user:' in mention:
        user = re.compile('@\[(?P<name>.+)\]\((?P<id>.*)\)').match(mention).groupdict()
        user['id'] = user['id'].split(':', 1)[-1]
        s = s.replace(mention, 
             '<a href="/chat/user/%s" class="chat"><span class="tag">%s</span></a>' % (user.get('id'), user.get('name')))
      else:
        group = re.compile('@\[(?P<name>.+)\]\((?P<id>.*)\)').match(mention).groupdict()
        group['id'] = group['id'].split(':', 1)[-1]
        s = s.replace(mention, 
             '<a href="/group/%s" class="async"><span class="tag">%s</span></a>' % (group.get('id'), group.get('name')))
        
#  hashtags = re.compile('(#\[.*?\))').findall(s)
#  if hashtags:
#    for hashtag in hashtags:
#      tag = re.compile('#\[(?P<name>.+)\]\((?P<id>.*)\)').match(hashtag).groupdict()
#      tag['id'] = tag['id'].split(':', 1)[-1]
#      s = s.replace(hashtag, 
#           '<a href="?hashtag=%s" class="overlay"><span class="tag">%s</span></a>' % (tag.get('id'), tag.get('name')))
  
  cache.set(key, s, namespace="filters")
  return s
Example #25
def fetch_items(subscription_id_list, item_map):
    # item_map maps each subscription id to a list of dicts holding item ids
    # and their respective quantities. Serve each subscription dict from the
    # cache where possible, fetch the rest from the API, and cache those.
    # While iterating over subscription ids, build the verification item
    # dicts and append each constructed item to the final result list.
    item_list = list()
    to_fetch_subscription_list = list()
    for subscription_id in subscription_id_list:
        key = GROCERY_ITEM_KEY + u'{}'.format(subscription_id)
        subscription_dict = cache.get(key)
        if subscription_dict:
            for item in item_map.get(subscription_id):
                item_id = item.get('item_id')
                quantity = item.get('quantity')
                item_dict = copy.deepcopy(subscription_dict)
                item_dict['quantity'] = quantity
                item_dict['subscription_id'] = subscription_id
                item_dict['item_id'] = item_id
                item_obj = VerificationItemData(**item_dict)
                item_list.append(item_obj)
        else:
            to_fetch_subscription_list.append(subscription_id)

    if to_fetch_subscription_list:
        to_fetch_subscription_list = [
            int(to_fetch_item_id)
            for to_fetch_item_id in to_fetch_subscription_list
        ]

        body = {
            "query": {
                "type": ["grocery"],
                "filters": {
                    "id": to_fetch_subscription_list
                },
                "select": [
                    "sellerId", "variantId", "productId", "categories",
                    "storeFronts", "brandId"
                ]
            },
            "count": len(to_fetch_subscription_list),
            "offset": 0
        }

        headers = config.SUBSCRIPTIONHEADERS

        response = make_api_call(config.SUBSCRIPTIONURL,
                                 method='POST',
                                 headers=headers,
                                 body=body)

        try:
            response_data = json.loads(response.text)
        except Exception as e:
            logger.exception(e)
            return False, None, u'Unable to fetch Items'

        try:
            count = response_data['results'][0]['items'][0]['count']
            if count != len(to_fetch_subscription_list):
                return False, None, u'Invalid Subscription Ids provided'
            raw_data_list = response_data['results'][0]['items'][0]['items']
        except Exception as e:
            logger.exception(e)
            logger.error(u'Invalid Response for items {} received {}'.format(
                to_fetch_subscription_list, response_data))
            return False, None, u'Unknown Error. Please contact tech support'

        for raw_data in raw_data_list:
            data = {
                'variant': raw_data['variantId'],
                'price': raw_data['offerPrice'],
                'brand': raw_data['brandId'],
                'product': [raw_data['productId']],
                'seller': raw_data['sellerId']
            }
            category_list = list()
            for category in raw_data['categories']:
                category_list.append(category['id'])
            data['category'] = category_list
            storefront_list = list()
            for storefront in raw_data['storeFronts']:
                storefront_list.append(storefront['id'])
            data['storefront'] = storefront_list
            key = GROCERY_ITEM_KEY + u'{}'.format(raw_data.get('id'))
            cache.set(key, data, ex=GROCERY_CACHE_TTL)
            for item in item_map.get(u'{}'.format(raw_data.get('id'))):
                item_id = item.get('item_id')
                quantity = item.get('quantity')
                item_dict = copy.deepcopy(data)
                item_dict['quantity'] = quantity
                item_dict['subscription_id'] = raw_data.get('id')
                item_dict['item_id'] = item_id
                item_obj = VerificationItemData(**item_dict)
                item_list.append(item_obj)

    return True, item_list, None
Example #26
def fetch_location_dict(id):
    key = GROCERY_LOCATION_KEY + u'{}'.format(id)
    location_dict = cache.get(key)
    if not location_dict:
        location_url = config.LOCATIONURL + str(id)
        response = make_api_call(location_url)

        try:
            raw_data = json.loads(response.text)
        except Exception as e:
            logger.exception(e)
            return False, None, u'Unable to fetch details for geo id={}'.format(
                id)

        if not raw_data.get('locations'):
            return False, None, u'geo id={} does not exist'.format(id)

        locations = raw_data.get('locations')

        data = None
        for location in locations:
            if 'tags' in location and location['tags']:
                if 'grocery' in location['tags']:
                    data = location
                    break

        if not data and not (
            ('tags' in locations[0]) and locations[0]['tags'] and
            ('grocery' not in locations[0]['tags'])):
            data = locations[0]

        if not data or not data['types']:
            return False, None, u'{} is not a valid geo Id'.format(id)

        geo_types_ordered = [
            'area', 'pincode', 'zone', 'city', 'state', 'country'
        ]
        id_types = data['types']
        id_type = None
        for geo_type in geo_types_ordered:
            if geo_type in id_types:
                id_type = geo_type
                break

        if not id_type:
            return False, None, u'{} is not a valid geo Id'.format(id)

        location_dict = {
            'area': list(),
            'state': list(),
            'city': list(),
            'pincode': list(),
            'zone': list(),
            'country': list()
        }
        for container in data.get('containers'):
            for geo_type in geo_types_ordered:
                if geo_type in container['types']:
                    location_dict[geo_type].append(container['gid'])
        location_dict[id_type].append(id)

        if not location_dict['country']:
            location_dict['country'].append(
                1
            )  # TODO remove this, once geo service starts returning country also

        cache.set(key, location_dict, ex=GROCERY_CACHE_TTL)

    return True, location_dict, None
Example #27
def flavored_markdown(text): 
  key = '%s:flavored_markdown' % hash(text)
  html = cache.get(key, namespace="filters")
  if html:
    return html
   
  text = ' ' + text + ' '
  text = unescape(text)
  
  # extract Reference-style links
  reference_urls = REFERENCE_URL_REGEX.findall(text)
  reference_urls = [i[0] for i in reference_urls]
  for i in reference_urls:
    text = text.replace(i, md5(i).hexdigest())  
  
  # extract urls
  urls = URL_REGEX.findall(text)
  urls = [i[0] for i in urls if i]
  urls.sort(key=len, reverse=True)
  for url in urls:
    for pattern in ['%s)', ' %s', '\n%s', '\r\n%s', '%s\n', '%s\r\n']:
      if pattern % url in text:
        text = text.replace(pattern % url, pattern % md5(url).hexdigest())
        break
      
  # extract emoticons and symbols
  symbols = EMOTICONS.keys()
  symbols.extend(SYMBOLS.keys())
  symbols.sort(key=len, reverse=True)
  for symbol in symbols:
    for pattern in [' %s', ' %s. ', ' %s.\n', ' %s.\r\n', '\n%s', '\r\n%s', '%s\n', '%s\r\n']:
      if pattern % symbol in text:
        text = text.replace(pattern % symbol, pattern % md5(symbol).hexdigest())
        break
  
  # extract mentions
  mentions = re.findall('(@\[.*?\))', text)
  if mentions:
    for mention in mentions:
      text = text.replace(mention, md5(mention).hexdigest())
  
  # extract hashtags
  hashtags = re.findall('(#\[.*?\))', text)
  if hashtags:
    for hashtag in hashtags:
      text = text.replace(hashtag, md5(hashtag).hexdigest())
            
  # extract underscores words - prevent foo_bar_baz from ending up with an italic word in the middle
  words_with_underscores = [w for w in \
                            re.findall('((?! {4}|\t)\w+_\w+_\w[\w_]*)', text) \
                            if not w.startswith('_')]
  
  for word in words_with_underscores:
    text = text.replace(word, md5(word).hexdigest())
  
  # treats newlines in paragraph-like content as real line breaks
  text = text.strip().replace('<br>', '8b0f0ea73162b7552dda3c149b6c045d')
  text = text.strip().replace('\r\n', '<br>').replace('\n', '<br>') # normalize \r\n and \n to <br>
  text = text.strip().replace('<br>', '  \n') # treats newlines
  text = text.strip().replace('||  \n', '||\n') # undo if wiki-tables
  text = text.strip().replace('8b0f0ea73162b7552dda3c149b6c045d', '<br>')
  
  # restore reference_urls
  for i in reference_urls:
    text = text.replace(md5(i).hexdigest(), i) 
  
  # convert text to html
  html = markdown(text, extras=["wiki-tables",
                                "cuddled-lists",
                                "fenced-code-blocks",
                                "header-ids",
                                "code-friendly",
                                "pyshell",
                                "footnotes"])
  
#  print html
  
  # extract code-blocks
  html = html.replace('\n', '<br/>') # convert multi-lines to single-lines for regex matching
  code_blocks = re.findall('(<code>.*?</code>)', html)
  for block in code_blocks:
    html = html.replace(block, md5(block).hexdigest())
    
    
  # Show emoticons and symbols
  for symbol in symbols:
    if SYMBOLS.has_key(symbol):
      html = html.replace(md5(symbol).hexdigest(),
                          SYMBOLS[symbol])
    else:
      html = html.replace(md5(symbol).hexdigest(),
                          EMOTICONS[symbol].replace("<img src", 
                                                    "<img class='emoticon' src"))
  
  # Autolinks urls, mentions, hashtags, turn youtube links to embed code
  for url in urls: 
    title = api.get_url_info(url).title
    hash_string = md5(url).hexdigest()
    if len(url) > 40:
      html = html.replace(hash_string, 
                          '<a href="%s" target="_blank" title="%s">%s</a>' % (url, title, url[:40] + '...'))
    else:
      html = html.replace(hash_string, 
                          '<a href="%s" target="_blank" title="%s">%s</a>' % (url, title, url))
  
  for mention in mentions:
    hash_string = md5(mention).hexdigest()
    user = re.compile('@\[(?P<name>.+)\]\((?P<id>.*)\)').match(mention).groupdict()
    user['id'] = user['id'].split(':', 1)[-1]
    html = html.replace(hash_string, 
                        '<a href="#!/user/%s" class="overlay"><span class="tag">%s</span></a>' % (user.get('id'), user.get('name')))
  
  for hashtag in hashtags:
    hash_string = md5(hashtag).hexdigest()
    tag = re.compile('#\[(?P<name>.+)\]\((?P<id>.*)\)').match(hashtag).groupdict()
    tag['id'] = tag['id'].split(':', 1)[-1]
    html = html.replace(hash_string, 
                        '<a href="?hashtag=%s" class="overlay"><span class="tag">%s</span></a>' % (tag.get('id'), tag.get('name')))  
    
  # Restore code blocks
  for block in code_blocks:
    html = html.replace(md5(block).hexdigest(), block)
  
  # restore urls, mentions, emoticons and hashtag in code blocks
  for url in urls:
    html = html.replace(md5(url).hexdigest(), url)
  for mention in mentions:
    html = html.replace(md5(mention).hexdigest(), mention)
  for hashtag in hashtags:
    html = html.replace(md5(hashtag).hexdigest(), hashtag)  
  for symbol in symbols:
    html = html.replace(md5(symbol).hexdigest(), symbol)  
  
  # restore words with underscores
  for word in words_with_underscores:
    html = html.replace(md5(word).hexdigest(), word)
  
  # restore \n
  html = html.replace('<br/>', '\n') 

  # xss protection
  html = sanitize_html(html)

  if not html or html.isspace():
    return ''
  
  
  # add target="_blank" to all a tags
  html = PyQuery(html)
  html('a:not(.overlay)').attr('target', '_blank')
  html = str(html)
  html = html.replace('<br/>', '<br>')
  
  cache.set(key, html, namespace="filters")
  return html  
Example #28
def autolink(text):
    if not text:
        return text

    key = '%s:autolink' % hash(text)
    out = cache.get(key, namespace="filters")
    if out:
        return out

    if re.match(EMAIL_RE, text):
        email = text
        user_id = api.get_user_id_from_email_address(email)
        user = api.get_user_info(user_id)
        return '<a href="/user/%s" class="async">%s</a>' % (user.id, user.name)

    s = text or ''
    s += ' '
    s = str(s)  # convert unicode to string
    s = s.replace('\r\n', '\n')

    urls = api.extract_urls(s)
    urls = list(set(urls))
    urls.sort(key=len, reverse=True)

    for url in urls:
        hash_string = md5(url).hexdigest()
        info = api.get_url_info(url)
        if not url.startswith('http'):
            s = s.replace(
                url, '<a href="http://%s/" target="_blank" title="%s">%s</a>' %
                (hash_string, info.title if info.title else hash_string,
                 hash_string))

        elif len(url) > 60:
            u = url[:60]

            for template in ['%s ', ' %s', '\n%s', '%s\n', '%s.', '%s,']:
                if template % url in s:
                    s = s.replace(
                        template % url, template %
                        ('<a href="%s" target="_blank" title="%s">%s</a>' %
                         (hash_string, info.title if info.title else
                          hash_string, md5(u + '...').hexdigest())))
                    break
        else:
            for template in ['%s ', ' %s', '\n%s', '%s\n', '%s.', '%s,']:
                if template % url in s:
                    s = s.replace(
                        template % url, template %
                        ('<a href="%s" target="_blank" title="%s">%s</a>' %
                         (hash_string, info.title
                          if info.title else hash_string, hash_string)))
                    break

    for url in urls:
        s = s.replace(md5(url).hexdigest(), url)
        if len(url) > 60 and url.startswith('http'):
            s = s.replace(md5(url[:60] + '...').hexdigest(), url[:60] + '...')

    mentions = MENTIONS_RE.findall(s)
    if mentions:
        for mention in mentions:
            if '](topic:' in mention:
                topic = re.compile('@\[(?P<name>.+)\]\((?P<id>.*)\)').match(
                    mention).groupdict()
                topic['id'] = topic['id'].split(':', 1)[-1]

                #TODO: update topic name?
                s = s.replace(
                    mention, '<a href="/chat/topic/%s" class="chat">%s</a>' %
                    (topic.get('id'), topic.get('name')))
            elif '](user:' in mention:
                user = re.compile('@\[(?P<name>.+)\]\((?P<id>.*)\)').match(
                    mention).groupdict()
                user['id'] = user['id'].split(':', 1)[-1]
                s = s.replace(
                    mention,
                    '<a href="/user/%s" class="async"><span class="tag">%s</span></a>'
                    % (user.get('id'), user.get('name')))
            else:
                group = re.compile('@\[(?P<name>.+)\]\((?P<id>.*)\)').match(
                    mention).groupdict()
                group['id'] = group['id'].split(':', 1)[-1]
                s = s.replace(
                    mention,
                    '<a href="/group/%s" class="async"><span class="tag">%s</span></a>'
                    % (group.get('id'), group.get('name')))


#  hashtags = re.compile('(#\[.*?\))').findall(s)
#  if hashtags:
#    for hashtag in hashtags:
#      tag = re.compile('#\[(?P<name>.+)\]\((?P<id>.*)\)').match(hashtag).groupdict()
#      tag['id'] = tag['id'].split(':', 1)[-1]
#      s = s.replace(hashtag,
#           '<a href="?hashtag=%s" class="overlay"><span class="tag">%s</span></a>' % (tag.get('id'), tag.get('name')))

    cache.set(key, s, namespace="filters")
    return s
Example #29
def _render(info, post_type, owner, viewport, mode=None, **kwargs):

    if post_type == 'comment':
        return COMMENT_TEMPLATE.render(comment=info,
                                       owner=owner,
                                       viewport=viewport,
                                       mode=mode,
                                       **kwargs)

    owner_id = 'public' if (not owner or not owner.id) else owner.id

    if post_type in ['note', 'feed', 'file']:
        if mode:
            key = '%s:%s' % (viewport, mode)
        else:
            key = viewport

        if (owner and owner.id and owner.id != info.last_action.owner.id
                and owner.id not in info.read_receipt_ids
                and viewport != "discover"):
            status = 'unread'
        elif viewport == 'news_feed' and owner.id and owner.id in info.pinned_by:
            status = 'pinned'
        elif viewport == 'news_feed' and owner.id and owner.id in info.archived_by:
            status = 'archived'
        else:
            status = None

        if status:
            key = key + ':' + status

        key += ':%s:%s' % (post_type, owner_id)
        if kwargs.get('group'):
            key += ':%s' % kwargs.get('group').id

        namespace = info.id

    else:
        key = post_type
        namespace = owner_id

    html = cache.get(key, namespace)
    hit = False
    if not html:
        if post_type == 'note':
            html = NOTE_TEMPLATE.render(note=info,
                                        owner=owner,
                                        view=viewport,
                                        mode=mode,
                                        **kwargs)
        elif post_type == 'file':
            html = FILE_TEMPLATE.render(file=info,
                                        owner=owner,
                                        view=viewport,
                                        mode=mode,
                                        **kwargs)
        else:
            html = FEED_TEMPLATE.render(feed=info,
                                        owner=owner,
                                        view=viewport,
                                        mode=mode,
                                        **kwargs)

        cache.set(key, html, 86400, namespace)
    else:
        hit = True

    html = html.replace(
        '<li id="post',
        '<li data-key="%s" data-namespace="%s" data-cache-status="%s" id="post'
        % (key, namespace, "HIT" if hit else "MISS"))

    return html