Python get_url_info Examples, api.get_url_info Python Examples

Example #1

0

Show file

File: models.py Project: AloneRoad/jupo

  def description(self):
    """Return a short excerpt of this item's text with the query highlighted.

    The text is taken from the description of the first stored URL (looked up
    through ``api.get_url_info``), else from the ``message`` field, else from
    the ``content`` of the last ``version`` entry. The text is split on '.',
    and every sentence matching ``self.query`` (case-insensitive) is kept with
    each match replaced by ``<b>self.query</b>``. Collection stops once three
    sentences have been kept.

    Returns '' when no text is available, otherwise the kept sentences joined
    by the separator and followed by '...'.
    """
    # Start from None so an item carrying none of the expected keys returns ''
    # below instead of raising UnboundLocalError.
    text = None
    if 'urls' in self.info:
      url = self.info.get('urls')[0]
      info = api.get_url_info(url)
      text = info.description
    elif 'message' in self.info:
      text = self.info.get('message')
    elif 'version' in self.info:
      text = self.info.get('version')[-1].get('content')
    if not text:
      return ''

    # NOTE(review): self.query is compiled as a regular expression and reused
    # as the replacement template without escaping; a query containing regex
    # metacharacters or backslashes may raise or highlight unexpectedly.
    query = api.re.compile(self.query, api.re.IGNORECASE)
    description = []
    for sentence in text.split('.'):
      if query.findall(sentence):
        s = query.sub('<b>%s</b>' % self.query, sentence.strip())
        description.append(s)
      else:
        sep_char = '... '
      # While exactly one sentence has been kept the separator is reset to
      # '. ' on every pass, overriding the '... ' chosen just above.
      if len(description) == 1:
        sep_char = '. '
      elif len(description) > 2:
        break

    description = sep_char.join(description)
    return description + '...'

Example #2

0

Show file

File: models.py Project: kiennt/jupo

    def description(self):
        """Return a short excerpt of this item's text with the query highlighted.

        The text is taken from the description of the first stored URL (looked
        up through ``api.get_url_info``), else from the ``message`` field, else
        from the ``content`` of the last ``version`` entry. The text is split
        on '.', and every sentence matching ``self.query`` (case-insensitive)
        is kept with each match replaced by ``<b>self.query</b>``. Collection
        stops once three sentences have been kept.

        Returns '' when no text is available, otherwise the kept sentences
        joined by the separator and followed by '...'.
        """
        # Start from None so an item carrying none of the expected keys returns
        # '' below instead of raising UnboundLocalError.
        text = None
        if 'urls' in self.info:
            url = self.info.get('urls')[0]
            info = api.get_url_info(url)
            text = info.description
        elif 'message' in self.info:
            text = self.info.get('message')
        elif 'version' in self.info:
            text = self.info.get('version')[-1].get('content')
        if not text:
            return ''

        # NOTE(review): self.query is compiled as a regular expression and
        # reused as the replacement template without escaping; a query
        # containing regex metacharacters or backslashes may raise or
        # highlight unexpectedly.
        query = api.re.compile(self.query, api.re.IGNORECASE)
        description = []
        for sentence in text.split('.'):
            if query.findall(sentence):
                s = query.sub('<b>%s</b>' % self.query, sentence.strip())
                description.append(s)
            else:
                sep_char = '... '
            # While exactly one sentence has been kept the separator is reset
            # to '. ' on every pass, overriding the '... ' chosen just above.
            if len(description) == 1:
                sep_char = '. '
            elif len(description) > 2:
                break

        description = sep_char.join(description)
        return description + '...'

Example #3

0

Show file

File: models.py Project: AloneRoad/jupo

 def urls(self):
   """Return the ``api.get_url_info`` result for each URL stored in ``info``.

   The URLs are read from ``self.info['urls']``. A missing, None or empty
   entry yields an empty list.
   """
   # One .get() replaces the Python-2-only has_key() check and also treats an
   # explicit None value as "no URLs" instead of failing on iteration.
   stored = self.info.get('urls') or []
   return [api.get_url_info(url) for url in stored]

Example #4

0

Show file

File: models.py Project: AloneRoad/jupo

 def urls(self):
   """Return the ``api.get_url_info`` result for each URL in the message.

   The URLs are extracted from ``self.message`` by ``api.extract_urls``.
   """
   links = api.extract_urls(self.message)
   return [api.get_url_info(link) for link in links]

Example #5

0

Show file

File: models.py Project: kiennt/jupo

 def urls(self):
     """Return the ``api.get_url_info`` result for each URL stored in ``info``.

     The URLs are read from ``self.info['urls']`` and looked up with
     ``db_name=self.db_name``. A missing, None or empty entry yields an
     empty list.
     """
     # One .get() replaces the Python-2-only has_key() check and also treats
     # an explicit None value as "no URLs" instead of failing on iteration.
     stored = self.info.get('urls') or []
     return [api.get_url_info(url, db_name=self.db_name) for url in stored]

Example #6

0

Show file

File: models.py Project: kiennt/jupo

 def urls(self):
     """Return the ``api.get_url_info`` result for each URL in the message.

     The URLs are extracted from ``self.message`` by ``api.extract_urls`` and
     looked up with ``db_name=self.db_name``.
     """
     links = api.extract_urls(self.message)
     return [api.get_url_info(link, db_name=self.db_name) for link in links]

Example #7

0

Show file

def flavored_markdown(text):
    """Render markdown *text* to sanitized HTML with links and emoticons.

    Before the markdown conversion, URLs, emoticons/symbols, mentions,
    hashtags and multi-underscore words are swapped for md5 hex placeholders.
    After it, placeholders outside ``<code>`` spans are expanded into anchors
    or emoticon markup, while those inside ``<code>`` spans are restored to
    the original text. The result goes through ``sanitize_html`` and is cached
    in the "filters" cache namespace under a key derived from ``hash(text)``.

    Returns the HTML string, or '' when the sanitized output is empty or
    whitespace only (that case is not cached).
    """
    key = '%s:flavored_markdown' % hash(text)
    html = cache.get(key, namespace="filters")
    if html:
        return html

    # pad with spaces - presumably so the ' %s' patterns below can also match
    # a token at the very start of the text
    text = ' ' + text + ' '
    text = unescape(text)

    # extract Reference-style links
    reference_urls = REFERENCE_URL_REGEX.findall(text)
    reference_urls = [i[0] for i in reference_urls]
    for i in reference_urls:
        text = text.replace(i, md5(i).hexdigest())

    # extract urls
    urls = URL_REGEX.findall(text)
    urls = [i[0] for i in urls if i]
    # longest first - presumably so a URL that is a prefix of a longer one
    # does not clobber it
    urls.sort(key=len, reverse=True)
    for url in urls:
        # only the first context pattern found in the text is replaced
        for pattern in ['%s)', ' %s', '\n%s', '\r\n%s', '%s\n', '%s\r\n']:
            if pattern % url in text:
                text = text.replace(pattern % url,
                                    pattern % md5(url).hexdigest())
                break

    # extract emoticons and symbols
    symbols = EMOTICONS.keys()
    symbols.extend(SYMBOLS.keys())
    symbols.sort(key=len, reverse=True)
    for symbol in symbols:
        # only the first context pattern found in the text is replaced
        for pattern in [
                ' %s', ' %s. ', ' %s.\n', ' %s.\r\n', '\n%s', '\r\n%s', '%s\n',
                '%s\r\n'
        ]:
            if pattern % symbol in text:
                text = text.replace(pattern % symbol,
                                    pattern % md5(symbol).hexdigest())
                break

    # extract mentions
    mentions = re.findall('(@\[.*?\))', text)
    if mentions:
        for mention in mentions:
            text = text.replace(mention, md5(mention).hexdigest())

    # extract hashtags
    hashtags = re.findall('(#\[.*?\))', text)
    if hashtags:
        for hashtag in hashtags:
            text = text.replace(hashtag, md5(hashtag).hexdigest())

    # extract underscores words - prevent foo_bar_baz from ending up with an italic word in the middle
    words_with_underscores = [w for w in \
                              re.findall('((?! {4}|\t)\w+_\w+_\w[\w_]*)', text) \
                              if not w.startswith('_')]

    for word in words_with_underscores:
        text = text.replace(word, md5(word).hexdigest())

    # treats newlines in paragraph-like content as real line breaks
    # (literal '<br>' in the input is parked behind a fixed token first so it
    # survives the newline rewriting, then put back on the last line)
    text = text.strip().replace('<br>', '8b0f0ea73162b7552dda3c149b6c045d')
    text = text.strip().replace('\r\n', '<br>').replace(
        '\n', '<br>')  # normalize \r\n and \n to <br>
    text = text.strip().replace('<br>', '  \n')  # treats newlines
    text = text.strip().replace('||  \n', '||\n')  # undo if wiki-tables
    text = text.strip().replace('8b0f0ea73162b7552dda3c149b6c045d', '<br>')

    # restore reference_urls
    for i in reference_urls:
        text = text.replace(md5(i).hexdigest(), i)

    # convert text to html
    html = markdown(text,
                    extras=[
                        "wiki-tables", "cuddled-lists", "fenced-code-blocks",
                        "header-ids", "code-friendly", "pyshell", "footnotes"
                    ])

    #  print html

    # extract code-blocks
    # (each <code> span is swapped for a placeholder so the expansions below
    # do not touch its contents)
    html = html.replace(
        '\n',
        '<br/>')  # convert multi-lines to single-lines for regex matching
    code_blocks = re.findall('(<code>.*?</code>)', html)
    for block in code_blocks:
        html = html.replace(block, md5(block).hexdigest())

    # Show emoticons and symbols
    for symbol in symbols:
        if SYMBOLS.has_key(symbol):
            html = html.replace(md5(symbol).hexdigest(), SYMBOLS[symbol])
        else:
            html = html.replace(
                md5(symbol).hexdigest(),
                EMOTICONS[symbol].replace("<img src",
                                          "<img class='emoticon' src"))

    # Autolinks urls, mentions and hashtags
    # (no YouTube embedding is performed in this function)
    for url in urls:
        title = api.get_url_info(url).title
        hash_string = md5(url).hexdigest()
        # URLs longer than 40 characters are displayed truncated
        if len(url) > 40:
            html = html.replace(
                hash_string, '<a href="%s" target="_blank" title="%s">%s</a>' %
                (url, title, url[:40] + '...'))
        else:
            html = html.replace(
                hash_string, '<a href="%s" target="_blank" title="%s">%s</a>' %
                (url, title, url))

    for mention in mentions:
        hash_string = md5(mention).hexdigest()
        user = re.compile('@\[(?P<name>.+)\]\((?P<id>.*)\)').match(
            mention).groupdict()
        # keep only the part of the id after the first ':'
        user['id'] = user['id'].split(':', 1)[-1]
        html = html.replace(
            hash_string,
            '<a href="#!/user/%s" class="overlay"><span class="tag">%s</span></a>'
            % (user.get('id'), user.get('name')))

    for hashtag in hashtags:
        hash_string = md5(hashtag).hexdigest()
        tag = re.compile('#\[(?P<name>.+)\]\((?P<id>.*)\)').match(
            hashtag).groupdict()
        # keep only the part of the id after the first ':'
        tag['id'] = tag['id'].split(':', 1)[-1]
        html = html.replace(
            hash_string,
            '<a href="?hashtag=%s" class="overlay"><span class="tag">%s</span></a>'
            % (tag.get('id'), tag.get('name')))

    # Restore code blocks
    for block in code_blocks:
        html = html.replace(md5(block).hexdigest(), block)

    # restore urls, mentions, emoticons and hashtag in code blocks
    # (any placeholder still present at this point is put back as plain text)
    for url in urls:
        html = html.replace(md5(url).hexdigest(), url)
    for mention in mentions:
        html = html.replace(md5(mention).hexdigest(), mention)
    for hashtag in hashtags:
        html = html.replace(md5(hashtag).hexdigest(), hashtag)
    for symbol in symbols:
        html = html.replace(md5(symbol).hexdigest(), symbol)

    # restore words with underscores
    for word in words_with_underscores:
        html = html.replace(md5(word).hexdigest(), word)

    # restore \n
    html = html.replace('<br/>', '\n')

    # xss protection
    html = sanitize_html(html)

    if not html or html.isspace():
        return ''

    # add target="_blank" to all a tags except those with class "overlay"
    html = PyQuery(html)
    html('a:not(.overlay)').attr('target', '_blank')
    html = str(html)
    html = html.replace('<br/>', '<br>')

    cache.set(key, html, namespace="filters")
    return html

Example #8

0

Show file

File: filters.py Project: AloneRoad/jupo

def flavored_markdown(text): 
  """Render markdown *text* to sanitized HTML with links and emoticons.

  Before the markdown conversion, URLs, emoticons/symbols, mentions,
  hashtags and multi-underscore words are swapped for md5 hex placeholders.
  After it, placeholders outside ``<code>`` spans are expanded into anchors
  or emoticon markup, while those inside ``<code>`` spans are restored to
  the original text. The result goes through ``sanitize_html`` and is cached
  in the "filters" cache namespace under a key derived from ``hash(text)``.

  Returns the HTML string, or '' when the sanitized output is empty or
  whitespace only (that case is not cached).
  """
  key = '%s:flavored_markdown' % hash(text)
  html = cache.get(key, namespace="filters")
  if html:
    return html
   
  # pad with spaces - presumably so the ' %s' patterns below can also match
  # a token at the very start of the text
  text = ' ' + text + ' '
  text = unescape(text)
  
  # extract Reference-style links
  reference_urls = REFERENCE_URL_REGEX.findall(text)
  reference_urls = [i[0] for i in reference_urls]
  for i in reference_urls:
    text = text.replace(i, md5(i).hexdigest())  
  
  # extract urls
  urls = URL_REGEX.findall(text)
  urls = [i[0] for i in urls if i]
  # longest first - presumably so a URL that is a prefix of a longer one
  # does not clobber it
  urls.sort(key=len, reverse=True)
  for url in urls:
    # only the first context pattern found in the text is replaced
    for pattern in ['%s)', ' %s', '\n%s', '\r\n%s', '%s\n', '%s\r\n']:
      if pattern % url in text:
        text = text.replace(pattern % url, pattern % md5(url).hexdigest())
        break
      
  # extract emoticons and symbols
  symbols = EMOTICONS.keys()
  symbols.extend(SYMBOLS.keys())
  symbols.sort(key=len, reverse=True)
  for symbol in symbols:
    # only the first context pattern found in the text is replaced
    for pattern in [' %s', ' %s. ', ' %s.\n', ' %s.\r\n', '\n%s', '\r\n%s', '%s\n', '%s\r\n']:
      if pattern % symbol in text:
        text = text.replace(pattern % symbol, pattern % md5(symbol).hexdigest())
        break
  
  # extract mentions
  mentions = re.findall('(@\[.*?\))', text)
  if mentions:
    for mention in mentions:
      text = text.replace(mention, md5(mention).hexdigest())
  
  # extract hashtags
  hashtags = re.findall('(#\[.*?\))', text)
  if hashtags:
    for hashtag in hashtags:
      text = text.replace(hashtag, md5(hashtag).hexdigest())
            
  # extract underscores words - prevent foo_bar_baz from ending up with an italic word in the middle
  words_with_underscores = [w for w in \
                            re.findall('((?! {4}|\t)\w+_\w+_\w[\w_]*)', text) \
                            if not w.startswith('_')]
  
  for word in words_with_underscores:
    text = text.replace(word, md5(word).hexdigest())
  
  # treats newlines in paragraph-like content as real line breaks
  # (literal '<br>' in the input is parked behind a fixed token first so it
  # survives the newline rewriting, then put back on the last line)
  text = text.strip().replace('<br>', '8b0f0ea73162b7552dda3c149b6c045d')
  text = text.strip().replace('\r\n', '<br>').replace('\n', '<br>') # normalize \r\n and \n to <br>
  text = text.strip().replace('<br>', '  \n') # treats newlines
  text = text.strip().replace('||  \n', '||\n') # undo if wiki-tables
  text = text.strip().replace('8b0f0ea73162b7552dda3c149b6c045d', '<br>')
  
  # restore reference_urls
  for i in reference_urls:
    text = text.replace(md5(i).hexdigest(), i) 
  
  # convert text to html
  html = markdown(text, extras=["wiki-tables",
                                "cuddled-lists",
                                "fenced-code-blocks",
                                "header-ids",
                                "code-friendly",
                                "pyshell",
                                "footnotes"])
  
#  print html
  
  # extract code-blocks
  # (each <code> span is swapped for a placeholder so the expansions below
  # do not touch its contents)
  html = html.replace('\n', '<br/>') # convert multi-lines to single-lines for regex matching
  code_blocks = re.findall('(<code>.*?</code>)', html)
  for block in code_blocks:
    html = html.replace(block, md5(block).hexdigest())
    
    
  # Show emoticons and symbols
  for symbol in symbols:
    if SYMBOLS.has_key(symbol):
      html = html.replace(md5(symbol).hexdigest(),
                          SYMBOLS[symbol])
    else:
      html = html.replace(md5(symbol).hexdigest(),
                          EMOTICONS[symbol].replace("<img src", 
                                                    "<img class='emoticon' src"))
  
  # Autolinks urls, mentions and hashtags
  # (no YouTube embedding is performed in this function)
  for url in urls: 
    title = api.get_url_info(url).title
    hash_string = md5(url).hexdigest()
    # URLs longer than 40 characters are displayed truncated
    if len(url) > 40:
      html = html.replace(hash_string, 
                          '<a href="%s" target="_blank" title="%s">%s</a>' % (url, title, url[:40] + '...'))
    else:
      html = html.replace(hash_string, 
                          '<a href="%s" target="_blank" title="%s">%s</a>' % (url, title, url))
  
  for mention in mentions:
    hash_string = md5(mention).hexdigest()
    user = re.compile('@\[(?P<name>.+)\]\((?P<id>.*)\)').match(mention).groupdict()
    # keep only the part of the id after the first ':'
    user['id'] = user['id'].split(':', 1)[-1]
    html = html.replace(hash_string, 
                        '<a href="#!/user/%s" class="overlay"><span class="tag">%s</span></a>' % (user.get('id'), user.get('name')))
  
  for hashtag in hashtags:
    hash_string = md5(hashtag).hexdigest()
    tag = re.compile('#\[(?P<name>.+)\]\((?P<id>.*)\)').match(hashtag).groupdict()
    # keep only the part of the id after the first ':'
    tag['id'] = tag['id'].split(':', 1)[-1]
    html = html.replace(hash_string, 
                        '<a href="?hashtag=%s" class="overlay"><span class="tag">%s</span></a>' % (tag.get('id'), tag.get('name')))  
    
  # Restore code blocks
  for block in code_blocks:
    html = html.replace(md5(block).hexdigest(), block)
  
  # restore urls, mentions, emoticons and hashtag in code blocks
  # (any placeholder still present at this point is put back as plain text)
  for url in urls:
    html = html.replace(md5(url).hexdigest(), url)
  for mention in mentions:
    html = html.replace(md5(mention).hexdigest(), mention)
  for hashtag in hashtags:
    html = html.replace(md5(hashtag).hexdigest(), hashtag)  
  for symbol in symbols:
    html = html.replace(md5(symbol).hexdigest(), symbol)  
  
  # restore words with underscores
  for word in words_with_underscores:
    html = html.replace(md5(word).hexdigest(), word)
  
  # restore \n
  html = html.replace('<br/>', '\n') 

  # xss protection
  html = sanitize_html(html)

  if not html or html.isspace():
    return ''
  
  
  # add target="_blank" to all a tags except those with class "overlay"
  html = PyQuery(html)
  html('a:not(.overlay)').attr('target', '_blank')
  html = str(html)
  html = html.replace('<br/>', '<br>')
  
  cache.set(key, html, namespace="filters")
  return html

Example #9

0

Show file

def autolink(text):
    """Convert URLs and ``@[name](kind:id)`` mentions in *text* into HTML links.

    Falsy input is returned unchanged. A truthy cached result (cache namespace
    "filters", key derived from ``hash(text)``) is returned as-is. If *text*
    matches ``EMAIL_RE`` a link to that user's profile is returned instead
    (that result is not cached).

    Otherwise every URL reported by ``api.extract_urls`` is wrapped in an
    anchor whose title is the page title from ``api.get_url_info`` (falling
    back to the URL itself). URLs that start with 'http' and are longer than
    60 characters are displayed truncated with a trailing '...'. Mentions are
    linked to a topic, user or group page depending on their id prefix.

    Returns the resulting HTML string, which is also stored in the cache.
    """
    if not text:
        return text

    key = '%s:autolink' % hash(text)
    out = cache.get(key, namespace="filters")
    if out:
        return out

    if re.match(EMAIL_RE, text):
        email = text
        user_id = api.get_user_id_from_email_address(email)
        user = api.get_user_info(user_id)
        return '<a href="/user/%s" class="async">%s</a>' % (user.id, user.name)

    # NOTE(review): on Python 2, str() raises UnicodeEncodeError for
    # non-ASCII unicode input - confirm what callers pass in.
    s = str(text + ' ')  # convert unicode to string
    s = s.replace('\r\n', '\n')

    # Longest URLs first, duplicates removed.
    urls = sorted(set(api.extract_urls(s)), key=len, reverse=True)

    # Pass 1: wrap each URL in an anchor, but write md5 placeholders instead
    # of the URL text so later (shorter) URLs cannot match inside markup that
    # was already generated.
    for url in urls:
        hash_string = md5(url).hexdigest()
        info = api.get_url_info(url)
        # NOTE(review): info.title is interpolated into the title attribute
        # without HTML escaping - confirm api.get_url_info returns safe text.
        title = info.title if info.title else hash_string

        if not url.startswith('http'):
            s = s.replace(
                url, '<a href="http://%s/" target="_blank" title="%s">%s</a>' %
                (hash_string, title, hash_string))
            continue

        # Long URLs get a separate placeholder for the truncated label.
        if len(url) > 60:
            label = md5(url[:60] + '...').hexdigest()
        else:
            label = hash_string
        anchor = ('<a href="%s" target="_blank" title="%s">%s</a>' %
                  (hash_string, title, label))

        # Only the first surrounding context found in the text is replaced.
        for template in ['%s ', ' %s', '\n%s', '%s\n', '%s.', '%s,']:
            if template % url in s:
                s = s.replace(template % url, template % anchor)
                break

    # Pass 2: swap the placeholders back for the real (or truncated) URLs.
    for url in urls:
        s = s.replace(md5(url).hexdigest(), url)
        if len(url) > 60 and url.startswith('http'):
            s = s.replace(md5(url[:60] + '...').hexdigest(), url[:60] + '...')

    # Compiled once; the raw string holds the same pattern the original
    # spelled with a plain literal.
    mention_re = re.compile(r'@\[(?P<name>.+)\]\((?P<id>.*)\)')
    for mention in MENTIONS_RE.findall(s):
        if '](topic:' in mention:
            #TODO: update topic name?
            link = '<a href="/chat/topic/%s" class="chat">%s</a>'
        elif '](user:' in mention:
            link = '<a href="/user/%s" class="async"><span class="tag">%s</span></a>'
        else:
            link = '<a href="/group/%s" class="async"><span class="tag">%s</span></a>'

        matched = mention_re.match(mention)
        if matched is None:
            # Leave text that does not parse as a mention untouched.
            continue
        target = matched.groupdict()
        # Keep only the part of the id after the first ':'.
        target_id = target['id'].split(':', 1)[-1]
        s = s.replace(mention, link % (target_id, target.get('name')))

    #  hashtags = re.compile('(#\[.*?\))').findall(s)
    #  if hashtags:
    #    for hashtag in hashtags:
    #      tag = re.compile('#\[(?P<name>.+)\]\((?P<id>.*)\)').match(hashtag).groupdict()
    #      tag['id'] = tag['id'].split(':', 1)[-1]
    #      s = s.replace(hashtag,
    #           '<a href="?hashtag=%s" class="overlay"><span class="tag">%s</span></a>' % (tag.get('id'), tag.get('name')))

    cache.set(key, s, namespace="filters")
    return s

Example #10

0

Show file

File: filters.py Project: AloneRoad/jupo

def autolink(text):
  """Convert URLs and ``@[name](kind:id)`` mentions in *text* into HTML links.

  Falsy input is returned unchanged. A truthy cached result (cache namespace
  "filters", key derived from ``hash(text)``) is returned as-is. If *text*
  matches ``EMAIL_RE`` a link to that user's profile is returned instead
  (that result is not cached).

  Otherwise every URL reported by ``api.extract_urls`` is wrapped in an
  anchor whose title is the page title from ``api.get_url_info`` (falling
  back to the URL itself). URLs that start with 'http' and are longer than
  60 characters are displayed truncated with a trailing '...'. Mentions are
  linked to a topic, user or group page depending on their id prefix.

  Returns the resulting HTML string, which is also stored in the cache.
  """
  if not text:
    return text

  key = '%s:autolink' % hash(text)
  out = cache.get(key, namespace="filters")
  if out:
    return out

  if re.match(EMAIL_RE, text):
    email = text
    user_id = api.get_user_id_from_email_address(email)
    user = api.get_user_info(user_id)
    return '<a href="/user/%s" class="async">%s</a>' % (user.id, user.name)

  # NOTE(review): on Python 2, str() raises UnicodeEncodeError for non-ASCII
  # unicode input - confirm what callers pass in.
  s = str(text + ' ')  # convert unicode to string
  s = s.replace('\r\n', '\n')

  # Longest URLs first, duplicates removed.
  urls = sorted(set(api.extract_urls(s)), key=len, reverse=True)

  # Pass 1: wrap each URL in an anchor, but write md5 placeholders instead of
  # the URL text so later (shorter) URLs cannot match inside markup that was
  # already generated.
  for url in urls:
    hash_string = md5(url).hexdigest()
    info = api.get_url_info(url)
    # NOTE(review): info.title is interpolated into the title attribute
    # without HTML escaping - confirm api.get_url_info returns safe text.
    title = info.title if info.title else hash_string

    if not url.startswith('http'):
      s = s.replace(url, '<a href="http://%s/" target="_blank" title="%s">%s</a>' % (hash_string, title, hash_string))
      continue

    # Long URLs get a separate placeholder for the truncated label.
    if len(url) > 60:
      label = md5(url[:60] + '...').hexdigest()
    else:
      label = hash_string
    anchor = '<a href="%s" target="_blank" title="%s">%s</a>' % (hash_string, title, label)

    # Only the first surrounding context found in the text is replaced.
    for template in ['%s ', ' %s', '\n%s', '%s\n', '%s.', '%s,']:
      if template % url in s:
        s = s.replace(template % url, template % anchor)
        break

  # Pass 2: swap the placeholders back for the real (or truncated) URLs.
  for url in urls:
    s = s.replace(md5(url).hexdigest(), url)
    if len(url) > 60 and url.startswith('http'):
      s = s.replace(md5(url[:60] + '...').hexdigest(), url[:60] + '...')

  # Compiled once; the raw string holds the same pattern the original spelled
  # with a plain literal.
  mention_re = re.compile(r'@\[(?P<name>.+)\]\((?P<id>.*)\)')
  for mention in MENTIONS_RE.findall(s):
    if '](topic:' in mention:
      #TODO: update topic name?
      link = '<a href="/chat/topic/%s" class="chat">%s</a>'
    elif '](user:' in mention:
      link = '<a href="/chat/user/%s" class="chat"><span class="tag">%s</span></a>'
    else:
      link = '<a href="/group/%s" class="async"><span class="tag">%s</span></a>'

    matched = mention_re.match(mention)
    if matched is None:
      # Leave text that does not parse as a mention untouched.
      continue
    target = matched.groupdict()
    # Keep only the part of the id after the first ':'.
    target_id = target['id'].split(':', 1)[-1]
    s = s.replace(mention, link % (target_id, target.get('name')))

  #  hashtags = re.compile('(#\[.*?\))').findall(s)
  #  if hashtags:
  #    for hashtag in hashtags:
  #      tag = re.compile('#\[(?P<name>.+)\]\((?P<id>.*)\)').match(hashtag).groupdict()
  #      tag['id'] = tag['id'].split(':', 1)[-1]
  #      s = s.replace(hashtag,
  #           '<a href="?hashtag=%s" class="overlay"><span class="tag">%s</span></a>' % (tag.get('id'), tag.get('name')))

  cache.set(key, s, namespace="filters")
  return s

Example #11

0

Show file

 def urls(self):
   """Return the ``api.get_url_info`` result for each URL stored in ``info``.

   The URLs are read from ``self.info['urls']``. A missing, None or empty
   entry yields an empty list.
   """
   # One .get() replaces the Python-2-only has_key() check and also treats an
   # explicit None value as "no URLs" instead of failing on iteration.
   stored = self.info.get('urls') or []
   return [api.get_url_info(url) for url in stored]

Example #12

0

Show file

 def urls(self):
   """Return the ``api.get_url_info`` result for each URL in the message.

   The URLs are extracted from ``self.message`` by ``api.extract_urls``.
   """
   links = api.extract_urls(self.message)
   return [api.get_url_info(link) for link in links]

Example #13

0

Show file

File: models.py Project: Coderic/jupo

 def urls(self):
   """Return the ``api.get_url_info`` result for each URL in the message.

   The URLs are extracted from ``self.message`` by ``api.extract_urls`` and
   looked up with ``db_name=self.db_name``.
   """
   links = api.extract_urls(self.message)
   return [api.get_url_info(link, db_name=self.db_name) for link in links]

Example #14

0

Show file

File: models.py Project: Coderic/jupo

 def urls(self):
   """Return the ``api.get_url_info`` result for each URL stored in ``info``.

   The URLs are read from ``self.info['urls']`` and looked up with
   ``db_name=self.db_name``. A missing, None or empty entry yields an empty
   list.
   """
   # One .get() replaces the Python-2-only has_key() check and also treats an
   # explicit None value as "no URLs" instead of failing on iteration.
   stored = self.info.get('urls') or []
   return [api.get_url_info(url, db_name=self.db_name) for url in stored]