def _Clean(url):
  """Clean the contents of a given URL to only the "readable part".

  Handle special cases like YouTube, PDF, images directly. Delegate out to
  either extract content from the site's feed, or parse and clean the HTML.

  Args:
    url: String, the URL to the interesting content.

  Returns:
    Tuple of strings: (final URL after redirects, HTML of the "readable part").
  """
  # Handle de-facto standard "hash bang" URLs ( http://goo.gl/LNmg )
  url = url.replace('#!', '?_escaped_fragment_=')
  # Otherwise ignore fragments.
  url = re.sub(r'#.*', '', url)
  # And strip common tracking noise.
  url = re.sub(r'[?&]utm_[^&]+', '', url)

  # Google Docs viewer wrapping a cached PDF: unwrap to the PDF itself.
  # (Dots escaped so the pattern matches only docs.google.com, not e.g.
  # docsXgoogleXcom.)
  match = re.search(r'^https?://docs\.google\.com.*cache:.*?:(.*?\.pdf)',
                    url, re.I)
  if match:
    url = match.group(1)
    if 'http' not in url:
      url = 'http://' + url

  # Native Google Docs documents get a dedicated embed template.
  match = re.search(r'^https?://docs\.google\.com.*docid=(.*?)(&|$)',
                    url, re.I)
  if match:
    _TrackClean('direct_google_docs')
    html = util.RenderTemplate('google-docs.html',
                               {'docid': match.group(1), 'url': url})
    return url, html

  if re.search(r'^https?://www\.youtube\.com/watch', url, re.I):
    _TrackClean('direct_youtube')
    video_id = re.search(r'v=([^&]+)', url).group(1)
    return url, util.RenderTemplate('youtube.html', {'video_id': video_id})
  elif re.search(r'\.pdf(\?|$)', url, re.I):
    _TrackClean('direct_pdf')
    return url, util.RenderTemplate('pdf.html', {'url': url})
  elif re.search(r'\.(gif|jpe?g|png)(\?|$)', url, re.I):
    _TrackClean('direct_image')
    return url, util.RenderTemplate('image.html', {'url': url})

  try:
    response, final_url = util.Fetch(url)
  # `as` syntax (Python 2.6+/3) for consistency with the rest of the file.
  except urlfetch.DownloadError as error:
    _TrackClean('error')
    logging.error(error)
    return url, u'Download error: %s' % error
def get(self):
  """Render the main page with per-category counts of cleaned URLs.

  Reads one memcache counter per cleaning category and passes the
  (category, count) pairs to the template; a missing counter renders
  as None.
  """
  categories = ('direct_google_docs', 'direct_youtube', 'direct_trutv',
                'direct_pdf', 'direct_image', 'error', 'feed', 'content')
  # `category` instead of `type`: avoid shadowing the builtin.
  stats = [(category, memcache.get('cleaned_%s' % category))
           for category in categories]
  self.response.headers['Content-Type'] = 'text/html'
  self.response.out.write(
      util.RenderTemplate('main.html', {'stats': stats}))
def PrintFeed(feed_entity, include_original=False):
  """Render a feed entity as feed.xml.

  If the feed has no entries, substitute a single placeholder entry so the
  output is still a valid feed.

  Args:
    feed_entity: Feed object with .title, .link and .entries attributes.
    include_original: Bool, whether the template should include the
        original content.

  Returns:
    String, the rendered feed XML.
  """
  if not feed_entity.entries:
    feed_entity = {
        'title': feed_entity.title,
        # Bug fix: previously set to feed_entity.title, which put the feed's
        # title where its URL belongs.
        'link': feed_entity.link,
        'entries': [_EMPTY_ENTRY],
    }
  return util.RenderTemplate(
      'feed.xml', {'feed': feed_entity, 'include_original': include_original})
# NOTE(review): mid-function fragment of _Clean (the tuple-returning version):
# it begins inside an if/elif chain whose opening `if` is not visible here, so
# it cannot be reconstructed as a standalone unit. Visible behavior: direct
# PDF/image dispatch by URL suffix, fetch via util.Fetch with DownloadError
# handling, a content-type PDF check, then feed extraction (forced to the
# HTML-content path for reddit.com) falling back to extract_content on
# RssError. Mixes `except E, e` and `except E as e` syntax — presumably a
# partial Python 2.6+ migration; confirm before normalizing.
_TrackClean('direct_pdf') return url, util.RenderTemplate('pdf.html', {'url': url}) elif re.search(r'\.(gif|jpe?g|png)(\?|$)', url, re.I): _TrackClean('direct_image') return url, util.RenderTemplate('image.html', {'url': url}) try: response, final_url = util.Fetch(url) except urlfetch.DownloadError, error: _TrackClean('error') logging.error(error) return url, u'Download error: %s' % error if 'application/pdf' == response.headers.get('content-type', None): _TrackClean('direct_pdf') return url, util.RenderTemplate('pdf.html', {'url': url}) note = '' try: if 'reddit.com/' in url: raise extract_feed.RssError extractor = extract_feed.FeedExtractor(url=url, final_url=final_url, html=response.content) note = 'cleaned feed' soup = extractor.soup tag = soup _TrackClean('feed') except extract_feed.RssError as e: note = 'cleaned content, %s, %s' % (e.__class__.__name__, e) soup, tag = extract_content.ExtractFromHtml(final_url, response.content)
def get(self):
  """Serve the main page as HTML."""
  body = util.RenderTemplate('main.html')
  self.response.headers['Content-Type'] = 'text/html'
  self.response.out.write(body)
def _Clean(url):
  """Clean the contents of a given URL to only the "readable part".

  Handle special cases like YouTube, PDF, images directly. Delegate out to
  either extract content from the site's feed, or parse and clean the HTML.

  Args:
    url: String, the URL to the interesting content.

  Returns:
    String: HTML representing the "readable part".
  """
  # Handle de-facto standard "hash bang" URLs ( http://goo.gl/LNmg )
  url = url.replace('#!', '?_escaped_fragment_=')
  # Otherwise ignore fragments.
  url = re.sub(r'#.*', '', url)
  # And strip common tracking noise.
  url = re.sub(r'[?&]utm_[^&]+', '', url)

  # Google Docs viewer wrapping a cached PDF: unwrap to the PDF itself.
  # (Dots escaped so the pattern matches only docs.google.com.)
  match = re.search(r'^https?://docs\.google\.com.*cache:.*?:(.*?\.pdf)',
                    url, re.I)
  if match:
    url = match.group(1)
    if 'http' not in url:
      url = 'http://' + url

  # Native Google Docs documents get a dedicated embed template.
  match = re.search(r'^https?://docs\.google\.com.*docid=(.*?)(&|$)',
                    url, re.I)
  if match:
    _TrackClean('direct_google_docs')
    return util.RenderTemplate('google-docs.html',
                               {'docid': match.group(1), 'url': url})

  if re.search(r'^http://www\.youtube\.com/watch', url, re.I):
    _TrackClean('direct_youtube')
    video_id = re.search(r'v=([^&]+)', url).group(1)
    return util.RenderTemplate('youtube.html', {'video_id': video_id})
  if re.search(r'^http://www\.trutv\.com/video', url, re.I):
    _TrackClean('direct_trutv')
    video_id = re.search(r'(/video[^?#]+).html', url).group(1)
    return util.RenderTemplate('trutv.html', {'video_id': video_id})
  elif re.search(r'\.pdf(\?|$)', url, re.I):
    _TrackClean('direct_pdf')
    return util.RenderTemplate('pdf.html', {'url': url})
  elif re.search(r'\.(gif|jpe?g|png)(\?|$)', url, re.I):
    _TrackClean('direct_image')
    return util.RenderTemplate('image.html', {'url': url})

  html, final_url, error = util.Fetch(url)
  if error:
    _TrackClean('error')
    logging.error(error)
    return u'Download error: %s' % error

  note = ''
  try:
    extractor = extract_feed.FeedExtractor(
        url=url, final_url=final_url, html=html)
    note = 'cleaned feed'
    soup = extractor.soup
    tag = soup
    _TrackClean('feed')
  # `as` syntax (Python 2.6+/3) for consistency with the rest of the file.
  # The string literal here was split across lines by extraction garbling;
  # restored to match the identical literal used elsewhere in this file.
  except extract_feed.RssError as e:
    note = 'cleaned content, %s, %s' % (e.__class__.__name__, e)
    soup, tag = extract_content.ExtractFromHtml(final_url, html)
    _TrackClean('content')