コード例 #1
0
ファイル: tasks.py プロジェクト: saga/kindledump
def fetch_pages(request):
    """Fetch a batch of pages for a user and queue them for sending.

    Reads ``user_email`` and ``page_urls`` (a pickled iterable of URLs) from
    ``request.POST``, downloads at most 10240 bytes from each URL, runs each
    download through ``redability.Document(...).summary()`` and stores the
    ASCII-encoded summaries, joined by ``<hr>``, in a new ``ReadyData``
    entity of type ``'page'``.  A ``fetcher-send`` task is then scheduled
    with the key of that entity.

    Returns:
        True once the entity is stored and the task is queued.

    Raises:
        TypeError: if ``user_email`` or ``page_urls`` is missing or empty.
    """
    user_email = request.POST.get('user_email', None)

    # A constructor call never yields None, so the missing-user case has to
    # be detected on the raw parameter before the User object is built.
    if not user_email:
        logging.error('User not found: %s', user_email)
        raise TypeError('User not found')
    user = users.User(email=user_email)

    page_urls_p = request.POST.get('page_urls', None)
    if not page_urls_p:
        logging.error('Page urls not given')
        raise TypeError('Page urls not given')

    # SECURITY NOTE(review): unpickling request data executes arbitrary code
    # if the payload is attacker-controlled.  Confirm this endpoint is only
    # reachable by the internal task queue, or switch to a safe format.
    page_urls = pickle.loads(str(page_urls_p))

    logging.debug('fetching pages: %s;;%s', user_email, page_urls)
    pages = []
    for url in page_urls:
        response = urllib.urlopen(url)
        try:
            # Only the first 10240 bytes of each page are used.
            html = response.read(10240)
        finally:
            # Release the connection even when the read fails.
            response.close()
        # NOTE(review): `redability` is kept as in the original; it may be a
        # misspelling of `readability` -- confirm against the file imports.
        doc = redability.Document(html)
        pages.append(doc.summary().encode('ascii', 'ignore'))

    rd = ReadyData(owner=user, data_type='page')
    rd.content = '<hr>'.join(pages)
    rd.merged = len(pages)
    rd.put()
    logging.debug('ReadyData for fetched pages created: %s', page_urls)

    # schedule task for fetched data send
    params = {'ready_data_key': rd.key()}
    taskqueue.add(url=reverse('fetcher-send'), params=params)
    logging.debug('task created')

    return True
コード例 #2
0
ファイル: tasks.py プロジェクト: saga/kindledump
def fetch_feeds(request):
    """Fetch one feed, append its entries to the owner's data and queue a send.

    Reads ``feed_key`` from ``request.POST``, loads the matching ``Feed``
    entity and parses ``feed.url`` with ``feedparser``.  When the parse
    result carries a ``bozo_exception`` the feed is flagged
    ``is_valid = False``, saved, and the function returns early.  Otherwise
    every entry is rendered as ``<h1>title</h1>`` followed by its content
    values and appended to the owner's ``ReadyData`` entity of type
    ``'feed'`` (created when none exists).  A ``fetcher-send`` task is then
    scheduled with the key of that entity.

    Returns:
        True on success, None when the feed was flagged as invalid.

    Raises:
        TypeError: if ``feed_key`` is missing or no ``Feed`` matches it.
    """
    feed_key = request.POST.get('feed_key', None)

    if feed_key is None:
        logging.error('missing parameter')
        raise TypeError('missing parameter')

    feed = Feed.get(feed_key)
    if feed is None:
        logging.error('Feed object not found: %s', feed_key)
        raise TypeError('Feed object not found')

    parser = feedparser.parse(feed.url)

    # check if feed exists
    # The parse error is reported on the feedparser result, not on the
    # datastore entity, so `parser` is the object to inspect here.
    if hasattr(parser, 'bozo_exception'):
        feed.is_valid = False
        logging.warning('Invalid feed: %s;;%s', feed.id, feed.url)
        feed.put()
        return

    # setup feed title if does not exist
    if not feed.title:
        parsed_title = parser.feed.get('title')
        # Leave the title untouched when the feed does not provide one.
        if parsed_title:
            feed.title = parsed_title

    rd = ReadyData.gql("WHERE data_type = :1 AND owner = :2 LIMIT 1",
                'feed', feed.owner).get()
    if rd is None:
        rd = ReadyData(owner=feed.owner, data_type='feed')
        rd.content = ''

    articles = []
    for e in parser['entries']:
        # TODO - check the date

        parts = ['<h1>%(title)s</h1>' % e]
        bodies = [content['value'] for content in e.get('content', [])]
        # Entries without a content list fall back to their summary
        # instead of failing with KeyError.
        if not bodies and e.get('summary'):
            bodies = [e['summary']]
        parts.extend(bodies)
        articles.append(''.join(parts))

    rd.content += ''.join(articles)
    # `merged` may still be unset on a freshly created entity.
    rd.merged = (rd.merged or 0) + len(articles)

    rd.put()
    feed.put()

    params = {'ready_data_key': rd.key()}
    taskqueue.add(url=reverse('fetcher-send'), params=params)

    return True