def fetch_feeds(self, feeds):
    """
    Fetch the given feeds, possibly parallelizing requests.

    When config.fetcher.processes is non-zero, work is spread over a
    multiprocessing.Pool; otherwise feeds are fetched sequentially in
    this process. Fires 'fetch_started' before and 'fetch_done' after.
    """
    start = time.time()

    load_plugins()

    logger.debug(u"starting fetcher")
    trigger_event('fetch_started')

    if config.fetcher.processes:
        from multiprocessing import Pool
        # Each worker has its own connection
        p = Pool(config.fetcher.processes, initializer=connect)
        try:
            p.map(feed_worker, feeds)
        finally:
            # Exit the worker processes so their connections do not leak,
            # then wait for them to actually terminate. Without the
            # try/finally an exception in map() would leak the pool.
            p.close()
            p.join()
    else:
        # Just sequence requests in this process
        for feed in feeds:
            feed_worker(feed)

    trigger_event('fetch_done', feeds)

    logger.info(u"%d feeds checked in %.2fs" % (
        len(feeds), time.time() - start))
def fetch_feeds(self, feeds):
    """
    Fetch the given feeds, possibly parallelizing requests.

    When config.fetcher.processes is non-zero, work is spread over a
    multiprocessing.Pool; otherwise feeds are fetched sequentially in
    this process. Fires 'fetch_started' before and 'fetch_done' after.
    """
    start = time.time()

    load_plugins()

    logger.debug(u"starting fetcher")
    trigger_event('fetch_started')

    if config.fetcher.processes:
        from multiprocessing import Pool
        # Give each worker its own connection (matches the sibling
        # implementation of this method) instead of sharing the parent's.
        p = Pool(config.fetcher.processes, initializer=connect)
        try:
            p.map(feed_worker, feeds)
        finally:
            # Shut the pool down and wait for the workers so their
            # processes and connections do not leak.
            p.close()
            p.join()
    else:
        # Just sequence requests in this process
        for feed in feeds:
            feed_worker(feed)

    trigger_event('fetch_done', feeds)

    logger.info(u"%d feeds checked in %.2fs" % (
        len(feeds), time.time() - start))
def _parse_feed(self, data): soup = feedparser.parse(data) # Got parsing error? if hasattr(soup, 'bozo') and soup.bozo: logger.debug( u"%s caused a parser error (%s), tried to parse it anyway" % (self.netloc, soup.bozo_exception)) ft = FeedTranslator(soup.feed) self.feed.last_updated_on = ft.get_timestamp(self.instant) self.feed.alternate_link = ft.get_alternate_link() self.feed.title = self.feed.title or ft.get_title( ) # Do not set again if already set #entries = [] feed_author = ft.get_author() for entry_dict in soup.entries: t = EntryTranslator(entry_dict) link = t.get_link() guid = t.get_guid(default=link) if not guid: logger.warn(u'could not find GUID for entry from %s, skipped' % self.netloc) continue timestamp = t.get_timestamp(self.instant) content_type, content = t.get_content(('text/plain', '')) # Skip ancient entries if config.fetcher.max_history and ( self.instant - timestamp).days > config.fetcher.max_history: logger.debug( u"entry %s from %s is over maximum history, skipped" % (guid, self.netloc)) continue try: # If entry is already in database with same hashed GUID, skip it Entry.get(guid_hash=make_sha1_hash(guid)) logger.debug(u"duplicated entry %s, skipped" % guid) continue except Entry.DoesNotExist: pass entry = Entry(feed=self.feed, guid=guid, link=link, title=t.get_title(default='Untitled'), author=t.get_author() or feed_author, content=content, content_type=content_type, last_updated_on=timestamp) # At this point we are pretty sure we doesn't have the entry # already in the database so alert plugins and save data trigger_event('entry_parsed', entry, entry_dict) entry.save() #@@TODO: entries.append(entry) logger.debug(u"parsed entry %s from %s" % (guid, self.netloc))
def _parse_feed(self, data):
    """
    Parse raw feed data with feedparser, update self.feed metadata and
    save any new entries, firing 'entry_parsed' for each one.

    Entries are skipped when they have no usable GUID, are older than
    config.fetcher.max_history days, or already exist in the database
    (matched by hashed GUID).
    """
    soup = feedparser.parse(data)

    # Got parsing error? feedparser still returns best-effort results,
    # so log it and carry on with whatever was parsed.
    if hasattr(soup, 'bozo') and soup.bozo:
        logger.debug(
            u"%s caused a parser error (%s), tried to parse it anyway" %
            (self.netloc, soup.bozo_exception))

    ft = FeedTranslator(soup.feed)

    self.feed.last_updated_on = ft.get_timestamp(self.instant)
    self.feed.alternate_link = ft.get_alternate_link()
    self.feed.title = self.feed.title or ft.get_title()  # Do not set again if already set

    #entries = []
    # Feed-level author is the fallback when an entry has none of its own
    feed_author = ft.get_author()

    for entry_dict in soup.entries:
        t = EntryTranslator(entry_dict)

        link = t.get_link()
        # Fall back to the entry link as GUID when none is declared
        guid = t.get_guid(default=link)

        if not guid:
            logger.warn(
                u'could not find GUID for entry from %s, skipped' %
                self.netloc)
            continue

        timestamp = t.get_timestamp(self.instant)
        content_type, content = t.get_content(('text/plain', ''))

        # Skip ancient entries — but only when a max_history limit is
        # configured. Without this guard a max_history of 0 ("no limit",
        # as handled by the sibling implementation of this method) would
        # skip every entry older than one day.
        if config.fetcher.max_history and (
                self.instant - timestamp).days > config.fetcher.max_history:
            logger.debug(
                u"entry %s from %s is over maximum history, skipped" %
                (guid, self.netloc))
            continue

        try:
            # If entry is already in database with same hashed GUID, skip it
            Entry.get(guid_hash=make_sha1_hash(guid))
            logger.debug(u"duplicated entry %s, skipped" % guid)
            continue
        except Entry.DoesNotExist:
            pass

        entry = Entry(
            feed=self.feed,
            guid=guid,
            link=link,
            title=t.get_title(default='Untitled'),
            author=t.get_author() or feed_author,
            content=content,
            content_type=content_type,
            last_updated_on=timestamp
        )

        # At this point we are pretty sure we don't have the entry
        # already in the database so alert plugins and save data
        trigger_event('entry_parsed', entry, entry_dict)
        entry.save()

        #@@TODO: entries.append(entry)
        logger.debug(u"parsed entry %s from %s" % (guid, self.netloc))
class FrontendApp(WSGIApp, FeedController, UserController):
    """
    WSGI frontend: HTML views for entries, groups and feeds.

    Views build a template namespace via locals() plus the shared
    app_namespace set up in __init__.
    """

    def __init__(self):
        super(FrontendApp, self).__init__()
        self.alert_message = ''
        # Variables available to every template
        self.app_namespace = {
            'version_string': VERSION_STRING,
            'static_url': config.web.static_url,
            'alert_message': '',
            'page_title': '',
        }
        # Install template filters
        # NOTE(review): `filter` shadows the builtin of the same name
        for name in filters.__all__:
            filter = getattr(filters, name)
            self.app_namespace[filter.name] = filter

    def _make_view_variables(self):
        """
        Build the common variables for entry-listing views from the
        request query string (?saved | ?group= | ?feed= | ?all | default
        unread). Returns (query, namespace) where namespace is this
        function's locals() — everything assigned below ends up available
        to the template.
        """
        count, group_id, feed_id, filter_name, filter_class, panel_title, page_title = 0, 0, 0, '', '', '', ''

        groups = self.get_groups()
        # Ids of entries the current user has read/saved; exposed to
        # templates via the returned locals() as read_ids/saved_ids
        r = Entry.select(Entry.id).join(Read).where(
            (Read.user == self.user)).naive()
        s = Entry.select(Entry.id).join(Saved).where(
            (Saved.user == self.user)).naive()
        read_ids = dict((i.id, None) for i in r)
        saved_ids = dict((i.id, None) for i in s)

        if 'saved' in self.request.GET:
            count, q = self.get_saved_entries(
                Entry.id).count(), self.get_saved_entries()
            panel_title = 'Saved'
            filter_class = filter_name = 'saved'
            page_title = 'Saved'
        elif 'group' in self.request.GET:
            group_id = int(self.request.GET['group'])
            group = Group.get(Group.id == group_id)
            count, q = self.get_group_entries(
                group, Entry.id).count(), self.get_group_entries(group)
            panel_title = group.title
            filter_class = 'groups'  # The same when listing group
            filter_name = 'group=%s' % group_id
            page_title = group.title
        elif 'feed' in self.request.GET:
            feed_id = int(self.request.GET['feed'])
            feed = Feed.get(Feed.id == feed_id)
            count, q = self.get_feed_entries(
                feed, Entry.id).count(), self.get_feed_entries(feed)
            panel_title = feed.title
            filter_class = 'feeds'
            filter_name = 'feed=%s' % feed_id
            page_title = feed.title
        elif 'all' in self.request.GET:
            count, q = self.get_all_entries(
                Entry.id).count(), self.get_all_entries()
            panel_title = 'All'
            filter_class = filter_name = 'all'
            page_title = 'All'
        else:  # Default
            count, q = self.get_unread_entries(
                Entry.id).count(), self.get_unread_entries()
            panel_title = 'Unread'
            filter_class = filter_name = 'unread'
            page_title = 'Unread'

        # Cleanup namespace: drop the raw queries and self so they do not
        # leak into the locals() returned below
        del r, s, self

        return q, locals()

    # Views

    @GET(r'^/$')
    @login_required
    def index(self):
        # Landing page is the default (unread) entry list
        return self.entry_list()

    # Entries

    @GET(r'^/entries/(\d+)$')
    @login_required
    def entry(self, entry_id):
        """
        Show a single entry, marking it as read.
        """
        try:
            entry = Entry.get((Entry.id == entry_id))
        except Entry.DoesNotExist:
            raise HTTPNotFound('No such entry %s' % entry_id)

        self.mark_entry(entry, 'read')

        q, namespace = self._make_view_variables()

        # Next (older) entry within the current filter, for navigation
        n = q.where(Entry.last_updated_on < entry.last_updated_on).order_by(
            Entry.last_updated_on.desc()).limit(1)

        namespace.update({
            'entry': entry,
            'page_title': entry.title,
            'next_entries': n,
            'count': 0  # Fake it
        })

        return self.respond_with_template('entry.html', namespace)

    @POST(r'^/entries/(\d+)$')
    @login_required
    def entry_post(self, entry_id):
        '''
        Mark an entry as read, unread, saved and unsaved
        '''
        try:
            status = self.request.POST['as']
        except KeyError:
            raise HTTPBadRequest(
                'Missing parameter as=read|unread|saved|unsaved')

        try:
            entry = Entry.get((Entry.id == entry_id))
        except Entry.DoesNotExist:
            raise HTTPNotFound('No such entry %s' % entry_id)

        if 'mark' in self.request.POST:
            self.mark_entry(entry, status)

    @GET(r'^/entries/?$')
    @login_required
    def entry_list(self):
        '''
        Show entries filtered and possibly paginated by: unread, saved,
        group or feed
        '''
        q, namespace = self._make_view_variables()

        offset = int(self.request.GET.get('offset', 0))
        # NOTE(review): this local duplicates the 'entries' query built in
        # the namespace below and appears unused
        entries = q.order_by(Entry.last_updated_on.desc()).offset(
            offset).limit(ENTRIES_PER_PAGE)

        namespace.update({
            'entries':
            q.order_by(Entry.last_updated_on.desc()).offset(offset).limit(
                ENTRIES_PER_PAGE),
            'offset': offset + ENTRIES_PER_PAGE,
            'prev_date': self.request.GET.get('prev_date', None),
            #'count' : count
        })

        return self.respond_with_template('entries.html', namespace)

    @form(r'^/entries/mark$')
    @login_required
    def entry_list_post(self):
        '''
        Mark feed|all entries as read
        '''
        feed_id = int(self.request.GET.get('feed', 0))

        # GET renders a confirmation dialog; POST performs the marking
        if self.request.method == 'GET':
            now = datetime.utcnow()
            return self.respond_with_template(
                '_entries_mark_%s_read.html' %
                ('feed' if feed_id else 'all'), locals())

        # Handle postback
        try:
            before = datetime.utcfromtimestamp(int(
                self.request.POST['before']))
        except (KeyError, ValueError):
            raise HTTPBadRequest('Missing parameter before=time')

        if feed_id:
            try:
                feed = Feed.get((Feed.id == feed_id))
            except Feed.DoesNotExist:
                raise HTTPNotFound('No such feed %s' % feed_id)

            q = Entry.select(Entry).join(Feed).join(Subscription).where(
                (Subscription.user == self.user) &
                # Exclude entries already marked as read
                ~(Entry.id << Read.select(Read.entry).where(
                    Read.user == self.user)) &
                # Filter by current feed
                (Entry.feed == feed) &
                # Exclude entries fetched after the page load
                (Feed.last_checked_on < before)).distinct()
            message = 'SUCCESS Feed has been marked as read'
            redirect_url = '%s/entries/?feed=%s' % (self.application_url,
                                                    feed_id)
        else:
            q = Entry.select(Entry).join(Feed).join(Subscription).where(
                (Subscription.user == self.user) &
                # Exclude entries already marked as read
                ~(Entry.id << Read.select(Read.entry).where(
                    Read.user == self.user)) &
                # Exclude entries fetched after the page load
                (Feed.last_checked_on < before)).distinct()
            message = 'SUCCESS All entries have been marked as read'
            redirect_url = '%s/entries/?unread' % self.application_url

        #@@TODO: Use insert_many()
        with transaction():
            for entry in q:
                try:
                    Read.create(user=self.user, entry=entry)
                except IntegrityError:
                    # Entry was marked as read concurrently; not an error
                    logger.debug(
                        u'entry %d already marked as read, ignored' %
                        entry.id)
                    continue

        self.alert_message = message
        return self.respond_with_script('_modal_done.js',
                                        {'location': redirect_url})

    # Groups

    @GET(r'^/groups/?$')
    @login_required
    def group_list(self):
        '''
        Show feed groups for current user
        '''
        offset, group_id, filter_class, panel_title, page_title = 0, 0, 'groups', 'Groups', 'Groups'
        count, q = self.get_groups().count(), self.get_groups()

        offset = int(self.request.GET.get('offset', 0))
        groups = q.offset(offset).limit(GROUPS_PER_PAGE)
        # Pre-advance offset for the template's "next page" link
        offset += GROUPS_PER_PAGE

        return self.respond_with_template('groups.html', locals())

    # Feeds

    @GET(r'^/feeds/?$')
    @login_required
    def feed_list(self):
        '''
        Show subscribed feeds for current user
        '''
        offset, group_id, feed_id, filter_class, panel_title, page_title = 0, 0, 0, 'feeds', 'Feeds', 'Feeds'
        max_errors = config.fetcher.max_errors

        groups = self.get_groups()
        offset = int(self.request.GET.get('offset', 0))
        count, q = self.get_feeds(Feed.id).count(), self.get_feeds()
        feeds = q.order_by(Feed.title).offset(offset).limit(FEEDS_PER_PAGE)
        # Pre-advance offset for the template's "next page" link
        offset += FEEDS_PER_PAGE

        return self.respond_with_template('feeds.html', locals())

    @form(r'^/feeds/edit/(\d+)$')
    @login_required
    def feed(self, feed_id):
        """
        Edit a feed: GET shows the edit form, POST saves the new title.
        """
        form_message = ''

        try:
            feed = Feed.get(Feed.id == feed_id)
        except Feed.DoesNotExist:
            raise HTTPNotFound('No such feed %s' % feed_id)

        # Collect editable fields
        title = feed.title

        q = Subscription.select(
            Subscription,
            Group).join(Group).where((Subscription.user == self.user)
                                     & (Subscription.feed == feed))
        groups = [s.group for s in q]

        if self.request.method == 'GET':
            return self.respond_with_template('_feed_edit.html', locals())

        # Handle postback
        form = self.request.POST
        title = form.get('title', '').strip()
        if not title:
            form_message = u'ERROR Error, feed title cannot be empty'
            return self.respond_with_template('_feed_edit.html', locals())

        feed.title = title
        feed.save()

        self.alert_message = u'SUCCESS Changes have been saved.'
        return self.respond_with_script(
            '_modal_done.js',
            {'location': '%s/feeds/' % self.application_url})

    @form(r'^/feeds/remove/(\d+)$')
    @login_required
    def feed_remove(self, feed_id):
        """
        Unsubscribe from a feed: GET shows a confirmation modal, POST
        deletes the current user's subscription (the feed itself stays).
        """
        try:
            feed = Feed.get(Feed.id == feed_id)
        except Feed.DoesNotExist:
            raise HTTPNotFound('No such feed %s' % feed_id)

        if self.request.method == 'GET':
            return self.respond_with_modal(
                '%s/feeds/remove/%d' % (self.application_url, feed.id),
                title=u'Remove <i>%s</i> from your subscriptions?' %
                feed.title,
                button='Remove')

        # Handle postback
        Subscription.delete().where((Subscription.user == self.user) &
                                    (Subscription.feed == feed)).execute()
        self.alert_message = u'SUCCESS You are no longer subscribed to <i>%s</i>.' % feed.title
        return self.redirect_after_post('%s/feeds/' % self.application_url)

    @form(r'^/feeds/enable/(\d+)$')
    @login_required
    def feed_enable(self, feed_id):
        """
        Re-enable a disabled feed: GET shows a confirmation modal, POST
        resets the error counter and enables fetching again.
        """
        #@@TODO: Track in which view user triggers command
        try:
            feed = Feed.get(Feed.id == feed_id)
        except Feed.DoesNotExist:
            raise HTTPNotFound('No such feed %s' % feed_id)

        if self.request.method == 'GET':
            return self.respond_with_modal(
                '%s/feeds/enable/%d' % (self.application_url, feed.id),
                title=u'Enable <i>%s</i> again?' % feed.title,
                body=
                'Coldsweat will attempt to update it again during the next feeds fetch.',
                button='Enable')

        # Handle postback
        feed.is_enabled, feed.error_count = True, 0
        feed.save()
        self.alert_message = u'SUCCESS Feed <i>%s</i> is now enabled.' % feed.title
        return self.redirect_after_post('%s/feeds/' % self.application_url)

    @form(r'^/feeds/add/1$')
    @login_required
    def feed_add_1(self):
        """
        First step of the add-feed wizard: validate and fetch the given
        URL; if it is a feed, subscribe right away, otherwise offer any
        feed links discovered in the page (wizard step 2).
        """
        form_message = ''
        groups = self.get_groups()

        # URL could be passed via a GET (bookmarklet) or POST
        self_link = self.request.params.get('self_link', '').strip()

        if self.request.method == 'GET':
            return self.respond_with_template('_feed_add_wizard_1.html',
                                              locals())

        # Handle POST
        group_id = int(self.request.POST.get('group', 0))

        # Assume HTTP if URL is passed w/out scheme
        self_link = self_link if self_link.startswith(
            'http') else u'http://' + self_link
        if not validate_url(self_link):
            form_message = u'ERROR Error, specify a valid web address'
            return self.respond_with_template('_feed_add_wizard_1.html',
                                              locals())

        try:
            response = fetch_url(self_link)
        except RequestException, exc:
            form_message = u'ERROR Error, feed address is incorrect or host is unreachable.'
            return self.respond_with_template('_feed_add_wizard_1.html',
                                              locals())
        #else:
            #form_message = u'ERROR Error, a network error occured'
            #return self.respond_with_template('_feed_add_wizard_1.html', locals())

        # Not a feed: look for feed links in the HTML and let the user pick
        if not sniff_feed(response.text):
            links = find_feed_links(response.text, base_url=self_link)
            return self.respond_with_template('_feed_add_wizard_2.html',
                                              locals())

        # It's a feed
        feed = self.add_feed_from_url(self_link, fetch_data=False)

        # Parse the already-downloaded body instead of fetching again
        logger.debug(u"starting fetcher")
        trigger_event('fetch_started')
        Fetcher(feed).update_feed_with_data(response.text)
        trigger_event('fetch_done', [feed])

        return self._add_subscription(feed, group_id)