Example #1
0
 def fetch_feeds(self, feeds):
     """
     Fetch given feeds, possibly parallelizing requests

     feeds must be a sized sequence (len() is taken for the final log line).
     When config.fetcher.processes is set, feeds are dispatched to a pool
     of that many worker processes, otherwise they are processed one after
     another in the current process.
     """

     start = time.time()

     load_plugins()

     logger.debug(u"starting fetcher")
     trigger_event('fetch_started')

     if config.fetcher.processes:
         from multiprocessing import Pool
         # Each worker has its own connection
         pool = Pool(config.fetcher.processes, initializer=connect)
         try:
             pool.map(feed_worker, feeds)
             # Normal completion: no more work, let the workers exit
             pool.close()
         except BaseException:
             # Do not leave worker processes (and their connections)
             #  behind when the map is interrupted or fails
             pool.terminate()
             raise
         finally:
             # Wait for the workers to be really gone before going on
             pool.join()
     else:
         # Just sequence requests in this process
         for feed in feeds:
             feed_worker(feed)

     trigger_event('fetch_done', feeds)

     logger.info(u"%d feeds checked in %.2fs" % (len(feeds), time.time() - start))
    def fetch_feeds(self, feeds):
        """
        Fetch given feeds, possibly parallelizing requests

        feeds must be a sized sequence (len() is taken for the final log
        line). When config.fetcher.processes is set, feeds are dispatched
        to a pool of that many worker processes, otherwise they are
        processed one after another in the current process.
        """

        start = time.time()

        load_plugins()

        logger.debug(u"starting fetcher")
        trigger_event('fetch_started')

        if config.fetcher.processes:
            from multiprocessing import Pool
            pool = Pool(config.fetcher.processes)
            try:
                pool.map(feed_worker, feeds)
                # Normal completion: no more work, let the workers exit
                pool.close()
            except BaseException:
                # Do not leave worker processes behind when the map is
                #  interrupted or fails
                pool.terminate()
                raise
            finally:
                # Wait for the workers to be really gone before going on
                pool.join()
        else:
            # Just sequence requests in this process
            for feed in feeds:
                feed_worker(feed)

        trigger_event('fetch_done', feeds)

        logger.info(u"%d feeds checked in %.2fs" %
                    (len(feeds), time.time() - start))
Example #3
0
 def fetch_feeds(self, feeds):
     """
     Fetch given feeds, possibly parallelizing requests

     feeds must be a sized sequence (len() is taken for the final log line).
     When config.fetcher.processes is set, feeds are dispatched to a pool
     of that many worker processes, otherwise they are processed one after
     another in the current process.
     """

     start = time.time()

     load_plugins()

     logger.debug(u"starting fetcher")
     trigger_event('fetch_started')

     if config.fetcher.processes:
         from multiprocessing import Pool
         pool = Pool(config.fetcher.processes)
         try:
             pool.map(feed_worker, feeds)
             # Normal completion: no more work, let the workers exit
             pool.close()
         except BaseException:
             # Do not leave worker processes behind when the map is
             #  interrupted or fails
             pool.terminate()
             raise
         finally:
             # Wait for the workers to be really gone before going on
             pool.join()
     else:
         # Just sequence requests in this process
         for feed in feeds:
             feed_worker(feed)

     trigger_event('fetch_done', feeds)

     logger.info(u"%d feeds checked in %.2fs" % (len(feeds), time.time() - start))
Example #4
0
    def _parse_feed(self, data):
        """
        Parse feed data, refresh the feed attributes and save new entries

        Entries are skipped when no GUID can be found, when they are older
        than config.fetcher.max_history days (if that limit is set) or
        when an entry with the same hashed GUID is already stored.
        """
        parsed = feedparser.parse(data)

        # Got parsing error? Log it and use whatever was parsed anyway
        if getattr(parsed, 'bozo', False):
            logger.debug(
                u"%s caused a parser error (%s), tried to parse it anyway" %
                (self.netloc, parsed.bozo_exception))

        feed_translator = FeedTranslator(parsed.feed)

        self.feed.last_updated_on = feed_translator.get_timestamp(
            self.instant)
        self.feed.alternate_link = feed_translator.get_alternate_link()
        # Do not set again if already set
        self.feed.title = self.feed.title or feed_translator.get_title()

        # Fallback for entries which do not carry an author on their own
        default_author = feed_translator.get_author()

        for raw_entry in parsed.entries:

            translator = EntryTranslator(raw_entry)

            link = translator.get_link()
            # The link doubles as GUID when the entry has none
            guid = translator.get_guid(default=link)
            if not guid:
                logger.warn(u'could not find GUID for entry from %s, skipped' %
                            self.netloc)
                continue

            timestamp = translator.get_timestamp(self.instant)
            content_type, content = translator.get_content(('text/plain', ''))

            # Skip ancient entries, but only if a limit is configured
            max_history = config.fetcher.max_history
            if max_history and (self.instant - timestamp).days > max_history:
                logger.debug(
                    u"entry %s from %s is over maximum history, skipped" %
                    (guid, self.netloc))
                continue

            # If entry is already in database with same hashed GUID, skip it
            try:
                Entry.get(guid_hash=make_sha1_hash(guid))
            except Entry.DoesNotExist:
                pass
            else:
                logger.debug(u"duplicated entry %s, skipped" % guid)
                continue

            entry = Entry(
                feed=self.feed,
                guid=guid,
                link=link,
                title=translator.get_title(default='Untitled'),
                author=translator.get_author() or default_author,
                content=content,
                content_type=content_type,
                last_updated_on=timestamp,
            )

            # The entry is most likely not in the database yet, so give
            #  plugins a chance to process it and then store it
            trigger_event('entry_parsed', entry, raw_entry)
            entry.save()
            #@@TODO: collect saved entries in a list

            logger.debug(u"parsed entry %s from %s" % (guid, self.netloc))
Example #5
0
    def _parse_feed(self, data):
        """
        Parse feed data, refresh the feed attributes and save new entries

        Entries are skipped when no GUID can be found, when they are older
        than config.fetcher.max_history days (a falsy value disables the
        check) or when an entry with the same hashed GUID is already stored.
        """

        soup = feedparser.parse(data)
        # Got parsing error?
        if hasattr(soup, 'bozo') and soup.bozo:
            logger.debug(u"%s caused a parser error (%s), tried to parse it anyway" % (self.netloc, soup.bozo_exception))

        ft = FeedTranslator(soup.feed)

        self.feed.last_updated_on = ft.get_timestamp(self.instant)
        self.feed.alternate_link = ft.get_alternate_link()
        # Do not set again if already set
        self.feed.title = self.feed.title or ft.get_title()

        # Used for entries which do not carry an author on their own
        feed_author = ft.get_author()

        for entry_dict in soup.entries:

            t = EntryTranslator(entry_dict)

            link = t.get_link()
            # The link doubles as GUID when the entry has none
            guid = t.get_guid(default=link)

            if not guid:
                logger.warn(u'could not find GUID for entry from %s, skipped' % self.netloc)
                continue

            timestamp = t.get_timestamp(self.instant)
            content_type, content = t.get_content(('text/plain', ''))

            # Skip ancient entries. Apply the limit only when one is set:
            #  comparing against an unset (falsy) max_history would discard
            #  entries instead of keeping them all
            max_history = config.fetcher.max_history
            if max_history and (self.instant - timestamp).days > max_history:
                logger.debug(u"entry %s from %s is over maximum history, skipped" % (guid, self.netloc))
                continue

            try:
                # If entry is already in database with same hashed GUID, skip it
                Entry.get(guid_hash=make_sha1_hash(guid))
                logger.debug(u"duplicated entry %s, skipped" % guid)
                continue
            except Entry.DoesNotExist:
                pass

            entry = Entry(
                feed=self.feed,
                guid=guid,
                link=link,
                title=t.get_title(default='Untitled'),
                author=t.get_author() or feed_author,
                content=content,
                content_type=content_type,
                last_updated_on=timestamp,
            )

            # At this point we are pretty sure we do not have the entry
            #  already in the database so alert plugins and save data
            trigger_event('entry_parsed', entry, entry_dict)
            entry.save()
            #@@TODO: entries.append(entry)

            logger.debug(u"parsed entry %s from %s" % (guid, self.netloc))
Example #6
0
class FrontendApp(WSGIApp, FeedController, UserController):
    def __init__(self):
        '''
        Set up the alert message and the namespace shared by templates,
        which also holds every filter listed in filters.__all__
        '''
        super(FrontendApp, self).__init__()

        self.alert_message = ''

        namespace = {
            'version_string': VERSION_STRING,
            'static_url': config.web.static_url,
            'alert_message': '',
            'page_title': '',
        }

        # Install template filters, each one under its own 'name' attribute
        for exported_name in filters.__all__:
            template_filter = getattr(filters, exported_name)
            namespace[template_filter.name] = template_filter

        self.app_namespace = namespace

    def _make_view_variables(self):
        '''
        Build the entry query and the template namespace for entry views

        The filter is picked from the query string, checking for the keys
        'saved', 'group', 'feed' and 'all' in this order and falling back
        to unread entries.

        Returns a (q, namespace) tuple, where q is the entry query for the
        selected filter and namespace is a dict made from the local
        variables of this method (see the note on locals() below).

        Errors raised by int(), Group.get() and Feed.get() for a bad
        'group' or 'feed' value are not handled here.
        '''

        # Defaults. Every name set in this method is visible to templates,
        #  since the namespace returned below is locals()
        count, group_id, feed_id, filter_name, filter_class, panel_title, page_title = 0, 0, 0, '', '', '', ''

        groups = self.get_groups()
        # Ids of entries the current user has marked as read and as saved
        r = Entry.select(Entry.id).join(Read).where(
            (Read.user == self.user)).naive()
        s = Entry.select(Entry.id).join(Saved).where(
            (Saved.user == self.user)).naive()
        # Dicts keyed by entry id, values are always None
        read_ids = dict((i.id, None) for i in r)
        saved_ids = dict((i.id, None) for i in s)

        # In each branch count comes from a query selecting only Entry.id,
        #  while q is the full query returned to the caller
        if 'saved' in self.request.GET:
            count, q = self.get_saved_entries(
                Entry.id).count(), self.get_saved_entries()
            panel_title = 'Saved'
            filter_class = filter_name = 'saved'
            page_title = 'Saved'
        elif 'group' in self.request.GET:
            group_id = int(self.request.GET['group'])
            group = Group.get(Group.id == group_id)
            count, q = self.get_group_entries(
                group, Entry.id).count(), self.get_group_entries(group)
            panel_title = group.title
            filter_class = 'groups'  # The same when listing group
            filter_name = 'group=%s' % group_id
            page_title = group.title
        elif 'feed' in self.request.GET:
            feed_id = int(self.request.GET['feed'])
            feed = Feed.get(Feed.id == feed_id)
            count, q = self.get_feed_entries(
                feed, Entry.id).count(), self.get_feed_entries(feed)
            panel_title = feed.title
            filter_class = 'feeds'
            filter_name = 'feed=%s' % feed_id
            page_title = feed.title
        elif 'all' in self.request.GET:
            count, q = self.get_all_entries(
                Entry.id).count(), self.get_all_entries()
            panel_title = 'All'
            filter_class = filter_name = 'all'
            page_title = 'All'
        else:  # Default
            count, q = self.get_unread_entries(
                Entry.id).count(), self.get_unread_entries()
            panel_title = 'Unread'
            filter_class = filter_name = 'unread'
            page_title = 'Unread'

        # Cleanup namespace: drop the names which must not end up in the
        #  dict returned by locals()
        del r, s, self

        return q, locals()

    # Views

    @GET(r'^/$')
    @login_required
    def index(self):
        '''
        Site root, shows the same output of entry_list()
        '''
        return self.entry_list()

    # Entries

    @GET(r'^/entries/(\d+)$')
    @login_required
    def entry(self, entry_id):
        '''
        Show a single entry and mark it as read

        Raises HTTPNotFound if there is no entry with the given id
        '''
        try:
            current = Entry.get((Entry.id == entry_id))
        except Entry.DoesNotExist:
            raise HTTPNotFound('No such entry %s' % entry_id)

        self.mark_entry(current, 'read')

        query, view_vars = self._make_view_variables()

        # Within the current filter, pick the one entry updated right
        #  before the entry being shown
        older = query.where(Entry.last_updated_on < current.last_updated_on)
        following = older.order_by(Entry.last_updated_on.desc()).limit(1)

        view_vars['entry'] = current
        view_vars['page_title'] = current.title
        view_vars['next_entries'] = following
        view_vars['count'] = 0  # Fake it

        return self.respond_with_template('entry.html', view_vars)

    @POST(r'^/entries/(\d+)$')
    @login_required
    def entry_post(self, entry_id):
        '''
        Mark an entry as read, unread, saved and unsaved

        The wanted status is read from the 'as' POST parameter and is
        applied only if a 'mark' parameter has been posted too.
        Raises HTTPBadRequest if 'as' is missing and HTTPNotFound if there
        is no entry with the given id.
        '''
        posted = self.request.POST

        # Check parameters first, so a bad request never hits the database
        if 'as' not in posted:
            raise HTTPBadRequest(
                'Missing parameter as=read|unread|saved|unsaved')
        status = posted['as']

        try:
            entry = Entry.get((Entry.id == entry_id))
        except Entry.DoesNotExist:
            raise HTTPNotFound('No such entry %s' % entry_id)

        if 'mark' not in posted:
            return

        self.mark_entry(entry, status)

    @GET(r'^/entries/?$')
    @login_required
    def entry_list(self):
        '''
        Show entries filtered and possibly paginated by:
            unread, saved, group or feed

        The page starts at the 'offset' query string parameter (default 0)
        and holds at most ENTRIES_PER_PAGE entries, most recently updated
        first.
        '''
        q, namespace = self._make_view_variables()

        offset = int(self.request.GET.get('offset', 0))
        # Build the page query once and hand that same object to the template
        entries = q.order_by(Entry.last_updated_on.desc()).offset(
            offset).limit(ENTRIES_PER_PAGE)

        namespace.update({
            'entries': entries,
            # Template gets the offset of the following page
            'offset': offset + ENTRIES_PER_PAGE,
            'prev_date': self.request.GET.get('prev_date', None),
        })

        return self.respond_with_template('entries.html', namespace)

    @form(r'^/entries/mark$')
    @login_required
    def entry_list_post(self):
        '''
        Mark feed|all entries as read

        On GET the confirmation template is shown. Otherwise every entry
        not yet read by current user, optionally restricted to the feed
        given with the 'feed' query string parameter, is marked as read.
        The 'before' POST parameter is a timestamp: only feeds checked
        before that instant are taken into account.
        '''
        feed_id = int(self.request.GET.get('feed', 0))

        if self.request.method == 'GET':
            # Template namespace is locals(): self, feed_id and now
            now = datetime.utcnow()
            return self.respond_with_template(
                '_entries_mark_%s_read.html' % ('feed' if feed_id else 'all'),
                locals())

        # Handle postback
        try:
            posted_timestamp = int(self.request.POST['before'])
            before = datetime.utcfromtimestamp(posted_timestamp)
        except (KeyError, ValueError):
            raise HTTPBadRequest('Missing parameter before=time')

        # Entries already marked as read by current user
        already_read = Read.select(Read.entry).where(Read.user == self.user)

        if feed_id:
            try:
                feed = Feed.get((Feed.id == feed_id))
            except Feed.DoesNotExist:
                raise HTTPNotFound('No such feed %s' % feed_id)

            conditions = (
                (Subscription.user == self.user) &
                # Exclude entries already marked as read
                ~(Entry.id << already_read) &
                # Filter by current feed
                (Entry.feed == feed) &
                # Exclude entries fetched after the page load
                (Feed.last_checked_on < before))
            message = 'SUCCESS Feed has been marked as read'
            redirect_url = '%s/entries/?feed=%s' % (self.application_url,
                                                    feed_id)
        else:
            conditions = (
                (Subscription.user == self.user) &
                # Exclude entries already marked as read
                ~(Entry.id << already_read) &
                # Exclude entries fetched after the page load
                (Feed.last_checked_on < before))
            message = 'SUCCESS All entries have been marked as read'
            redirect_url = '%s/entries/?unread' % self.application_url

        unread_entries = Entry.select(Entry).join(Feed).join(
            Subscription).where(conditions).distinct()

        #@@TODO: Use insert_many()
        with transaction():
            for unread in unread_entries:
                try:
                    Read.create(user=self.user, entry=unread)
                except IntegrityError:
                    # Marked as read in the meantime, nothing left to do
                    logger.debug(u'entry %d already marked as read, ignored' %
                                 unread.id)

        self.alert_message = message
        return self.respond_with_script('_modal_done.js',
                                        {'location': redirect_url})

    # Groups

    @GET(r'^/groups/?$')
    @login_required
    def group_list(self):
        '''
        Show feed groups for current user
        '''
        # Template namespace is locals(), hence the name of each variable
        #  set here is also the name templates see
        group_id = 0
        filter_class = 'groups'
        panel_title = page_title = 'Groups'

        count = self.get_groups().count()
        q = self.get_groups()

        offset = int(self.request.GET.get('offset', 0))
        groups = q.offset(offset).limit(GROUPS_PER_PAGE)
        # Move past the page just selected
        offset = offset + GROUPS_PER_PAGE

        return self.respond_with_template('groups.html', locals())

    # Feeds

    @GET(r'^/feeds/?$')
    @login_required
    def feed_list(self):
        '''
        Show subscribed feeds for current user
        '''
        # Template namespace is locals(), hence the name of each variable
        #  set here is also the name templates see
        group_id = feed_id = 0
        filter_class = 'feeds'
        panel_title = page_title = 'Feeds'

        max_errors = config.fetcher.max_errors
        groups = self.get_groups()
        offset = int(self.request.GET.get('offset', 0))

        # Count on a query selecting only Feed.id, list with the full one
        count = self.get_feeds(Feed.id).count()
        q = self.get_feeds()

        feeds = q.order_by(Feed.title).offset(offset).limit(FEEDS_PER_PAGE)
        # Move past the page just selected
        offset = offset + FEEDS_PER_PAGE

        return self.respond_with_template('feeds.html', locals())

    @form(r'^/feeds/edit/(\d+)$')
    @login_required
    def feed(self, feed_id):
        '''
        Edit the title of a feed

        On GET the edit form is shown. Otherwise the posted 'title' is
        stripped and saved; an empty title shows the form again with an
        error message.
        Raises HTTPNotFound if there is no feed with the given id.
        '''
        # Both renderings of '_feed_edit.html' below take locals() as
        #  namespace, so local names here are what the template sees
        form_message = ''
        try:
            feed = Feed.get(Feed.id == feed_id)
        except Feed.DoesNotExist:
            raise HTTPNotFound('No such feed %s' % feed_id)

        # Collect editable fields
        title = feed.title

        # Groups under which current user is subscribed to this feed
        q = Subscription.select(
            Subscription,
            Group).join(Group).where((Subscription.user == self.user)
                                     & (Subscription.feed == feed))
        groups = [s.group for s in q]

        if self.request.method == 'GET':
            return self.respond_with_template('_feed_edit.html', locals())

        # Handle postback
        form = self.request.POST

        # title is bound again, so the form shown on error carries the
        #  submitted (stripped) value instead of the stored one
        title = form.get('title', '').strip()
        if not title:
            form_message = u'ERROR Error, feed title cannot be empty'
            return self.respond_with_template('_feed_edit.html', locals())
        feed.title = title
        feed.save()
        self.alert_message = u'SUCCESS Changes have been saved.'
        return self.respond_with_script(
            '_modal_done.js', {'location': '%s/feeds/' % self.application_url})

    @form(r'^/feeds/remove/(\d+)$')
    @login_required
    def feed_remove(self, feed_id):
        '''
        Remove a feed from the subscriptions of current user

        On GET a confirmation modal is shown. Otherwise the subscription
        is deleted and the browser is redirected to the feed list.
        Raises HTTPNotFound if there is no feed with the given id.
        '''
        try:
            feed = Feed.get(Feed.id == feed_id)
        except Feed.DoesNotExist:
            raise HTTPNotFound('No such feed %s' % feed_id)

        if self.request.method != 'GET':
            # Handle postback
            unsubscribe = Subscription.delete().where(
                (Subscription.user == self.user) & (Subscription.feed == feed))
            unsubscribe.execute()
            # NOTE(review): feed title goes into markup as it is, confirm
            #  it gets escaped before reaching the page
            self.alert_message = u'SUCCESS You are no longer subscribed to <i>%s</i>.' % feed.title

            return self.redirect_after_post('%s/feeds/' % self.application_url)

        # Ask for confirmation first, the modal posts back to this same URL
        action_url = '%s/feeds/remove/%d' % (self.application_url, feed.id)
        return self.respond_with_modal(
            action_url,
            title=u'Remove <i>%s</i> from your subscriptions?' % feed.title,
            button='Remove')

    @form(r'^/feeds/enable/(\d+)$')
    @login_required
    def feed_enable(self, feed_id):
        '''
        Enable a feed again and reset its error count

        On GET a confirmation modal is shown. Otherwise the feed is
        updated and the browser is redirected to the feed list.
        Raises HTTPNotFound if there is no feed with the given id.
        '''

        #@@TODO: Track in which view user triggers command

        try:
            feed = Feed.get(Feed.id == feed_id)
        except Feed.DoesNotExist:
            raise HTTPNotFound('No such feed %s' % feed_id)

        if self.request.method != 'GET':
            # Handle postback
            feed.is_enabled = True
            feed.error_count = 0
            feed.save()
            self.alert_message = u'SUCCESS Feed <i>%s</i> is now enabled.' % feed.title

            return self.redirect_after_post('%s/feeds/' % self.application_url)

        # Ask for confirmation first, the modal posts back to this same URL
        action_url = '%s/feeds/enable/%d' % (self.application_url, feed.id)
        return self.respond_with_modal(
            action_url,
            title=u'Enable <i>%s</i> again?' % feed.title,
            body='Coldsweat will attempt to update it again during the next feeds fetch.',
            button='Enable')

    @form(r'^/feeds/add/1$')
    @login_required
    def feed_add_1(self):
        '''
        First step of the add feed wizard

        On GET the form is shown, possibly prefilled with the 'self_link'
        parameter. On postback the address is validated and fetched: if
        the response does not look like a feed, the second wizard step is
        shown with the feed links found in it, otherwise the feed is
        added, updated with the fetched data and subscribed to under the
        posted 'group'.
        '''
        # Templates below take locals() as namespace, so local names here
        #  are what the templates see
        form_message = ''
        groups = self.get_groups()

        # URL could be passed via a GET (bookmarklet) or POST
        self_link = self.request.params.get('self_link', '').strip()

        if self.request.method == 'GET':
            return self.respond_with_template('_feed_add_wizard_1.html',
                                              locals())

        # Handle POST

        group_id = int(self.request.POST.get('group', 0))

        # Assume HTTP if URL is passed w/out scheme. Look for the whole
        #  scheme prefix, case-insensitively, so that hosts like
        #  'httpbin.org' get a scheme too and 'HTTP://...' is left alone
        if not self_link.lower().startswith(('http://', 'https://')):
            self_link = u'http://' + self_link

        if not validate_url(self_link):
            form_message = u'ERROR Error, specify a valid web address'
            return self.respond_with_template('_feed_add_wizard_1.html',
                                              locals())

        try:
            response = fetch_url(self_link)
        except RequestException as exc:
            form_message = u'ERROR Error, feed address is incorrect or host is unreachable.'
            return self.respond_with_template('_feed_add_wizard_1.html',
                                              locals())

        if not sniff_feed(response.text):
            # Not a feed, let user pick one of the feeds linked by the page
            links = find_feed_links(response.text, base_url=self_link)
            return self.respond_with_template('_feed_add_wizard_2.html',
                                              locals())

        # It's a feed

        # Data is already here, so add the feed without fetching it again
        #  and update it with the response at hand
        feed = self.add_feed_from_url(self_link, fetch_data=False)
        logger.debug(u"starting fetcher")
        trigger_event('fetch_started')
        Fetcher(feed).update_feed_with_data(response.text)
        trigger_event('fetch_done', [feed])

        return self._add_subscription(feed, group_id)