Esempio n. 1
0
    def add(self, url):
        """Register the feed found at ``url`` and start fetching its entries.

        The result is always a dict carrying ``success`` and ``output``.

        * If ``self.get_feed`` reports a failure, its result is returned
          unchanged.
        * If the parsed feed has an empty ``version`` or no title, the
          ``output`` is ``'Bad feed'``.
        * If storage already knows a feed with the same title, the
          ``output`` is ``'Feed already exists'``.
        * Otherwise the feed is stored, a ``Retriever`` is started for it and
          the dict additionally carries ``title``, ``url``, ``link``,
          ``feed_id`` and ``counter`` (number of entries in the parsed feed).
        """
        feed_guesser = self.get_feed(url)
        if not feed_guesser['success']:
            return feed_guesser
        # From here on the URL returned by get_feed replaces the one passed in.
        feed, url = feed_guesser['output']

        # Bad feed
        if feed.version == '' or not feed.feed.get('title'):
            return {'success': False, 'output': 'Bad feed'}

        title = feed.feed['title']
        # Not every feed declares a link; .get() stores a missing one as None
        # instead of aborting the whole registration with a KeyError.
        link = feed.feed.get('link')

        # Stored feeds are looked up by title.
        if storage.get_feed_by_title(title):
            return {'success': False, 'output': 'Feed already exists'}

        # First timestamp available wins, in this order: feed-level "updated",
        # top-level "updated", feed-level "published", top-level "published".
        parsed_update = (feed.feed.get('updated_parsed')
                         or feed.get('updated_parsed')
                         or feed.feed.get('published_parsed')
                         or feed.get('published_parsed'))
        if parsed_update:
            feed_update = get_datetime(parsed_update)
        else:
            # The feed carries no timestamp at all: fall back to "now".
            feed_update = datetime.datetime.now()

        feed_id = storage.add_feed({'url': url,
                                    'title': title,
                                    'link': link,
                                    'last_update': feed_update})

        # Retention is disabled for this first fetch of the feed.
        retriever = Retriever(feed, do_retention=False)
        retriever.start()

        return {
            'success': True,
            'title': title,
            'url': url,
            'link': link,
            'feed_id': feed_id,
            'output': 'Feed added',
            'counter': len(feed['entries'])}
Esempio n. 2
0
    def run(self):
        """Store every entry of ``self.data`` as a story of the current feed.

        The feed is looked up in storage by ``self.title``. For each entry,
        any story already stored under the same guid is removed, then the
        entry is stored again as an unread story. When ``self.do_retention``
        is set, entries published more than the configured ``worker`` /
        ``retention`` number of days ago are not stored.
        """
        # This feed comes from database
        feed = storage.get_feed_by_title(self.title)
        feed_id = feed.get('_id')

        # The retention limit does not change between entries: read it once,
        # and only when it is actually going to be used.
        if self.do_retention:
            retention_days = int(config.get('worker', 'retention'))
        else:
            retention_days = None

        for entry in self.data:
            title = entry.get('title')
            link = entry.get('link')
            # Fall back to the title when the entry has no identifier.
            guid = entry.get('guid') or entry.get('id') or title

            # Drop the stored copy (if any) so the entry is not duplicated;
            # a single lookup is enough for both the test and the removal.
            existing = storage.get_story_by_guid(feed_id, guid)
            if existing:
                storage.remove_story(existing.get('_id'))

            # Prefer the full content, then the summary. An entry may have
            # neither (or an empty content list); store an empty description
            # rather than aborting the remaining entries.
            try:
                description = entry['content'][0]['value']
            except (KeyError, IndexError):
                description = entry.get('summary', '')

            if entry.get('updated_parsed'):
                last_update = get_datetime(entry.updated_parsed)
            else:
                last_update = datetime.datetime.now()
            if entry.get('published_parsed', False):
                published = get_datetime(entry.published_parsed)
            else:
                published = datetime.datetime.now()

            if self.do_retention:
                # Too old: skip it. Note that the stored copy, if there was
                # one, has already been removed above.
                delta = datetime.datetime.now() - published
                if delta.days > retention_days:
                    continue

            storage.add_story({
                'title': title,
                'guid': guid,
                'link': link,
                'description': description,
                'published': published,
                'last_update': last_update,
                'feed_id': feed_id,
                'read': False})
Esempio n. 3
0
    def run(self):
        """Refresh this feed: sync its metadata and re-fetch outdated stories.

        Steps:

        1. Parse the feed at ``self.feed['url']``; give up (after printing a
           message) if the parser reports a ``bozo_exception``.
        2. Persist a changed title or link.
        3. Take the most recent of the feed's "updated"/"published"
           timestamps (or "now" when it has none) and compare it with the
           stored ``last_update``.
        4. If the feed is outdated, run a ``Retriever`` to completion, put
           the ``read`` flag back on stories that had it before, and store
           the new ``last_update``.
        """
        self.data = feedparser.parse(self.feed.get('url'))
        if self.data.get('bozo_exception', False):
            print("!! Can't retrieve %s feed (%s)" % (self.feed_title.encode('utf-8'), self.data['bozo_exception']))
            return

        need_update = False
        # Update the title if it changed. A missing/empty remote title is
        # ignored: it would otherwise replace the stored title with None and
        # break the ``self.feed_title.encode(...)`` calls below.
        remote_title = self.data.feed.get('title')
        if remote_title and remote_title != self.feed_title:
            self.feed['title'] = remote_title
            self.feed_title = remote_title
            need_update = True
        # Add the website url if it is not set yet, or refresh it if it
        # changed. A missing remote link never erases the stored one.
        remote_link = self.data.feed.get('link')
        if remote_link and remote_link != self.feed.get('link'):
            self.feed['link'] = remote_link
            self.feed_link = remote_link
            need_update = True

        if need_update:
            storage.update_feed(self.feed_id, copy.copy(self.feed))

        local_update = self.feed.get('last_update')

        # Keep the most recent timestamp the feed advertises, converting each
        # available one exactly once.
        remote_update = None
        for parsed in (self.data.feed.get('updated_parsed'),
                       self.data.get('updated_parsed'),
                       self.data.feed.get('published_parsed'),
                       self.data.get('published_parsed')):
            if not parsed:
                continue
            candidate = get_datetime(parsed)
            if not remote_update or candidate > remote_update:
                remote_update = candidate

        if not remote_update:
            # No timestamp at all: "now" makes the feed look outdated.
            remote_update = datetime.datetime.now()

        # A feed without a stored last_update is treated as outdated instead
        # of failing on the comparison with None.
        if local_update is None or remote_update > local_update:
            print('!! %s is outdated.' % self.feed_title.encode('utf-8'))
            # Remember which stories are marked as read so the flag can be
            # restored after the refresh (presumably the Retriever re-creates
            # stories as unread -- confirm against its implementation).
            readed = [story.get('guid')
                      for story in storage.get_stories(self.feed_id, "published", 0, 0)
                      if story.get('read')]

            # Retention only applies to feeds with more entries than the
            # configured threshold.
            threshold = int(config.get('worker', 'story_before_retention'))
            do_retention = len(self.data.entries) > threshold

            retriever = Retriever(self.data, self.feed, do_retention=do_retention)
            retriever.start()
            # Wait for the retriever before touching the stories again.
            retriever.join()

            for entry_guid in readed:
                entry = storage.get_story_by_guid(self.feed_id, entry_guid)
                if entry:
                    entry['read'] = True
                    storage.update_story(entry['_id'], copy.copy(entry))

            self.feed['last_update'] = remote_update
            storage.update_feed(self.feed_id, self.feed)

        else:
            print('=> %s is up-to-date.' % self.feed_title.encode('utf-8'))