def add(self, url):
    """Register the feed found at *url* and start retrieving its stories.

    The feed is resolved through ``self.get_feed(url)``, which returns a
    dict with a ``'success'`` flag and, on success, an ``'output'`` pair
    ``(parsed_feed, resolved_url)``.

    Returns a dict that always carries ``'success'`` and ``'output'``:

    * the ``get_feed`` result itself when resolution failed;
    * ``'Bad feed'`` when the parsed feed has an empty version or no title;
    * ``'Feed already exists'`` when a feed with the same title is stored;
    * on success, additionally ``'title'``, ``'url'``, ``'link'``,
      ``'feed_id'`` and ``'counter'`` (number of entries in the feed).
    """
    feed_guesser = self.get_feed(url)
    if not feed_guesser['success']:
        return feed_guesser
    feed, url = feed_guesser['output']

    # Bad feed
    if feed.version == '' or not feed.feed.get('title'):
        return {'success': False, 'output': 'Bad feed'}

    title = feed.feed['title']
    # A feed without a website link is still valid; the refresh code reads
    # the link with .get() as well, so do not fail with KeyError here.
    link = feed.feed.get('link')

    if storage.get_feed_by_title(title):
        return {'success': False, 'output': 'Feed already exists'}

    # First available timestamp wins, in this order: feed-level "updated",
    # document-level "updated", feed-level "published", document-level
    # "published". Fall back to the current time when none is present.
    raw_stamps = (
        source.get(key)
        for key in ('updated_parsed', 'published_parsed')
        for source in (feed.feed, feed)
    )
    stamp = next((raw for raw in raw_stamps if raw), None)
    if stamp:
        feed_update = get_datetime(stamp)
    else:
        feed_update = datetime.datetime.now()

    feed_id = storage.add_feed({
        'url': url,
        'title': title,
        'link': link,
        'last_update': feed_update,
    })

    # Started but not waited for: the response below is returned while the
    # retriever is (presumably) still storing stories - TODO confirm that
    # Retriever runs asynchronously.
    retriever = Retriever(feed, do_retention=False)
    retriever.start()

    return {
        'success': True,
        'title': title,
        'url': url,
        'link': link,
        'feed_id': feed_id,
        'output': 'Feed added',
        'counter': len(feed['entries']),
    }
def run(self):
    """Store every entry of ``self.data`` as a story of the feed ``self.title``.

    For each entry, any story already stored under the same guid is removed
    and the entry is stored again with ``'read': False``. When
    ``self.do_retention`` is set, entries published more than the configured
    ``worker/retention`` number of days ago are not stored again (their
    previously stored copy, if any, has already been removed at that point).
    """
    # This feed comes from database
    feed = storage.get_feed_by_title(self.title)
    feed_id = feed.get('_id')

    # The retention limit does not change during the run: read it once.
    max_age_days = None
    if self.do_retention:
        max_age_days = int(config.get('worker', 'retention'))

    # NOTE(review): assumes self.data iterates over parsed feed entries -
    # confirm against the constructor, which is not visible here.
    for entry in self.data:
        title = entry.get('title')
        link = entry.get('link')
        # Entries without an explicit identifier are keyed by their title.
        guid = entry.get('guid') or entry.get('id') or title

        existing = storage.get_story_by_guid(feed_id, guid)
        if existing:
            storage.remove_story(existing.get('_id'))

        # Prefer the full content; fall back to the summary. An empty
        # content list or a missing summary must not abort the whole run.
        try:
            description = entry['content'][0]['value']
        except (KeyError, IndexError):
            description = entry.get('summary', '')

        updated_stamp = entry.get('updated_parsed')
        if updated_stamp:
            last_update = get_datetime(updated_stamp)
        else:
            last_update = datetime.datetime.now()

        published_stamp = entry.get('published_parsed', False)
        if published_stamp:
            published = get_datetime(published_stamp)
        else:
            published = datetime.datetime.now()

        if self.do_retention:
            age = datetime.datetime.now() - published
            if age.days > max_age_days:
                continue

        storage.add_story({
            'title': title,
            'guid': guid,
            'link': link,
            'description': description,
            'published': published,
            'last_update': last_update,
            'feed_id': feed_id,
            'read': False,
        })
def run(self):
    """Refresh the stored feed ``self.feed`` from its remote URL.

    Steps:

    1. Parse the remote feed; give up (with a message) when the parser
       reports a ``bozo_exception``.
    2. Persist a changed title or website link.
    3. Take the most recent of the feed's "updated"/"published" timestamps
       (current time when there is none) and compare it with the stored
       ``last_update``.
    4. When the remote side is newer, re-import the stories through a
       ``Retriever``, restore the "read" flag on stories that were read
       before the import, and store the new ``last_update``.
    """
    self.data = feedparser.parse(self.feed.get('url'))
    if self.data.get('bozo_exception', False):
        print("!! Can't retrieve %s feed (%s)"
              % (self.feed_title.encode('utf-8'), self.data['bozo_exception']))
        return

    need_update = False

    # Update title if it changed. An empty remote title is ignored: it
    # would replace the stored title with None and break the messages
    # below, which call .encode() on it.
    remote_title = self.data.feed.get('title')
    if remote_title and remote_title != self.feed_title:
        self.feed['title'] = remote_title
        self.feed_title = remote_title
        need_update = True

    # Add or refresh the website URL; never erase a stored link just
    # because the remote feed omits it.
    remote_link = self.data.feed.get('link')
    if remote_link and remote_link != self.feed.get('link'):
        self.feed['link'] = remote_link
        self.feed_link = remote_link
        need_update = True

    if need_update:
        storage.update_feed(self.feed_id, copy.copy(self.feed))

    local_update = self.feed.get('last_update')

    # Most recent of the feed-level and document-level "updated" and
    # "published" timestamps; "now" when the feed carries none of them.
    raw_stamps = (
        source.get(key)
        for key in ('updated_parsed', 'published_parsed')
        for source in (self.data.feed, self.data)
    )
    stamps = [get_datetime(raw) for raw in raw_stamps if raw]
    remote_update = max(stamps) if stamps else datetime.datetime.now()

    # A feed stored without last_update is treated as outdated instead of
    # failing on a comparison with None.
    if local_update is not None and remote_update <= local_update:
        print('=> %s is up-to-date.' % self.feed_title.encode('utf-8'))
        return

    print('!! %s is outdated.' % self.feed_title.encode('utf-8'))

    # The retriever stores stories as unread, so remember which guids were
    # read before it runs.
    read_guids = [
        story.get('guid')
        for story in storage.get_stories(self.feed_id, "published", 0, 0)
        if story.get('read')
    ]

    # Retention only applies to feeds with more entries than the threshold.
    threshold = int(config.get('worker', 'story_before_retention'))
    do_retention = len(self.data.entries) > threshold

    retriever = Retriever(self.data, self.feed, do_retention=do_retention)
    retriever.start()
    retriever.join()

    # Put the "read" flag back on stories that survived the re-import.
    for guid in read_guids:
        story = storage.get_story_by_guid(self.feed_id, guid)
        if story:
            story['read'] = True
            storage.update_story(story['_id'], copy.copy(story))

    self.feed['last_update'] = remote_update
    storage.update_feed(self.feed_id, self.feed)