def update_feed(self, url):
    """Refresh the stored item hashes of one feed and push new posts to subscribers.

    The feed at ``url[0]`` is parsed; every entry is fingerprinted with
    ``CityHash64`` over summary + title + link.  Entries whose fingerprint is
    not yet stored for this URL are treated as new and sent to every active
    subscriber.  Stored fingerprints that no longer appear in the feed are
    dropped once they are more than 5 days old.

    Args:
        url: Row-like sequence whose first element is the feed URL.

    Returns:
        None.
    """
    feed_url = url[0]
    try:
        feed = FeedHandler.parse_feed(feed_url)
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt / SystemExit
        # still propagate; every ordinary parsing error is logged and the
        # feed is treated as unavailable.
        feed = False
        traceback.print_exc()

    # Bound up front so the name always exists, even when the feed is falsy.
    new_items = []

    if feed:
        print(f'{feed_url}:')
        print(f'Longitud de feed: {len(feed)}')
        url_items = self.db.get_url_items(url=feed_url)

        # Mark everything already known as inactive; entries still present in
        # the feed are re-activated (and their date refreshed) below.
        for known in url_items:
            url_items[known]['active'] = False

        for item in feed:
            item_hash = str(
                CityHash64(item['summary'] + item['title'] + item['link']))
            if item_hash not in url_items:
                new_items.append(item)
            url_items[item_hash] = {
                'active': True,
                'last_date': DateHandler.get_datetime_now()
            }

        # Iterate over a snapshot because entries are removed while looping.
        for item_hash, value in list(url_items.items()):
            if value['active']:
                continue
            print(f'Desactivando {item_hash}')
            if DateHandler.is_older_than_days(value['last_date'], 5):
                print(f'Borrando {item_hash}')
                url_items.pop(item_hash)

        self.db.update_url_items(url=feed_url, items=url_items)

    telegram_users = self.db.get_users_for_url(url=feed_url)
    for user in telegram_users:
        if not user[6]:  # is_active
            continue
        if not feed:
            message = "Something went wrong when I tried to parse the URL: \n\n " + \
                feed_url + "\n\nCould you please check that for me? Remove the url from your subscriptions using the /remove command, it seems like it does not work anymore!"
            self.bot.send_message(chat_id=user[0],
                                  text=message,
                                  parse_mode=ParseMode.HTML)
            # NOTE(review): returning here means only the first active
            # subscriber is told about the failure, and an empty feed is
            # reported as broken too. Kept as in the original; confirm that
            # this is intended.
            return
        for post in new_items:
            self.send_message(post=post, user=user)
def get(self, bot, update, args):
    """Manually parse one subscribed RSS feed and reply with its latest entries.

    Expected usage is ``/get <entryname> [count]`` where ``count`` is an
    integer from 1 to 10 (default 4).  Malformed input (no arguments, more
    than two arguments, a non-numeric or out-of-range count) is answered with
    a usage hint instead of raising.

    Args:
        bot: Bot instance handed in by the dispatcher (not used here).
        update: Incoming update; ``update.message`` is used to identify the
            sender and to reply.
        args: List of command arguments as strings.

    Returns:
        None.
    """
    usage_message = "To get the last news of your subscription please use /get <entryname> [optional: <count 1-10>]. Make sure you first add a feed using the /add command."
    count_message = "Count parameter (if used) must be between 1 and 10.\n" + \
        "Usage: /get <entryname> [optional: <count 1-10>]"

    telegram_user = update.message.from_user

    # An empty argument list used to crash with IndexError on args[0].
    if not args or len(args) > 2:
        update.message.reply_text(usage_message)
        return

    args_entry = args[0]
    args_count = 4
    if len(args) == 2:
        try:
            args_count = int(args[1])
        except ValueError:
            # A non-numeric count used to escape as an unhandled ValueError.
            update.message.reply_text(count_message)
            return

    if not 1 <= args_count <= 10:
        update.message.reply_text(count_message)
        return

    url = self.db.get_user_bookmark(telegram_id=telegram_user.id,
                                    alias=args_entry)
    if url is None:
        # The original text told the user to "use the delete command again",
        # which was wrong for /get.
        message = "I can not find an entry with label " + \
            args_entry + " in your subscriptions! Please check your subscriptions using /list and use the get command again!"
        update.message.reply_text(message)
        return

    entries = FeedHandler.parse_feed(url[0], args_count)
    for entry in entries:
        # NOTE(review): title and link are interpolated into HTML unescaped;
        # presumably a title containing '<' or '&' makes Telegram reject the
        # message, which is then dropped silently below - confirm.
        message = f"[{url[1]}] <a href='{entry.link}'>{entry.title}</a>"
        print(message)
        try:
            update.message.reply_text(message, parse_mode=ParseMode.HTML)
        except Unauthorized:
            # The user can no longer be messaged: mark them inactive.
            self.db.update_user(telegram_id=telegram_user.id, is_active=0)
        except TelegramError:
            # Best effort: any other Telegram error is deliberately ignored.
            pass
def update_feed(self, url):
    """Send the posts of one feed to each active subscriber and stamp the URL.

    The feed at ``url[0]`` is fetched and parsed once (only if there is at
    least one active subscriber) and the resulting posts are handed to
    ``send_newest_messages`` for every active user.  If parsing fails, or
    sending to a particular user fails, that user is sent a notice asking
    them to check or remove the URL.  Finally the URL's ``last_updated``
    value is set to the current time.

    Args:
        url: Row-like sequence whose first element is the feed URL.

    Returns:
        None.
    """
    feed_url = url[0]

    # user[6] is the is_active flag; inactive subscribers are skipped.
    active_users = [
        user for user in self.db.get_users_for_url(url=feed_url) if user[6]
    ]

    posts = None
    if active_users:
        # Parse once instead of once per subscriber: the URL is the same for
        # every user, so re-fetching inside the loop was redundant work.
        try:
            posts = list(FeedHandler.parse_feed(feed_url))
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            traceback.print_exc()

    for user in active_users:
        delivered = posts is not None
        if delivered:
            try:
                for post in posts:
                    self.send_newest_messages(url=url, post=post, user=user)
            except Exception:
                traceback.print_exc()
                delivered = False
        if not delivered:
            message = "Something went wrong when I tried to parse the URL: \n\n " + \
                feed_url + "\n\nCould you please check that for me? Remove the url from your subscriptions using the /remove command, it seems like it does not work anymore!"
            self.bot.send_message(chat_id=user[0],
                                  text=message,
                                  parse_mode=ParseMode.HTML)

    self.db.update_url(url=feed_url,
                       last_updated=str(DateHandler.get_datetime_now()))
def update_feed(self, url):
    """Deliver unseen posts of ``url`` and remember the newest one delivered.

    Does nothing (returns ``None``) once ``self._finished`` is set.  Otherwise
    the stored ``last_url`` / ``last_update`` for the URL are loaded, up to 4
    posts newer than one day before ``last_update`` are requested from
    ``FeedHandler.parse_feed``, and every post that is both newer than
    ``last_update`` and has a different link than ``last_url`` is sent.

    Posts carrying a ``daily_liturgy`` attribute are sent as title + liturgy
    text (skipped when that text is empty) and only the last post of the feed
    updates the stored state; other posts are sent as title + link and update
    the stored state after every successful send.

    Returns:
        ``(True, url)`` on success, ``(False, url, 'update_feed')`` when a
        ``TypeError`` or ``TelegramError`` is raised, ``None`` if finished.
    """
    if self._finished.isSet():
        return None

    try:
        url_info = self.db.get_update_url(url)
        previous_link = url_info['last_url']
        previous_date = DateHandler.parse_datetime(url_info['last_update'])
        window_start = previous_date + timedelta(days=-1)
        posts = FeedHandler.parse_feed(url, 4, window_start)

        for post in posts:
            # Skip only entries that have neither a date nor liturgy text.
            if not (hasattr(post, "published")
                    or hasattr(post, "daily_liturgy")):
                logger.warning('not published' + url)
                continue

            published_at = DateHandler.parse_datetime(post.published)
            if not (published_at > previous_date
                    and post.link != previous_link):
                continue

            if hasattr(post, "daily_liturgy"):
                if post.daily_liturgy != '':
                    text = post.title + '\n' + post.daily_liturgy
                    sent = self.send_newest_messages(text, url)
                    # Liturgy feeds persist state only on the final post.
                    if post == posts[-1] and sent:
                        self.update_url(url=url,
                                        last_update=published_at,
                                        last_url=post.link)
            else:
                text = post.title + '\n' + post.link
                sent = self.send_newest_messages(text, url)
                if sent:
                    self.update_url(url=url,
                                    last_update=published_at,
                                    last_url=post.link)
    except TypeError as e:
        logger.error(f"TypeError {url} {str(e)}")
        return False, url, 'update_feed'
    except TelegramError as e:
        logger.error(
            f"except update_feed TelegramError {url} {str(e)}")
        return False, url, 'update_feed'
    else:
        return True, url
def update_feed(self, url):
    """Fan the posts of one feed out to its subscribed users and channels.

    Subscribers and channels for ``url[0]`` are loaded, the feed is parsed,
    and each post is passed to ``send_newest_messages`` for every active user
    (``user[6]`` truthy) and for every channel.  Afterwards the URL's
    ``last_updated`` value is set to the current time.  If parsing raises
    ``ValueError`` the traceback is printed and nothing else happens.

    Args:
        url: Row-like sequence whose first element is the feed URL.
    """
    feed_url = url[0]
    subscribers = self.db.get_users_for_url(url=feed_url)
    channels = self.db.get_channels_for_url(url=feed_url)

    print("Processing url: {0}".format(feed_url))

    try:
        entries = FeedHandler.parse_feed(feed_url)
    except ValueError:
        traceback.print_exc()
        return

    for entry in entries:
        print("Processing post: {0}".format(entry.id))

        for subscriber in subscribers:
            if not subscriber[6]:  # is_active
                continue
            self.send_newest_messages(url=url, post=entry, user=subscriber)

        # Channels have no active flag check; every channel gets the post.
        for channel in channels:
            self.send_newest_messages(url=url, post=entry, user=channel)

    timestamp = str(DateHandler.get_datetime_now())
    self.db.update_url(url=feed_url, last_updated=timestamp)
def update_feed(url):
    """Deliver unseen posts of ``url`` and remember the newest one delivered.

    The stored ``last_url`` / ``last_update`` for the URL are loaded, up to 4
    posts newer than one day before ``last_update`` are requested from
    ``FeedHandler.parse_feed``, and every post that is both newer than
    ``last_update`` and has a different link than ``last_url`` is sent.

    Posts with a ``daily_liturgy`` attribute are sent as title + liturgy text
    with the page preview disabled (skipped when the text is empty); only the
    last post of the feed updates the stored state.  Other posts are sent as
    title + link and update the stored state after each successful send.

    A ``TypeError`` raised anywhere in the process is logged and suppressed.
    """
    try:
        stored = db.get_update_url(url)
        seen_link = stored['last_url']
        seen_date = DateHandler.parse_datetime(stored['last_update'])
        entries = FeedHandler.parse_feed(
            url, 4, seen_date + timedelta(days=-1))

        for entry in entries:
            # Skip only entries that have neither a date nor liturgy text.
            if not (hasattr(entry, "published")
                    or hasattr(entry, "daily_liturgy")):
                logger.warning('not published' + url)
                continue

            entry_date = DateHandler.parse_datetime(entry.published)
            if not (entry_date > seen_date and entry.link != seen_link):
                continue

            if not hasattr(entry, "daily_liturgy"):
                text = entry.title + '\n' + entry.link
                sent = send_newest_messages(message=text, url=url)
                if sent:
                    update_url(url=url,
                               last_update=entry_date,
                               last_url=entry.link)
                continue

            if entry.daily_liturgy != '':
                text = entry.title + '\n' + entry.daily_liturgy
                sent = send_newest_messages(message=text,
                                            url=url,
                                            disable_page_preview=True)
                # Liturgy feeds persist state only on the final entry.
                if entry == entries[-1] and sent:
                    update_url(url=url,
                               last_update=entry_date,
                               last_url=entry.link)
    except TypeError as _:
        logger.error(f"TypeError {url} {str(_)}")
def test_parse_feed(self):
    """Parsing the sample feed URL returns a result."""
    url = "https://lorem-rss.herokuapp.com/feed"
    feed = FeedHandler.parse_feed(url)
    # Check the parsed feed: the original asserted on the URL literal, which
    # can never be None, so the test could not fail.
    self.assertIsNotNone(feed)
def test_parse_feed_amount(self):
    """Parsing with an entry limit of 5 returns exactly 5 entries."""
    url = "https://lorem-rss.herokuapp.com/feed"
    feed = FeedHandler.parse_feed(url, 5)
    # Check the parsed feed: the original asserted on the URL literal, which
    # can never be None.
    self.assertIsNotNone(feed)
    self.assertEqual(len(feed), 5)