class Heartbeat:
    """Periodically tell the user that the bot is still alive."""

    __log__ = logging.getLogger('flathunt')

    def __init__(self, config, interval):
        """Select a notifier from the configuration and store the interval.

        :param config: must be a ``Config`` instance, otherwise an
            ``Exception`` is raised.
        :param interval: passed through ``interval2counter``; the result is
            stored as ``self.interval``.
        """
        self.config = config
        if not isinstance(self.config, Config):
            raise Exception(
                "Invalid config for hunter - should be a 'Config' object")

        self.notifiers = self.config.get('notifiers', [])

        # Mattermost wins over Telegram when both are listed; with neither
        # configured there is no notifier and heartbeats are never sent.
        if 'mattermost' in self.notifiers:
            chosen = SenderMattermost(config)
        elif 'telegram' in self.notifiers:
            chosen = SenderTelegram(config)
        else:
            chosen = None
        self.notifier = chosen

        self.interval = interval2counter(interval)

    def send_heartbeat(self, counter):
        """Send a heartbeat message when ``counter`` hits the interval.

        :param counter: current counter value maintained by the caller.
        :return: ``0`` if a heartbeat was sent (the counter is reset),
            otherwise ``counter`` unchanged.
        """
        # Nothing to do without a notifier or without a configured interval.
        if self.notifier is None or self.interval is None:
            return counter

        # Not yet a multiple of the interval: leave the counter alone.
        if counter % self.interval != 0:
            return counter

        self.notifier.send_msg(
            'Beep Boop. This is a heartbeat message. Your bot is searching actively for flats.'
        )
        return 0
def hunt_flats(self, config, searchers, id_watch):
    """Crawl the configured URLs and send one message per unseen expose.

    For every entry of ``config['urls']`` the first searcher whose
    ``URL_PATTERN`` matches is asked for results. Results are de-duplicated
    by ``id``; each expose whose id is not in ``id_watch.get()`` is formatted
    with ``config['message']``, sent via ``SenderTelegram`` and recorded with
    ``id_watch.add``.

    :param config: configuration object providing ``get``.
    :param searchers: iterable of crawler objects exposing ``URL_PATTERN``,
        ``get_results(url)`` and ``load_address(url)``.
    :param id_watch: store of already processed expose ids.
    :return: None
    """
    sender = SenderTelegram(config)
    new_links = 0
    processed = id_watch.get()

    for url in config.get('urls', []):
        self.__log__.debug('Processing URL: ' + url)

        # Reset per URL. Without this a URL that no searcher matches would
        # reuse the previous URL's results, or raise NameError on the very
        # first URL.
        results = None
        try:
            for searcher in searchers:
                if re.search(searcher.URL_PATTERN, url):
                    results = searcher.get_results(url)
                    break
        except requests.exceptions.ConnectionError:
            self.__log__.warning("Connection to %s failed. Retrying. ", url.split('/')[2])
            continue

        # on error, stop execution
        if not results:
            break

        # Keep one expose per id (the last occurrence wins).
        uniq_results = list({v['id']: v for v in results}.values())

        for expose in uniq_results:
            # check if already processed
            if expose['id'] in processed:
                continue

            self.__log__.info('New offer: ' + expose['title'])

            # to reduce traffic, some addresses need to be loaded on demand
            address = expose['address']
            if address.startswith('http'):
                # Use a dedicated name so the loop's search URL is not clobbered.
                address_url = address
                for searcher in searchers:
                    if re.search(searcher.URL_PATTERN, address_url):
                        address = searcher.load_address(address_url)
                        self.__log__.debug("Loaded address %s for url %s", address, address_url)
                        break

            # calculate durations
            message = config.get('message', "").format(
                title=expose['title'],
                rooms=expose['rooms'],
                size=expose['size'],
                price=expose['price'],
                url=expose['url'],
                durations=self.get_formatted_durations(config, address)).strip()

            # send message to all receivers
            sender.send_msg(message)

            new_links += 1
            id_watch.add(expose['id'])

    self.__log__.info(str(new_links) + ' new offer found')
def hunt_flats(self, config, searchers, id_watch):
    """Crawl the configured URLs and send one message per unseen expose.

    For every entry of ``config["urls"]`` the first searcher whose
    ``URL_PATTERN`` matches is asked for results. Each expose whose id is not
    in ``id_watch.get()`` is formatted with ``config["message"]``, sent via
    ``SenderTelegram`` and recorded with ``id_watch.add``.

    :param config: configuration object providing ``get``.
    :param searchers: iterable of crawler objects exposing ``URL_PATTERN``,
        ``get_results(url)`` and ``load_address(url)``.
    :param id_watch: store of already processed expose ids.
    :return: None
    """
    sender = SenderTelegram(config)
    new_links = 0
    processed = id_watch.get()

    for url in config.get("urls", []):
        self.__log__.debug("Processing URL: " + url)

        # Reset per URL. Without this a URL that no searcher matches would
        # reuse the previous URL's results, or raise NameError on the very
        # first URL.
        results = None
        try:
            for searcher in searchers:
                if re.search(searcher.URL_PATTERN, url):
                    results = searcher.get_results(url)
                    break
        except requests.exceptions.ConnectionError:
            self.__log__.warning("Connection to %s failed. Retrying. ", url.split("/")[2])
            continue

        # on error, stop execution
        if not results:
            print("something broke, results empty")
            break

        for expose in results:
            # check if already processed
            if expose["id"] in processed:
                continue

            self.__log__.info("New offer: " + expose["title"])

            # to reduce traffic, some addresses need to be loaded on demand
            # NOTE(review): the loaded address is not part of the message in
            # this revision; it only shows up in the debug log below.
            address = expose["address"]
            if address.startswith("http"):
                # Use a dedicated name so the loop's search URL is not clobbered.
                address_url = address
                for searcher in searchers:
                    if re.search(searcher.URL_PATTERN, address_url):
                        address = searcher.load_address(address_url)
                        self.__log__.debug("Loaded address %s for url %s", address, address_url)
                        break

            # build the notification text from the configured template
            message = config.get("message", "").format(
                title=expose["title"],
                rooms=expose["rooms"],
                size=expose["size"],
                price=expose["price"],
                url=expose["url"],
            ).strip()

            # send message to all receivers
            sender.send_msg(message)

            new_links += 1
            id_watch.add(expose["id"])

    print(str(new_links) + " new offer found")
    self.__log__.info(str(new_links) + " new offer found")
def test_send_no_message_if_no_receivers(self, m):
    """``send_msg`` returns None when ``receiver_ids`` is configured as None.

    ``m`` is not used in the body; presumably it is a request mocker injected
    by a decorator outside this snippet - verify against the full file.
    """
    telegram_config = {
        "telegram": {
            "bot_token": "dummy_token",
            "receiver_ids": None,
        }
    }
    sender = SenderTelegram(telegram_config)

    outcome = sender.send_msg("result")

    self.assertEqual(None, outcome, "Expected no message to be sent")
class SenderTelegramTest(unittest.TestCase):
    """Exercise ``SenderTelegram.send_msg`` against a mocked Telegram API."""

    def setUp(self):
        """Create a sender with a dummy token and a single receiver id."""
        telegram_settings = {"bot_token": "dummy_token", "receiver_ids": [123]}
        self.sender = SenderTelegram({"telegram": telegram_settings})

    @requests_mock.Mocker()
    def test_send_message(self, m):
        """Sending to the mocked ``sendMessage`` endpoint returns None."""
        endpoint = 'https://api.telegram.org/botdummy_token/sendMessage?chat_id=123&text=result'
        mock_response = '{"ok":true,"result":{"message_id":456,"from":{"id":1,"is_bot":true,"first_name":"Wohnbot","username":"******"},"chat":{"id":5,"first_name":"Arthur","last_name":"Taylor","type":"private"},"date":1589813130,"text":"hello arthur"}}'

        # Register the canned reply before the sender issues its GET request.
        m.get(endpoint, text=mock_response)

        outcome = self.sender.send_msg("result")

        self.assertEqual(None, outcome, "Expected message to be sent")
def hunt_flats(self, config, searchers, id_watch):
    """Crawl the configured URLs, filter exposes and notify about new ones.

    For every entry of ``config['urls']`` the first searcher whose
    ``URL_PATTERN`` matches is asked for results. Each expose whose id is not
    in ``id_watch.get()`` is recorded with ``id_watch.add``, checked against
    the ``date_filter`` configuration and, if it passes, formatted with
    ``config['message']`` and sent via ``SenderTelegram``.

    :param config: configuration object providing ``get``.
    :param searchers: iterable of crawler objects exposing ``URL_PATTERN``,
        ``get_results(url)``, ``load_address(url)`` and ``load_date(url)``.
    :param id_watch: store of already processed expose ids.
    :return: None
    """
    sender = SenderTelegram(config)
    new_links = 0
    processed = id_watch.get()

    # Loop-invariant configuration lookups.
    blacklist = config.get('blacklist', [])
    date_filter = config.get('date_filter', {})

    for url in config.get('urls', []):
        self.__log__.debug('Processing URL: ' + url)

        # Reset per URL. Without this a URL that no searcher matches would
        # reuse the previous URL's results, or raise NameError on the very
        # first URL.
        results = None
        # TODO: improve control flow
        try:
            for searcher in searchers:
                if re.search(searcher.URL_PATTERN, url):
                    results = searcher.get_results(url)
                    break
        except requests.exceptions.ConnectionError:
            self.__log__.warning("Connection to %s failed. Retrying. ", url.split('/')[2])
            continue
        except Exception as e:
            # exc_info=True attaches the traceback to the log record.
            # Calling e.with_traceback() without its required argument
            # raised TypeError inside this handler.
            self.__log__.warning("Unknown error: {}".format(e), exc_info=True)
            continue

        # on error, stop execution
        if not results:
            break

        for expose in results:
            # check if already processed
            if expose['id'] in processed:
                continue

            self.__log__.info('New offer: ' + expose['title'])

            # to reduce traffic, some addresses need to be loaded on demand
            address = expose['address']
            if address.startswith('http'):
                # Use a dedicated name so the search URL stays intact for the
                # date lookup and the log message further down.
                address_url = address
                for searcher in searchers:
                    if re.search(searcher.URL_PATTERN, address_url):
                        address = searcher.load_address(address_url)
                        self.__log__.debug("Loaded address %s for url %s", address, address_url)
                        break

            # filter districts: drop every blacklisted word from the address
            address = ' '.join(
                word for word in address.split() if word not in blacklist)

            # add to visited list already now so that we can actually skip
            # if entry does not match date filter
            id_watch.add(expose['id'])

            # get date if necessary
            if "date" not in expose:
                for searcher in searchers:
                    if re.search(searcher.URL_PATTERN, url):
                        expose["date"] = searcher.load_date(expose["url"])
                        self.__log__.debug(
                            "Loaded date {} for url {}".format(expose["date"], expose["url"]))
                        break

            # skip exposes whose date text contains a blacklisted phrase
            mismatched_date = any(
                blacklisted_phrase in expose["date"]
                for blacklisted_phrase in date_filter.get("blacklist_phrases", []))
            if mismatched_date:
                # go to next expose
                self.__log__.info(
                    "Skipping entry, date {} matches blacklist".format(expose["date"]))
                continue

            # try to parse date string; unparsable dates bypass the range filter
            try:
                parsed_date = parser.parse(expose["date"], dayfirst=True).date()
            except (ValueError, OverflowError):
                self.__log__.debug(
                    "Could not parse date {} for url {} - ignoring filters".format(
                        expose["date"], url))
            else:
                date_min = date_filter.get('date_min')
                date_max = date_filter.get('date_max')
                if date_min and parsed_date < date_min:
                    self.__log__.info(
                        "Skipping entry, date {} too early".format(expose["date"]))
                    continue
                if date_max and parsed_date > date_max:
                    self.__log__.info(
                        "Skipping entry, date {} too late".format(expose["date"]))
                    continue

            # calculate durations
            message = config.get('message', "").format(
                title=expose['title'],
                date=expose['date'],
                rooms=expose['rooms'],
                size=expose['size'],
                price=expose['price'],
                url=expose['url'],
                address=address,
                durations=self.get_formatted_durations(config, address)).strip()

            # send message to all receivers
            sender.send_msg(message)

            new_links += 1

    self.__log__.info(str(new_links) + ' new offers found')
def hunt_flats(self, connection=None):
    """Crawl the configured URLs and notify about new, non-excluded exposes.

    For every entry of ``self.config['urls']`` the first searcher whose
    ``URL_PATTERN`` matches is asked for results. Each expose whose id is not
    in ``self.id_watch.get(connection)`` is formatted with the configured
    message template. Unless its lower-cased title matches one of
    ``self.excluded_titles``, it is sent via ``SenderTelegram`` and recorded
    with ``self.id_watch.add``.

    :param connection: passed through unchanged to the ``id_watch`` calls.
    :return: list of the exposes that were sent during this run.
    """
    sender = SenderTelegram(self.config)
    new_exposes = []
    processed = self.id_watch.get(connection)

    for url in self.config.get('urls', []):
        self.__log__.debug('Processing URL: ' + url)

        # Reset per URL. Without this a URL that no searcher matches would
        # reuse the previous URL's results, or raise NameError on the very
        # first URL.
        results = None
        try:
            for searcher in self.searchers:
                if re.search(searcher.URL_PATTERN, url):
                    results = searcher.get_results(url)
                    break
        except requests.exceptions.ConnectionError:
            self.__log__.warning("Connection to %s failed. Retrying. ", url.split('/')[2])
            continue

        # nothing found for this URL: move on to the next one
        if not results:
            self.__log__.debug('No results for: ' + url)
            continue

        for expose in results:
            # check if already processed
            if expose['id'] in processed:
                continue

            self.__log__.info('New offer: ' + expose['title'])

            # to reduce traffic, some addresses need to be loaded on demand
            address = expose['address']
            if address.startswith('http'):
                # Use a dedicated name so the loop's search URL is not clobbered.
                address_url = address
                for searcher in self.searchers:
                    if re.search(searcher.URL_PATTERN, address_url):
                        address = searcher.load_address(address_url)
                        self.__log__.debug("Loaded address %s for url %s", address, address_url)
                        break

            # calculate durations if enabled
            durations_enabled = "google_maps_api" in self.config and self.config[
                "google_maps_api"]["enable"]
            if durations_enabled:
                durations = self.get_formatted_durations(
                    self.config, address).strip()
            else:
                durations = ""

            message = self.config.get('message', "").format(
                title=expose['title'],
                rooms=expose['rooms'],
                size=expose['size'],
                price=expose['price'],
                url=expose['url'],
                address=address,
                durations=durations).strip()

            # Skip exposes whose title matches any exclusion pattern.
            # NOTE(review): skipped exposes are not added to id_watch, so
            # they are evaluated again on the next run - confirm intended.
            if self.excluded_titles:
                # combine all the regex patterns into one
                combined_excludes = "(" + ")|(".join(
                    self.excluded_titles) + ")"
                if re.search(combined_excludes, expose['title'].lower()):
                    continue

            # send message to all receivers
            sender.send_msg(message)
            new_exposes.append(expose)
            self.id_watch.add(expose['id'], connection)

    self.__log__.info(str(len(new_exposes)) + ' new offers found')
    self.id_watch.update_last_run_time(connection)
    return new_exposes