def hunt_flats(self):
    """Crawl for new exposes and send each unmuted user the ones matching their settings.

    A first processor chain is applied to the output of
    ``self.crawl_for_exposes(max_pages=1)``. It is built with a filter for
    already-seen exposes plus the detail-crawling, saving, address-resolution
    and duration-calculation steps. The resulting exposes are then run through
    one additional chain per user, built from that user's settings and a
    Telegram sender addressed to that user only.

    Returns:
        list: the exposes produced by the first processor chain.
    """
    unseen_filter = (
        Filter.builder()
        .filter_already_seen(self.id_watch)
        .build()
    )
    crawl_chain = (
        ProcessorChain.builder(self.config)
        .apply_filter(unseen_filter)
        .crawl_expose_details()
        .save_all_exposes(self.id_watch)
        .resolve_addresses()
        .calculate_durations()
        .build()
    )

    # Materialise the pipeline output once: it is replayed for every user below.
    new_exposes = list(crawl_chain.process(self.crawl_for_exposes(max_pages=1)))

    for user_id, settings in self.id_watch.get_user_settings():
        # The mere presence of the key mutes the user; its value is not inspected.
        if 'mute_notifications' in settings:
            continue
        user_filter = Filter.builder().read_config(settings).build()
        notify_chain = (
            ProcessorChain.builder(self.config)
            .apply_filter(user_filter)
            .send_telegram_messages([user_id])
            .build()
        )
        for message in notify_chain.process(new_exposes):
            # Lazy %-style arguments: the message is only formatted when
            # debug logging is actually enabled.
            self.__log__.debug("Sent expose %s to user %s", message['id'], user_id)

    self.id_watch.update_last_run_time()
    return new_exposes
def hunt_flats(self, max_pages=1):
    """Crawl for exposes, process the new ones and notify users individually.

    Args:
        max_pages: forwarded as ``max_pages`` to ``self.crawl_for_exposes``.

    Returns:
        list: a fresh list holding the exposes produced by the crawl chain.
    """
    config_and_seen_filter = (
        Filter.builder()
        .read_config(self.config)
        .filter_already_seen(self.id_watch)
        .build()
    )
    crawl_chain = (
        ProcessorChain.builder(self.config)
        .apply_filter(config_and_seen_filter)
        .crawl_expose_details()
        .save_all_exposes(self.id_watch)
        .resolve_addresses()
        .calculate_durations()
        .send_messages()
        .build()
    )
    # NOTE(review): the crawl chain already ends with send_messages() and the
    # per-user chains below call send_messages([user_id]) again - confirm this
    # cannot deliver the same expose twice.

    # Drain the pipeline into a list so it can be re-processed per user.
    new_exposes = list(
        crawl_chain.process(self.crawl_for_exposes(max_pages=max_pages))
    )

    for user_id, settings in self.id_watch.get_user_settings():
        # Key presence alone mutes the user; the stored value is not read.
        if 'mute_notifications' in settings:
            continue
        user_filter = Filter.builder().read_config(settings).build()
        user_chain = (
            ProcessorChain.builder(self.config)
            .apply_filter(user_filter)
            .send_messages([user_id])
            .build()
        )
        for message in user_chain.process(new_exposes):
            self.__log__.debug("Sent expose %d to user %d", message['id'], user_id)

    self.id_watch.update_last_run_time()
    return list(new_exposes)
def test_exposes_are_returned_filtered():
    """Recent exposes fetched with a max-size filter of 70 all respect that size.

    Two hunting passes are run against an in-memory ``IdMaintainer`` before
    querying; the query must return exactly 10 exposes.
    """
    config = Config(string=IdMaintainerTest.CONFIG_WITH_FILTERS)
    id_watch = IdMaintainer(":memory:")
    hunter = Hunter(config, [DummyCrawler()], id_watch)
    for _ in range(2):
        hunter.hunt_flats()

    size_filter = Filter.builder().max_size_filter(70).build()
    saved = id_watch.get_recent_exposes(10, filter_set=size_filter)

    assert len(saved) == 10
    for expose in saved:
        # The size field is text; compare its leading run of digits as an integer.
        leading_digits = re.match(r'\d+', expose['size'])[0]
        assert int(leading_digits) <= 70
def test_exposes_are_returned_filtered(id_watch):
    """Recent exposes fetched with a max-size filter of 70 all respect that size.

    Args:
        id_watch: the object passed to ``Hunter`` and later queried for
            recent exposes.
    """
    config = Config(string=CONFIG_WITH_FILTERS)
    config.set_searchers([DummyCrawler()])
    hunter = Hunter(config, id_watch)
    for _ in range(2):
        hunter.hunt_flats()

    size_filter = Filter.builder().max_size_filter(70).build()
    saved = id_watch.get_recent_exposes(10, filter=size_filter)

    assert len(saved) == 10
    for expose in saved:
        # The size field is text; compare its leading run of digits as an integer.
        leading_digits = re.match(r'\d+', expose['size'])[0]
        assert int(leading_digits) <= 70
def hunt_flats(self, max_pages=None):
    """Crawl for exposes, run them through the processor chain and return them.

    The chain is built, in this order, with: saving all exposes to
    ``self.id_watch``, applying a filter (read from ``self.config`` plus an
    already-seen filter), address resolution, duration calculation and
    Telegram message sending.

    Args:
        max_pages: passed positionally to ``self.crawl_for_exposes``.

    Returns:
        list: the exposes that came out of the processor chain.
    """
    filter_set = (
        Filter.builder()
        .read_config(self.config)
        .filter_already_seen(self.id_watch)
        .build()
    )
    processor_chain = (
        ProcessorChain.builder(self.config)
        .save_all_exposes(self.id_watch)
        .apply_filter(filter_set)
        .resolve_addresses()
        .calculate_durations()
        .send_telegram_messages()
        .build()
    )

    result = []
    # We need to iterate over the output to force the evaluation of the pipeline.
    for expose in processor_chain.process(self.crawl_for_exposes(max_pages)):
        # Lazy %-style argument: no TypeError when the title is not a str, and
        # no formatting work when INFO logging is disabled.
        self.__log__.info('New offer: %s', expose['title'])
        result.append(expose)
    return result
def get_filter(self):
    """Build a filter from ``self.config`` and return it."""
    filter_builder = Filter.builder()
    # Called as a separate statement; its return value is deliberately unused.
    filter_builder.read_config(self.config)
    return filter_builder.build()
def get_filter(self):
    """Return the filter built after reading ``self.config`` into a new builder."""
    config_builder = Filter.builder()
    # Called as a separate statement; its return value is deliberately unused.
    config_builder.read_config(self.config)
    return config_builder.build()