Ejemplo n.º 1
0
    def hunt_flats(self, max_pages=1):
        """Crawl for exposes, process the new ones and notify users.

        The crawl results are pushed through a processing chain configured
        from ``self.config``; afterwards, for every user returned by
        ``self.id_watch.get_user_settings()`` that has no
        ``'mute_notifications'`` key in their settings, the collected exposes
        are run through a second chain built from that user's settings.

        Args:
            max_pages: Forwarded to ``self.crawl_for_exposes`` as the
                ``max_pages`` keyword argument.

        Returns:
            A list of the exposes yielded by the main processing chain.
        """
        seen_filter = (
            Filter.builder()
            .read_config(self.config)
            .filter_already_seen(self.id_watch)
            .build()
        )
        main_pipeline = (
            ProcessorChain.builder(self.config)
            .apply_filter(seen_filter)
            .crawl_expose_details()
            .save_all_exposes(self.id_watch)
            .resolve_addresses()
            .calculate_durations()
            .send_messages()
            .build()
        )

        # Materialising the result drives the whole chain to completion.
        crawled = self.crawl_for_exposes(max_pages=max_pages)
        new_exposes = list(main_pipeline.process(crawled))

        for user_id, settings in self.id_watch.get_user_settings():
            # Only the presence of the key is checked, not its value.
            if 'mute_notifications' not in settings:
                user_filter = Filter.builder().read_config(settings).build()
                user_pipeline = (
                    ProcessorChain.builder(self.config)
                    .apply_filter(user_filter)
                    .send_messages([user_id])
                    .build()
                )
                for message in user_pipeline.process(new_exposes):
                    self.__log__.debug("Sent expose %d to user %d", message['id'],
                                       user_id)

        self.id_watch.update_last_run_time()
        return list(new_exposes)
Ejemplo n.º 2
0
    def hunt_flats(self):
        """Crawl for exposes, process the new ones and notify users.

        The crawl is requested with ``max_pages=1``. Results are passed
        through a processing chain that applies an already-seen filter backed
        by ``self.id_watch``. Then, for every user returned by
        ``self.id_watch.get_user_settings()`` whose settings do not contain a
        ``'mute_notifications'`` key, the collected exposes are run through a
        chain built from that user's settings, ending in
        ``send_telegram_messages([user_id])``.

        Returns:
            A list of the exposes yielded by the main processing chain.
        """
        # Named ``filter_set`` rather than ``filter`` so the builtin is not
        # shadowed inside this method.
        filter_set = Filter.builder() \
                           .filter_already_seen(self.id_watch) \
                           .build()

        processor_chain = ProcessorChain.builder(self.config) \
                                        .apply_filter(filter_set) \
                                        .crawl_expose_details() \
                                        .save_all_exposes(self.id_watch) \
                                        .resolve_addresses() \
                                        .calculate_durations() \
                                        .build()

        # Building the list drives the processing chain to completion.
        new_exposes = list(processor_chain.process(
            self.crawl_for_exposes(max_pages=1)))

        for user_id, settings in self.id_watch.get_user_settings():
            # Only the presence of the key is checked, not its value.
            if 'mute_notifications' in settings:
                continue
            filter_set = Filter.builder().read_config(settings).build()
            processor_chain = ProcessorChain.builder(self.config) \
                                            .apply_filter(filter_set) \
                                            .send_telegram_messages([user_id]) \
                                            .build()
            for message in processor_chain.process(new_exposes):
                # Lazy %-style arguments: the message is only formatted when
                # debug logging is actually enabled.
                self.__log__.debug("Sent expose %s to user %s",
                                   message['id'], user_id)

        self.id_watch.update_last_run_time()
        return new_exposes
Ejemplo n.º 3
0
 def test_address_processor(self):
     """Check that the address-resolving chain replaces link addresses.

     Exposes from a ``DummyCrawler`` created with ``addresses_as_links=True``
     must have ``'address'`` values starting with ``'http'`` before the
     chain runs, and must not start with ``'http'`` afterwards.
     """
     dummy = DummyCrawler(addresses_as_links=True)
     config = Config(string=self.DUMMY_CONFIG)
     raw_exposes = dummy._get_results("https://www.example.com/search")
     for raw in raw_exposes:
         is_link = raw['address'].startswith('http')
         self.assertTrue(is_link, "Expected addresses not yet to be processed")
     builder = ProcessorChain.builder(config)
     address_chain = builder.resolve_addresses([dummy]).build()
     for resolved in address_chain.process(raw_exposes):
         still_link = resolved['address'].startswith('http')
         self.assertFalse(still_link, "Expected addresses to be processed")
Ejemplo n.º 4
0
    def hunt_flats(self, max_pages=None):
        """Crawl for exposes, run them through the processing chain, log them.

        The chain is configured from ``self.config``; it calls
        ``save_all_exposes(self.id_watch)`` before applying a filter built
        from the config plus an already-seen filter backed by
        ``self.id_watch``, and ends with ``send_telegram_messages()``.

        Args:
            max_pages: Forwarded positionally to ``self.crawl_for_exposes``.

        Returns:
            A list of the exposes yielded by the processing chain.
        """
        # Named ``filter_set`` rather than ``filter`` so the builtin is not
        # shadowed inside this method.
        filter_set = Filter.builder() \
                           .read_config(self.config) \
                           .filter_already_seen(self.id_watch) \
                           .build()

        processor_chain = ProcessorChain.builder(self.config) \
                                        .save_all_exposes(self.id_watch) \
                                        .apply_filter(filter_set) \
                                        .resolve_addresses() \
                                        .calculate_durations() \
                                        .send_telegram_messages() \
                                        .build()

        result = []
        # We need to iterate over this list to force the evaluation of the pipeline
        for expose in processor_chain.process(
                self.crawl_for_exposes(max_pages)):
            # Lazy %-style argument: avoids eager string concatenation and
            # does not raise if the title is not a str.
            self.__log__.info('New offer: %s', expose['title'])
            result.append(expose)

        return result