def test_address_processor(self):
    """Check that the address-resolving chain rewrites link-style addresses.

    Exposes are fetched from a ``DummyCrawler`` created with
    ``addresses_as_links=True``. The test first asserts that every address
    starts with ``'http'``, then runs a processor chain built with
    ``resolve_addresses()`` and asserts that no address starts with
    ``'http'`` afterwards.
    """
    crawler = DummyCrawler(addresses_as_links=True)
    config = Config(string=self.DUMMY_CONFIG)
    config.set_searchers([crawler])
    exposes = crawler.get_results("https://www.example.com/search")
    for expose in exposes:
        self.assertTrue(expose['address'].startswith('http'),
                        "Expected addresses not yet to be processed")
    # Build the chain from the config that has the dummy crawler registered,
    # not from a second, freshly parsed Config that never saw set_searchers().
    # NOTE(review): presumably the address resolver looks searchers up via
    # the config it is given - confirm against ProcessorChain.
    chain = (
        ProcessorChain.builder(config)
        .resolve_addresses()
        .build()
    )
    exposes = chain.process(exposes)
    for expose in exposes:
        self.assertFalse(expose['address'].startswith('http'),
                         "Expected addresses to be processed")
def test_is_processed_works(id_watch):
    """Every expose returned by a hunt must be flagged as processed.

    Runs one hunt with ``CONFIG_WITH_FILTERS`` and a ``DummyCrawler`` as the
    only searcher, expects more than four exposes, and checks
    ``id_watch.is_processed`` for each returned expose id.
    """
    cfg = Config(string=CONFIG_WITH_FILTERS)
    cfg.set_searchers([DummyCrawler()])
    found = Hunter(cfg, id_watch).hunt_flats()
    assert count(found) > 4
    for item in found:
        assert id_watch.is_processed(item['id'])
def test_ids_are_added_to_maintainer(mocker):
    """A hunt must call ``mark_processed`` on the id maintainer 24 times.

    ``mocker.spy`` wraps ``mark_processed`` on an in-memory ``IdMaintainer``
    before the hunter is created, so the calls made during the hunt are
    counted.
    """
    cfg = Config(string=IdMaintainerTest.DUMMY_CONFIG)
    cfg.set_searchers([DummyCrawler()])
    maintainer = IdMaintainer(":memory:")
    mark_spy = mocker.spy(maintainer, "mark_processed")
    found = Hunter(cfg, maintainer).hunt_flats()
    assert count(found) > 4
    assert mark_spy.call_count == 24
def test_is_processed_works(mocker):
    """Every expose returned by a hunt must be flagged as processed.

    Uses an in-memory ``IdMaintainer`` and a ``DummyCrawler`` as the only
    searcher. The ``mocker`` argument is accepted but not used in the body.
    """
    cfg = Config(string=IdMaintainerTest.DUMMY_CONFIG)
    cfg.set_searchers([DummyCrawler()])
    maintainer = IdMaintainer(":memory:")
    found = Hunter(cfg, maintainer).hunt_flats()
    assert count(found) > 4
    for item in found:
        assert maintainer.is_processed(item['id'])
def test_exposes_are_returned_with_limit(id_watch):
    """``get_recent_exposes(10)`` must return exactly ten exposes after a hunt.

    The first returned expose is additionally required to have a non-``None``
    ``'title'`` entry.
    """
    cfg = Config(string=CONFIG_WITH_FILTERS)
    cfg.set_searchers([DummyCrawler()])
    Hunter(cfg, id_watch).hunt_flats()
    recent = id_watch.get_recent_exposes(10)
    assert len(recent) == 10
    assert recent[0]['title'] is not None
def test_exposes_are_saved_to_maintainer(id_watch):
    """Exposes seen during a hunt must be retrievable from ``id_watch``.

    After one hunt with ``CONFIG_WITH_FILTERS``, the exposes stored within
    the last ten seconds must be non-empty and strictly more numerous than
    the exposes the hunt returned.
    """
    cfg = Config(string=CONFIG_WITH_FILTERS)
    cfg.set_searchers([DummyCrawler()])
    found = Hunter(cfg, id_watch).hunt_flats()
    assert count(found) > 4
    cutoff = datetime.datetime.now() - datetime.timedelta(seconds=10)
    stored = id_watch.get_exposes_since(cutoff)
    assert len(stored) > 0
    assert count(found) < len(stored)
def test_addresses_are_processed_by_hunter(self):
    """A plain hunt must not return addresses that still start with 'http'.

    The only searcher is a ``DummyCrawler`` created with
    ``addresses_as_links=True``; ids are kept in an in-memory
    ``IdMaintainer``.
    """
    cfg = Config(string=self.DUMMY_CONFIG)
    link_crawler = DummyCrawler(addresses_as_links=True)
    cfg.set_searchers([link_crawler])
    maintainer = IdMaintainer(":memory:")
    found = Hunter(cfg, maintainer).hunt_flats()
    self.assertTrue(count(found) > 4, "Expected to find exposes")
    for item in found:
        still_a_link = item['address'].startswith('http')
        self.assertFalse(still_a_link,
                         "Expected addresses to be processed by default")
def test_exposes_are_returned_as_dictionaries(id_watch):
    """Stored exposes must be subscriptable by ``'title'`` and ``'created_at'``.

    After one hunt, the exposes stored within the last ten seconds must be
    non-empty, and the first one must have non-``None`` values for both keys.
    """
    cfg = Config(string=CONFIG_WITH_FILTERS)
    cfg.set_searchers([DummyCrawler()])
    Hunter(cfg, id_watch).hunt_flats()
    cutoff = datetime.datetime.now() - datetime.timedelta(seconds=10)
    stored = id_watch.get_exposes_since(cutoff)
    assert len(stored) > 0
    first = stored[0]
    assert first['title'] is not None
    assert first['created_at'] is not None
def hunt_client():
    """Yield a test client for ``app`` wired to a ``WebHunter``.

    Sets ``TESTING``, ``HUNTER`` and ``BOT_TOKEN`` in ``app.config`` and a
    fixed ``app.secret_key``. The ``IdMaintainer`` is given the name of a
    ``NamedTemporaryFile``, which stays open until the generator is resumed
    after the ``yield`` and finishes.
    """
    app.config['TESTING'] = True
    with tempfile.NamedTemporaryFile(mode='w+') as temp_db:
        cfg = Config(string=DUMMY_CONFIG)
        cfg.set_searchers([DummyCrawler()])
        id_store = IdMaintainer(temp_db.name)
        app.config['HUNTER'] = WebHunter(cfg, id_store)
        app.config['BOT_TOKEN'] = "1234xxx.12345"
        app.secret_key = b'test_session_key'
        with app.test_client() as client:
            yield client
def test_exposes_are_returned_filtered(id_watch):
    """``get_recent_exposes`` must honour a max-size filter of 70.

    Two hunts are run, then ten exposes are requested with the filter. The
    leading digits of each returned ``'size'`` value must be at most 70.
    """
    cfg = Config(string=CONFIG_WITH_FILTERS)
    cfg.set_searchers([DummyCrawler()])
    hunter = Hunter(cfg, id_watch)
    for _ in range(2):
        hunter.hunt_flats()
    size_filter = Filter.builder().max_size_filter(70).build()
    recent = id_watch.get_recent_exposes(10, filter=size_filter)
    assert len(recent) == 10
    for item in recent:
        leading_digits = re.match(r'\d+', item['size'])[0]
        assert int(leading_digits) <= 70
def test_filter_min_rooms(self):
    """A hunt with the min-rooms config must not return flats with fewer rooms.

    The room count is taken from the first number in ``expose['rooms']``.
    Any expose below ``min_rooms`` is printed to aid debugging before the
    final assertion fails.
    """
    min_rooms = 2
    config = Config(string=self.FILTER_MIN_ROOMS_CONFIG)
    config.set_searchers([DummyCrawler()])
    hunter = Hunter(config, IdMaintainer(":memory:"))
    exposes = hunter.hunt_flats()
    self.assertGreater(count(exposes), 4, "Expected to find exposes")

    def room_count(expose):
        """Return the first number found in the expose's 'rooms' text."""
        number = re.search(r'\d+([\.,]\d+)?', expose['rooms'])[0]
        # The pattern accepts a comma as decimal separator, but float()
        # only understands a dot - normalise before converting.
        return float(number.replace(',', '.'))

    unfiltered = [expose for expose in exposes
                  if room_count(expose) < min_rooms]
    for expose in unfiltered:
        print("Got unfiltered expose: ", expose)
    self.assertEqual(len(unfiltered), 0,
                     "Expected flats with too few rooms to be filtered")
def test_filter_max_size(self):
    """A hunt with the max-size config must not return flats above the limit.

    The size is taken from the first number in ``expose['size']``. Any
    expose above ``max_size`` is printed to aid debugging before the final
    assertion fails.
    """
    max_size = 80
    config = Config(string=self.FILTER_MAX_SIZE_CONFIG)
    config.set_searchers([DummyCrawler()])
    hunter = Hunter(config, IdMaintainer(":memory:"))
    exposes = hunter.hunt_flats()
    self.assertGreater(count(exposes), 4, "Expected to find exposes")

    def flat_size(expose):
        """Return the first number found in the expose's 'size' text."""
        number = re.search(r'\d+([\.,]\d+)?', expose['size'])[0]
        # The pattern accepts a comma as decimal separator, but float()
        # only understands a dot - normalise before converting.
        return float(number.replace(',', '.'))

    unfiltered = [expose for expose in exposes
                  if flat_size(expose) > max_size]
    for expose in unfiltered:
        print("Got unfiltered expose: ", expose)
    self.assertEqual(len(unfiltered), 0, "Expected big flats to be filtered")
def test_filter_titles(self):
    """A hunt with the title-filter config must drop titles with listed words.

    ``titlewords`` is handed to the ``DummyCrawler``. No returned expose may
    have a title containing any entry of ``filteredwords`` as a substring.
    Offending exposes are printed before the final assertion.
    """
    titlewords = ["wg", "tausch", "flat", "ruhig", "gruen"]
    filteredwords = [
        "wg",
        "tausch",
        "wochenendheimfahrer",
        "pendler",
        "zwischenmiete",
    ]
    cfg = Config(string=self.FILTER_TITLES_CONFIG)
    cfg.set_searchers([DummyCrawler(titlewords)])
    found = Hunter(cfg, IdMaintainer(":memory:")).hunt_flats()
    self.assertTrue(count(found) > 4, "Expected to find exposes")
    offenders = [
        item for item in found
        if any(word in item['title'] for word in filteredwords)
    ]
    for item in offenders:
        print("Got unfiltered expose: ", item)
    self.assertTrue(len(offenders) == 0, "Expected words to be filtered")
def test_resolve_durations(self, m):
    """Every expose returned by a hunt must carry a ``'durations'`` key.

    ``m.get`` registers a canned JSON response for URLs matching the
    distance-matrix pattern before the hunt runs. Exposes lacking the key
    are printed before the final assertion.
    NOTE(review): ``m`` looks like a requests-mock object injected by a
    decorator outside this block - confirm.
    """
    cfg = Config(string=self.DUMMY_CONFIG)
    cfg.set_searchers([DummyCrawler()])
    hunter = Hunter(cfg, IdMaintainer(":memory:"))
    distance_api = re.compile(
        'maps.googleapis.com/maps/api/distancematrix/json')
    canned_response = '{"status": "OK", "rows": [ { "elements": [ { "distance": { "text": "far", "value": 123 }, "duration": { "text": "days", "value": 123 } } ] } ]}'
    m.get(distance_api, text=canned_response)
    found = hunter.hunt_flats()
    self.assertTrue(count(found) > 4, "Expected to find exposes")
    missing = [item for item in found if 'durations' not in item]
    for item in missing:
        print("Got expose: ", item)
    self.assertTrue(len(missing) == 0, "Expected durations to be calculated")