def test_exposes_are_saved_to_maintainer():
    """Exposes found during a hunt must be persisted by the id maintainer."""
    config = Config(string=IdMaintainerTest.CONFIG_WITH_FILTERS)
    maintainer = IdMaintainer(":memory:")
    flat_hunter = Hunter(config, [DummyCrawler()], maintainer)
    found = flat_hunter.hunt_flats()
    assert count(found) > 4
    cutoff = datetime.datetime.now() - datetime.timedelta(seconds=10)
    persisted = maintainer.get_exposes_since(cutoff)
    assert len(persisted) > 0
    # NOTE(review): count() is called on `found` a second time here — this
    # presumes hunt_flats() returns a re-iterable collection, not a one-shot
    # generator; verify against Hunter.hunt_flats.
    assert count(found) < len(persisted)
def test_addresses_are_processed_by_hunter(self):
    """Address links in crawled exposes should be resolved by default."""
    hunter = Hunter(Config(string=self.DUMMY_CONFIG),
                    [DummyCrawler(addresses_as_links=True)],
                    IdMaintainer(":memory:"))
    results = hunter.hunt_flats()
    self.assertTrue(count(results) > 4, "Expected to find exposes")
    # The crawler emitted addresses as links; the hunter should have
    # replaced each with a processed (non-URL) address.
    for flat in results:
        self.assertFalse(flat['address'].startswith('http'),
                         "Expected addresses to be processed by default")
def test_hunt_flats(self):
    """A hunt using the Immowelt crawler should yield at least one expose."""
    dummy_config = Config(string=self.DUMMY_CONFIG)
    immowelt = CrawlImmowelt(Config(string=self.DUMMY_CONFIG))
    hunter = Hunter(dummy_config, [immowelt], IdMaintainer(":memory:"))
    found = hunter.hunt_flats()
    self.assertTrue(count(found) > 0, "Expected to find exposes")
def test_ids_are_added_to_maintainer(mocker):
    """Each hunted expose should be marked as processed in the maintainer."""
    config = Config(string=IdMaintainerTest.DUMMY_CONFIG)
    maintainer = IdMaintainer(":memory:")
    mark_spy = mocker.spy(maintainer, "mark_processed")
    found = Hunter(config, [DummyCrawler()], maintainer).hunt_flats()
    assert count(found) > 4
    # 24 — presumably one mark_processed() call per expose produced by
    # DummyCrawler; confirm against the crawler's fixture size.
    assert mark_spy.call_count == 24
def test_is_processed_works(mocker):
    """After a hunt, every found expose id must be flagged as processed."""
    config = Config(string=IdMaintainerTest.DUMMY_CONFIG)
    maintainer = IdMaintainer(":memory:")
    found = Hunter(config, [DummyCrawler()], maintainer).hunt_flats()
    assert count(found) > 4
    assert all(maintainer.is_processed(flat['id']) for flat in found)
def test_filter_min_size(self):
    """Exposes below the configured minimum size must be filtered out."""
    min_size = 80
    hunter = Hunter(Config(string=self.FILTER_MIN_SIZE_CONFIG),
                    [DummyCrawler()], IdMaintainer(":memory:"))
    found = hunter.hunt_flats()
    self.assertTrue(count(found) > 4, "Expected to find exposes")

    def too_small(expose):
        # First numeric token of the size string, e.g. "75,5 qm" -> 75.0-ish.
        return float(re.search(r'\d+([\.,]\d+)?', expose['size'])[0]) < min_size

    leaked = [expose for expose in found if too_small(expose)]
    for expose in leaked:
        print("Got unfiltered expose: ", expose)
    self.assertTrue(len(leaked) == 0, "Expected small flats to be filtered")
def test_filter_max_price(self):
    """Exposes above the configured maximum price must be filtered out."""
    max_price = 1000
    hunter = Hunter(Config(string=self.FILTER_MAX_PRICE_CONFIG),
                    [DummyCrawler()], IdMaintainer(":memory:"))
    found = hunter.hunt_flats()
    self.assertTrue(count(found) > 4, "Expected to find exposes")

    def too_expensive(expose):
        # First numeric token of the price string.
        return float(re.search(r'\d+([\.,]\d+)?', expose['price'])[0]) > max_price

    leaked = [expose for expose in found if too_expensive(expose)]
    for expose in leaked:
        print("Got unfiltered expose: ", expose)
    self.assertTrue(len(leaked) == 0, "Expected expensive flats to be filtered")
def test_filter_titles(self):
    """Exposes whose titles contain blacklisted words must be filtered out."""
    title_words = ["wg", "tausch", "flat", "ruhig", "gruen"]
    banned_words = [
        "wg", "tausch", "wochenendheimfahrer", "pendler", "zwischenmiete"
    ]
    hunter = Hunter(Config(string=self.FILTER_TITLES_CONFIG),
                    [DummyCrawler(title_words)], IdMaintainer(":memory:"))
    found = hunter.hunt_flats()
    self.assertTrue(count(found) > 4, "Expected to find exposes")
    leaked = [
        expose for expose in found
        if any(word in expose['title'] for word in banned_words)
    ]
    for expose in leaked:
        print("Got unfiltered expose: ", expose)
    self.assertTrue(len(leaked) == 0, "Expected words to be filtered")
def test_resolve_durations(self, m):
    """Every hunted expose should carry a 'durations' entry after the hunt."""
    hunter = Hunter(Config(string=self.DUMMY_CONFIG),
                    [DummyCrawler()], IdMaintainer(":memory:"))
    # Stub the Google Distance Matrix endpoint with a canned OK response so
    # duration resolution succeeds without hitting the network.
    url_matcher = re.compile(
        'maps.googleapis.com/maps/api/distancematrix/json')
    m.get(
        url_matcher,
        text=
        '{"status": "OK", "rows": [ { "elements": [ { "distance": { "text": "far", "value": 123 }, "duration": { "text": "days", "value": 123 } } ] } ]}'
    )
    found = hunter.hunt_flats()
    self.assertTrue(count(found) > 4, "Expected to find exposes")
    missing = [expose for expose in found if 'durations' not in expose]
    for expose in missing:
        print("Got expose: ", expose)
    self.assertTrue(
        len(missing) == 0, "Expected durations to be calculated")
def test_dont_crawl_other_urls(crawler):
    """A URL outside the crawler's domain should yield zero exposes."""
    results = crawler.crawl("https://www.example.com")
    assert count(results) == 0