def test_hunt_flats(self):
    """Hunting with the Immowelt crawler yields at least one expose."""
    config = Config(string=self.DUMMY_CONFIG)
    crawler = CrawlImmowelt(Config(string=self.DUMMY_CONFIG))
    hunter = Hunter(config, [crawler], IdMaintainer(":memory:"))
    exposes = hunter.hunt_flats()
    self.assertTrue(count(exposes) > 0, "Expected to find exposes")
def test_is_processed_works(mocker):
    """After a hunt, every returned expose id is known to the maintainer."""
    watcher = IdMaintainer(":memory:")
    config = Config(string=IdMaintainerTest.DUMMY_CONFIG)
    found = Hunter(config, [DummyCrawler()], watcher).hunt_flats()
    assert count(found) > 4
    for entry in found:
        assert watcher.is_processed(entry['id'])
def test_is_processed_works(id_watch):
    """After a hunt, every returned expose id is known to the maintainer."""
    config = Config(string=CONFIG_WITH_FILTERS)
    config.set_searchers([DummyCrawler()])
    found = Hunter(config, id_watch).hunt_flats()
    assert count(found) > 4
    for entry in found:
        assert id_watch.is_processed(entry['id'])
def test_ids_are_added_to_maintainer(mocker):
    """Hunting marks each found expose id as processed on the maintainer."""
    watcher = IdMaintainer(":memory:")
    # Spy must be installed before the hunt so the calls are recorded
    spy = mocker.spy(watcher, "mark_processed")
    config = Config(string=IdMaintainerTest.DUMMY_CONFIG)
    found = Hunter(config, [DummyCrawler()], watcher).hunt_flats()
    assert count(found) > 4
    assert spy.call_count == 24
def test_exposes_are_returned_with_limit():
    """get_recent_exposes honours the requested result limit."""
    watcher = IdMaintainer(":memory:")
    config = Config(string=IdMaintainerTest.CONFIG_WITH_FILTERS)
    Hunter(config, [DummyCrawler()], watcher).hunt_flats()
    recent = watcher.get_recent_exposes(10)
    assert len(recent) == 10
    assert recent[0]['title'] is not None
def test_exposes_are_returned_with_limit(id_watch):
    """get_recent_exposes honours the requested result limit."""
    config = Config(string=CONFIG_WITH_FILTERS)
    config.set_searchers([DummyCrawler()])
    Hunter(config, id_watch).hunt_flats()
    recent = id_watch.get_recent_exposes(10)
    assert len(recent) == 10
    assert recent[0]['title'] is not None
def test_exposes_are_saved_to_maintainer(id_watch):
    """Hunted exposes end up persisted in the id maintainer."""
    config = Config(string=CONFIG_WITH_FILTERS)
    config.set_searchers([DummyCrawler()])
    found = Hunter(config, id_watch).hunt_flats()
    assert count(found) > 4
    cutoff = datetime.datetime.now() - datetime.timedelta(seconds=10)
    stored = id_watch.get_exposes_since(cutoff)
    assert len(stored) > 0
    assert count(found) < len(stored)
def test_exposes_are_saved_to_maintainer():
    """Hunted exposes end up persisted in the id maintainer."""
    watcher = IdMaintainer(":memory:")
    config = Config(string=IdMaintainerTest.CONFIG_WITH_FILTERS)
    found = Hunter(config, [DummyCrawler()], watcher).hunt_flats()
    assert count(found) > 4
    cutoff = datetime.datetime.now() - datetime.timedelta(seconds=10)
    stored = watcher.get_exposes_since(cutoff)
    assert len(stored) > 0
    assert count(found) < len(stored)
def test_exposes_are_returned_as_dictionaries(id_watch):
    """Persisted exposes come back as dicts carrying title and timestamp."""
    config = Config(string=CONFIG_WITH_FILTERS)
    config.set_searchers([DummyCrawler()])
    Hunter(config, id_watch).hunt_flats()
    cutoff = datetime.datetime.now() - datetime.timedelta(seconds=10)
    stored = id_watch.get_exposes_since(cutoff)
    assert len(stored) > 0
    first = stored[0]
    assert first['title'] is not None
    assert first['created_at'] is not None
def test_exposes_are_returned_as_dictionaries():
    """Persisted exposes come back as dicts carrying title and timestamp."""
    watcher = IdMaintainer(":memory:")
    config = Config(string=IdMaintainerTest.CONFIG_WITH_FILTERS)
    Hunter(config, [DummyCrawler()], watcher).hunt_flats()
    cutoff = datetime.datetime.now() - datetime.timedelta(seconds=10)
    stored = watcher.get_exposes_since(cutoff)
    assert len(stored) > 0
    first = stored[0]
    assert first['title'] is not None
    assert first['created_at'] is not None
def test_exposes_are_returned_filtered(id_watch):
    """get_recent_exposes applies the given filter to the stored exposes.

    Runs the hunt twice so duplicate ids are exercised, then asks for the
    ten most recent exposes no larger than 70 square metres.
    """
    config = Config(string=CONFIG_WITH_FILTERS)
    config.set_searchers([DummyCrawler()])
    hunter = Hunter(config, id_watch)
    hunter.hunt_flats()
    hunter.hunt_flats()
    # Named 'size_filter' so the local does not shadow the builtin 'filter'
    size_filter = Filter.builder().max_size_filter(70).build()
    saved = id_watch.get_recent_exposes(10, filter=size_filter)
    assert len(saved) == 10
    for expose in saved:
        # 'size' is a string starting with digits; compare its leading integer
        assert int(re.match(r'\d+', expose['size'])[0]) <= 70
def test_exposes_are_returned_filtered():
    """get_recent_exposes applies the given filter set to the stored exposes.

    Runs the hunt twice so duplicate ids are exercised, then asks for the
    ten most recent exposes no larger than 70 square metres.
    """
    config = Config(string=IdMaintainerTest.CONFIG_WITH_FILTERS)
    id_watch = IdMaintainer(":memory:")
    hunter = Hunter(config, [DummyCrawler()], id_watch)
    hunter.hunt_flats()
    hunter.hunt_flats()
    # Named 'size_filter' so the local does not shadow the builtin 'filter'
    size_filter = Filter.builder().max_size_filter(70).build()
    saved = id_watch.get_recent_exposes(10, filter_set=size_filter)
    assert len(saved) == 10
    for expose in saved:
        # 'size' is a string starting with digits; compare its leading integer
        assert int(re.match(r'\d+', expose['size'])[0]) <= 70
def test_filter_min_price(self):
    """Exposes cheaper than the configured minimum price must be filtered."""
    min_price = 700
    config = Config(string=self.FILTER_MIN_PRICE_CONFIG)
    hunter = Hunter(config, [DummyCrawler()], IdMaintainer(":memory:"))
    exposes = hunter.hunt_flats()
    self.assertTrue(count(exposes) > 4, "Expected to find exposes")

    def too_cheap(expose):
        # First numeric token of the price string, decimal comma or point
        price = float(re.search(r'\d+([\.,]\d+)?', expose['price'])[0])
        return price < min_price

    unfiltered = [expose for expose in exposes if too_cheap(expose)]
    for expose in unfiltered:
        print("Got unfiltered expose: ", expose)
    self.assertTrue(
        len(unfiltered) == 0, "Expected cheap flats to be filtered")
def client():
    """Yield a Flask test client backed by a temporary id database."""
    app.config['TESTING'] = True
    with tempfile.NamedTemporaryFile(mode='w+') as temp_db:
        hunter = Hunter(Config(string=DUMMY_CONFIG),
                        [CrawlImmowelt()],
                        IdMaintainer(temp_db.name))
        app.config['HUNTER'] = hunter
        with app.test_client() as client:
            yield client
def launch_flat_hunt(config):
    """Run one hunt, then keep hunting while the loop config stays active."""
    db_path = '%s/processed_ids.db' % config.database_location()
    hunter = Hunter(config, IdMaintainer(db_path))
    hunter.hunt_flats()
    # Loop settings are re-read each pass so deactivation takes effect
    while config.get('loop', dict()).get('active', False):
        pause = config.get('loop', dict()).get('sleeping_time', 60 * 10)
        time.sleep(pause)
        hunter.hunt_flats()
def launch_flat_hunt(config):
    """Run one hunt, then keep hunting while the loop config stays active."""
    module_dir = os.path.dirname(os.path.abspath(__file__))
    hunter = Hunter(config, IdMaintainer('%s/processed_ids.db' % module_dir))
    hunter.hunt_flats()
    # Loop settings are re-read each pass so deactivation takes effect
    while config.get('loop', dict()).get('active', False):
        pause = config.get('loop', dict()).get('sleeping_time', 60 * 10)
        time.sleep(pause)
        hunter.hunt_flats()
def launch_flat_hunt(config):
    """Run one hunt over all crawlers, then loop while configured to."""
    searchers = [CrawlImmobilienscout(),
                 CrawlWgGesucht(),
                 CrawlEbayKleinanzeigen()]
    db_path = '%s/processed_ids.db' % os.path.dirname(os.path.abspath(__file__))
    id_watch = IdMaintainer(db_path)
    hunter = Hunter()
    hunter.hunt_flats(config, searchers, id_watch)
    # Loop settings are re-read each pass so deactivation takes effect
    while config.get('loop', dict()).get('active', False):
        pause = config.get('loop', dict()).get('sleeping_time', 60 * 10)
        time.sleep(pause)
        hunter.hunt_flats(config, searchers, id_watch)
def launch_flat_hunt(config):
    """Start the crawler loop, publishing results over Redis pub/sub."""
    db_file = '%s/processed_ids.db' % config.database_location()
    hunter = Hunter(config, all_searchers(config),
                    IdMaintainer(db_file), RedisPubsub(config))
    hunter.hunt_flats()
    # Loop settings are re-read each pass so deactivation takes effect
    while config.get('loop', dict()).get('active', False):
        pause = config.get('loop', dict()).get('sleeping_time', 60 * 10)
        time.sleep(pause)
        hunter.hunt_flats()
class HunterTest(unittest.TestCase):
    """Integration test driving the Hunter with the Immowelt crawler."""

    DUMMY_CONFIG = """
urls:
  - https://www.immowelt.de/liste/berlin/wohnungen/mieten?roomi=2&prima=1500&wflmi=70&sort=createdate%2Bdesc

google_maps_api:
  key: SOME_KEY
  url: https://maps.googleapis.com/maps/api/distancematrix/json?origins={origin}&destinations={dest}&mode={mode}&sensor=true&key={key}&arrival_time={arrival}
  enable: true
"""

    def setUp(self):
        config = Config(string=self.DUMMY_CONFIG)
        self.hunter = Hunter(config, [CrawlImmowelt()], IdMaintainer(":memory:"))

    def test_hunt_flats(self):
        found = self.hunter.hunt_flats()
        self.assertTrue(len(found) > 0, "Expected to find exposes")
import os

from flathunter.crawl_immobilienscout import CrawlImmobilienscout
from flathunter.crawl_wggesucht import CrawlWgGesucht
from flathunter.crawl_ebaykleinanzeigen import CrawlEbayKleinanzeigen
from flathunter.crawl_immowelt import CrawlImmowelt
from flathunter.idmaintainer import IdMaintainer
from flathunter.googlecloud_idmaintainer import GoogleCloudIdMaintainer
from flathunter.hunter import Hunter
from flathunter.config import Config
from flathunter.web import app

# All crawlers the hunter should query on each run
searchers = [CrawlImmobilienscout(),
             CrawlWgGesucht(),
             CrawlEbayKleinanzeigen(),
             CrawlImmowelt()]

if __name__ == '__main__':
    # Use the SQLite DB file if we are running locally
    id_watch = IdMaintainer('%s/processed_ids.db'
                            % os.path.dirname(os.path.abspath(__file__)))
else:
    # Use Google Cloud DB if we run on the cloud
    id_watch = GoogleCloudIdMaintainer()

hunter = Hunter(Config(), searchers, id_watch)
app.config["HUNTER"] = hunter

if __name__ == '__main__':
    app.run(host='127.0.0.1', port=8080, debug=True)
def setUp(self):
    """Build a Hunter wired to an Immowelt crawler and an in-memory id store."""
    config = Config(string=self.DUMMY_CONFIG)
    self.hunter = Hunter(config, [CrawlImmowelt()], IdMaintainer(":memory:"))