@pytest.fixture
def client():
    """Flask test client backed by a temporary SQLite database"""
    app.config['TESTING'] = True
    with tempfile.NamedTemporaryFile(mode='w+') as temp_db:
        app.config['HUNTER'] = Hunter(Config(string=DUMMY_CONFIG),
                                      [CrawlImmowelt()],
                                      IdMaintainer(temp_db.name))
        with app.test_client() as client:
            yield client
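# Hedged usage sketch (not from the original): a test that consumes the
# `client` fixture above. The '/' route and the expected 200 status are
# assumptions about flathunter's web interface, shown only to illustrate
# how pytest injects the fixture.
def test_index_page(client):
    response = client.get('/')
    assert response.status_code == 200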
class Config: """Class to represent flathunter configuration""" __log__ = logging.getLogger('flathunt') __searchers__ = [ CrawlImmobilienscout(), CrawlWgGesucht(), CrawlEbayKleinanzeigen(), CrawlImmowelt() ] def __init__(self, filename=None, string=None): if string is not None: self.config = yaml.safe_load(string) return if filename is None: filename = os.path.dirname( os.path.abspath(__file__)) + "/../config.yaml" self.__log__.info("Using config %s", filename) with open(filename) as file: self.config = yaml.safe_load(file) def __iter__(self): """Emulate dictionary""" return self.config.__iter__() def __getitem__(self, value): """Emulate dictionary""" return self.config[value] def get(self, key, value=None): """Emulate dictionary""" return self.config.get(key, value) def database_location(self): """Return the location of the database folder""" if "database_location" in self.config: return self.config["database_location"] return os.path.abspath( os.path.dirname(os.path.abspath(__file__)) + "/..") @staticmethod def set_searchers(searchers): """Update the active search plugins""" Config.__searchers__ = searchers @staticmethod def searchers(): """Get the list of search plugins""" return Config.__searchers__ def get_filter(self): """Read the configured filter""" builder = Filter.builder() builder.read_config(self.config) return builder.build()
class ImmoweltCrawlerTest(unittest.TestCase):
    TEST_URL = 'https://www.immowelt.de/liste/berlin/wohnungen/mieten?roomi=2&prima=1500&wflmi=70&sort=createdate%2Bdesc'

    def setUp(self):
        self.crawler = CrawlImmowelt()

    def test(self):
        soup = self.crawler.get_page(self.TEST_URL)
        self.assertIsNotNone(soup, "Should get a soup from the URL")
        entries = self.crawler.extract_data(soup)
        self.assertIsNotNone(entries, "Should parse entries from search URL")
        self.assertTrue(len(entries) > 0, "Should have at least one entry")
        self.assertTrue(entries[0]['id'] > 0, "Id should be parsed")
        self.assertTrue(
            entries[0]['url'].startswith("https://www.immowelt.de/expose"),
            "URL should be an exposé link")
        for attr in ['title', 'price', 'size', 'rooms', 'address']:
            self.assertIsNotNone(entries[0][attr], attr + " should be set")
class Config:
    __log__ = logging.getLogger(__name__)
    __searchers__ = [
        CrawlImmobilienscout(),
        CrawlWgGesucht(),
        CrawlEbayKleinanzeigen(),
        CrawlImmowelt()
    ]

    def __init__(self, filename=None, string=None):
        if string is not None:
            self.config = yaml.safe_load(string)
            return
        if filename is None:
            filename = os.path.dirname(
                os.path.abspath(__file__)) + "/../config.yaml"
        self.__log__.info("Using config %s", filename)
        with open(filename) as file:
            self.config = yaml.safe_load(file)

    def __iter__(self):
        return self.config.__iter__()

    def __getitem__(self, value):
        return self.config[value]

    def get(self, key, value=None):
        return self.config.get(key, value)

    def database_location(self):
        if "database_location" in self.config:
            return self.config["database_location"]
        return os.path.abspath(
            os.path.dirname(os.path.abspath(__file__)) + "/..")

    @staticmethod
    def set_searchers(searchers):
        Config.__searchers__ = searchers

    @staticmethod
    def searchers():
        return Config.__searchers__

    def get_filter(self):
        builder = Filter.builder()
        builder.read_config(self.config)
        return builder.build()
def __init__(self, filename=None, string=None):
    if string is not None:
        self.config = yaml.safe_load(string)
    else:
        if filename is None:
            filename = os.path.dirname(
                os.path.abspath(__file__)) + "/../config.yaml"
        self.__log__.info("Using config %s", filename)
        with open(filename) as file:
            self.config = yaml.safe_load(file)
    self.__searchers__ = [CrawlImmobilienscout(self),
                          CrawlWgGesucht(self),
                          CrawlEbayKleinanzeigen(self),
                          CrawlImmowelt(self),
                          CrawlSubito(self),
                          CrawlImmobiliare(self),
                          CrawlIdealista(self)]
def launch_flat_hunt(config):
    searchers = [
        CrawlImmobilienscout(),
        CrawlWgGesucht(),
        CrawlEbayKleinanzeigen(),
        CrawlImmowelt()
    ]
    id_watch = IdMaintainer('%s/processed_ids.db'
                            % os.path.dirname(os.path.abspath(__file__)))
    hunter = Hunter()
    hunter.hunt_flats(config, searchers, id_watch)
    while config.get('loop', dict()).get('active', False):
        time.sleep(config.get('loop', dict()).get('sleeping_time', 60 * 10))
        hunter.hunt_flats(config, searchers, id_watch)
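# Hypothetical invocation sketch: enable the polling loop via an inline
# config. In real use Config() would load config.yaml instead; the YAML
# below is an assumption that matches the keys launch_flat_hunt reads.
LOOP_CONFIG = """
loop:
  active: yes
  sleeping_time: 600
"""

if __name__ == '__main__':
    launch_flat_hunt(Config(string=LOOP_CONFIG))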
def test_hunt_flats(self):
    config = Config(string=self.DUMMY_CONFIG)
    config.set_searchers([CrawlImmowelt(Config(string=self.DUMMY_CONFIG))])
    hunter = Hunter(config, IdMaintainer(":memory:"))
    exposes = hunter.hunt_flats()
    self.assertTrue(count(exposes) > 0, "Expected to find exposes")
import os

from flathunter.crawl_ebaykleinanzeigen import CrawlEbayKleinanzeigen
from flathunter.crawl_immobilienscout import CrawlImmobilienscout
from flathunter.crawl_wggesucht import CrawlWgGesucht
from flathunter.crawl_immowelt import CrawlImmowelt
from flathunter.idmaintainer import IdMaintainer
from flathunter.googlecloud_idmaintainer import GoogleCloudIdMaintainer
from flathunter.hunter import Hunter
from flathunter.config import Config
from flathunter.web import app

searchers = [
    CrawlImmobilienscout(),
    CrawlWgGesucht(),
    CrawlEbayKleinanzeigen(),
    CrawlImmowelt()
]

if __name__ == '__main__':
    # Use the SQLite DB file if we are running locally
    id_watch = IdMaintainer('%s/processed_ids.db'
                            % os.path.dirname(os.path.abspath(__file__)))
else:
    # Use Google Cloud DB if we run on the cloud
    id_watch = GoogleCloudIdMaintainer()

hunter = Hunter(Config(), searchers, id_watch)
app.config["HUNTER"] = hunter

if __name__ == '__main__':
    app.run(host='127.0.0.1', port=8080, debug=True)
@pytest.fixture
def crawler():
    return CrawlImmowelt(Config(string=DUMMY_CONFIG))
def setUp(self):
    self.hunter = Hunter(Config(string=self.DUMMY_CONFIG),
                         [CrawlImmowelt()],
                         IdMaintainer(":memory:"))
def setUp(self):
    self.crawler = CrawlImmowelt()
@pytest.fixture
def crawler():
    return CrawlImmowelt()