class WgGesuchtCrawlerTest(unittest.TestCase):
    """Exercise ``CrawlWgGesucht`` against a WG-Gesucht search URL.

    The test asks the crawler for ``TEST_URL`` and checks that the entries
    it extracts carry the expected fields.
    """

    TEST_URL = 'https://www.wg-gesucht.de/wohnungen-in-Berlin.8.2.1.0.html?offer_filter=1&city_id=8&noDeact=1&categories%5B%5D=2&rent_types%5B%5D=0&sMin=70&rMax=1500&rmMin=2&fur=2&sin=2&exc=2&img_only=1'

    def setUp(self):
        """Create a fresh crawler for every test."""
        self.crawler = CrawlWgGesucht()

    def test(self):
        """Fetch the search page and validate the parsed entries."""
        soup = self.crawler.get_page(self.TEST_URL)
        self.assertIsNotNone(soup, "Should get a soup from the URL")

        entries = self.crawler.extract_data(soup)
        self.assertIsNotNone(entries, "Should parse entries from search URL")
        self.assertGreater(len(entries), 0, "Should have at least one entry")

        first = entries[0]
        self.assertGreater(first['id'], 0, "Id should be parsed")
        self.assertTrue(
            first['url'].startswith("https://www.wg-gesucht.de/wohnungen"),
            "URL should be an apartment link")

        # Attributes that must be populated on the first entry.
        for attr in ['title', 'price', 'size', 'rooms', 'address', 'image', 'from']:
            self.assertIsNotNone(first[attr], attr + " should be set")

        # Optional attributes: it is enough that at least one entry has them.
        for attr in ['to']:
            found = any(attr in entry for entry in entries)
            self.assertTrue(found, "Expected " + attr + " to sometimes be set")
class Config:
    """Class to represent flathunter configuration

    Wraps the dictionary parsed from a YAML document and exposes a small
    dict-like interface (iteration, item access, ``get``) on top of it.
    """

    __log__ = logging.getLogger('flathunt')
    # Search plugins shared at class level; replace them with set_searchers().
    __searchers__ = [
        CrawlImmobilienscout(),
        CrawlWgGesucht(),
        CrawlEbayKleinanzeigen(),
        CrawlImmowelt()
    ]

    def __init__(self, filename=None, string=None):
        """Load the configuration from a YAML string or a YAML file.

        Args:
            filename: Path of the YAML file to read. When omitted (and no
                ``string`` is given) ``config.yaml`` in the parent of this
                module's directory is used.
            string: YAML document to parse directly. Takes precedence over
                ``filename``; no file is opened when it is given.
        """
        if string is not None:
            self.config = yaml.safe_load(string)
            return
        if filename is None:
            filename = os.path.dirname(
                os.path.abspath(__file__)) + "/../config.yaml"
        self.__log__.info("Using config %s", filename)
        # Decode explicitly as UTF-8 so that parsing does not depend on the
        # platform's locale encoding.
        with open(filename, encoding="utf-8") as file:
            self.config = yaml.safe_load(file)

    def __iter__(self):
        """Emulate dictionary"""
        return self.config.__iter__()

    def __getitem__(self, value):
        """Emulate dictionary"""
        return self.config[value]

    def get(self, key, value=None):
        """Emulate dictionary"""
        return self.config.get(key, value)

    def database_location(self):
        """Return the location of the database folder

        Uses the ``database_location`` config entry when present, otherwise
        the absolute path of the parent of this module's directory.
        """
        if "database_location" in self.config:
            return self.config["database_location"]
        return os.path.abspath(
            os.path.dirname(os.path.abspath(__file__)) + "/..")

    @staticmethod
    def set_searchers(searchers):
        """Update the active search plugins"""
        Config.__searchers__ = searchers

    @staticmethod
    def searchers():
        """Get the list of search plugins"""
        return Config.__searchers__

    def get_filter(self):
        """Read the configured filter"""
        builder = Filter.builder()
        builder.read_config(self.config)
        return builder.build()
def launch_flat_hunt(config):
    """Run one flat hunt, then repeat for as long as looping is active.

    ``config`` must provide a dict-style ``get``. Its ``loop`` section is
    re-read on every pass: ``active`` (default ``False``) decides whether to
    continue and ``sleeping_time`` (default 600) is the number of seconds to
    sleep before the next hunt.
    """
    crawlers = [CrawlImmobilienscout(), CrawlWgGesucht(), CrawlEbayKleinanzeigen()]
    module_dir = os.path.dirname(os.path.abspath(__file__))
    seen_ids = IdMaintainer('%s/processed_ids.db' % module_dir)
    flat_hunter = Hunter()

    flat_hunter.hunt_flats(config, crawlers, seen_ids)
    while True:
        if not config.get('loop', dict()).get('active', False):
            break
        pause = config.get('loop', dict()).get('sleeping_time', 60 * 10)
        time.sleep(pause)
        flat_hunter.hunt_flats(config, crawlers, seen_ids)
class Config:
    """Flathunter configuration backed by a parsed YAML document.

    Exposes a small dict-like interface (iteration, item access, ``get``)
    over the parsed data, plus the class-wide list of search plugins.
    """

    __log__ = logging.getLogger(__name__)
    # Search plugins shared at class level; replace them with set_searchers().
    __searchers__ = [
        CrawlImmobilienscout(),
        CrawlWgGesucht(),
        CrawlEbayKleinanzeigen(),
        CrawlImmowelt()
    ]

    def __init__(self, filename=None, string=None):
        """Load the configuration from a YAML string or a YAML file.

        Args:
            filename: Path of the YAML file to read. When omitted (and no
                ``string`` is given) ``config.yaml`` in the parent of this
                module's directory is used.
            string: YAML document to parse directly. Takes precedence over
                ``filename``; no file is opened when it is given.
        """
        if string is not None:
            self.config = yaml.safe_load(string)
            return
        if filename is None:
            filename = os.path.dirname(
                os.path.abspath(__file__)) + "/../config.yaml"
        # Pass the argument separately so formatting is deferred to logging.
        self.__log__.info("Using config %s", filename)
        # Decode explicitly as UTF-8 so that parsing does not depend on the
        # platform's locale encoding.
        with open(filename, encoding="utf-8") as file:
            self.config = yaml.safe_load(file)

    def __iter__(self):
        """Iterate over the parsed configuration, like a dictionary."""
        return self.config.__iter__()

    def __getitem__(self, value):
        """Return the configuration entry stored under ``value``."""
        return self.config[value]

    def get(self, key, value=None):
        """Return the entry for ``key``, or ``value`` when it is missing."""
        return self.config.get(key, value)

    def database_location(self):
        """Return the database folder.

        Uses the ``database_location`` config entry when present, otherwise
        the absolute path of the parent of this module's directory.
        """
        if "database_location" in self.config:
            return self.config["database_location"]
        return os.path.abspath(
            os.path.dirname(os.path.abspath(__file__)) + "/..")

    @staticmethod
    def set_searchers(searchers):
        """Replace the class-wide list of search plugins."""
        Config.__searchers__ = searchers

    @staticmethod
    def searchers():
        """Return the class-wide list of search plugins."""
        return Config.__searchers__

    def get_filter(self):
        """Build a filter from this configuration via ``Filter.builder()``."""
        builder = Filter.builder()
        builder.read_config(self.config)
        return builder.build()
def __init__(self, filename=None, string=None):
    """Load the YAML configuration and create the search plugins.

    Args:
        filename: Path of the YAML file to read. When omitted (and no
            ``string`` is given) ``config.yaml`` in the parent of this
            module's directory is used.
        string: YAML document to parse directly. Takes precedence over
            ``filename``; no file is opened when it is given.
    """
    if string is not None:
        self.config = yaml.safe_load(string)
    else:
        if filename is None:
            filename = os.path.dirname(os.path.abspath(__file__)) + "/../config.yaml"
        self.__log__.info("Using config %s", filename)
        # Decode explicitly as UTF-8 so that parsing does not depend on the
        # platform's locale encoding.
        with open(filename, encoding="utf-8") as file:
            self.config = yaml.safe_load(file)
    # Every crawler receives this configuration object, so the list is built
    # only after self.config has been set.
    self.__searchers__ = [
        CrawlImmobilienscout(self),
        CrawlWgGesucht(self),
        CrawlEbayKleinanzeigen(self),
        CrawlImmowelt(self),
        CrawlSubito(self),
        CrawlImmobiliare(self),
        CrawlIdealista(self),
    ]
import os

from flathunter.crawl_ebaykleinanzeigen import CrawlEbayKleinanzeigen
from flathunter.crawl_immobilienscout import CrawlImmobilienscout
from flathunter.crawl_wggesucht import CrawlWgGesucht
from flathunter.crawl_immowelt import CrawlImmowelt
from flathunter.idmaintainer import IdMaintainer
from flathunter.googlecloud_idmaintainer import GoogleCloudIdMaintainer
from flathunter.hunter import Hunter
from flathunter.config import Config
from flathunter.web import app

# Search plugins handed to the hunter below.
searchers = [
    CrawlImmobilienscout(),
    CrawlWgGesucht(),
    CrawlEbayKleinanzeigen(),
    CrawlImmowelt(),
]

if __name__ != '__main__':
    # Use Google Cloud DB if we run on the cloud
    id_watch = GoogleCloudIdMaintainer()
else:
    # Use the SQLite DB file if we are running locally
    id_watch = IdMaintainer(
        '%s/processed_ids.db' % os.path.dirname(os.path.abspath(__file__)))

# Wire the hunter into the web application's configuration.
hunter = Hunter(Config(), searchers, id_watch)
app.config["HUNTER"] = hunter
def setUp(self):
    """Create the crawler instance used by the tests."""
    crawler = CrawlWgGesucht()
    self.crawler = crawler
def setUp(self):
    """Build the crawler under test from the class's ``DUMMY_CONFIG`` string."""
    test_config = Config(string=self.DUMMY_CONFIG)
    self.crawler = CrawlWgGesucht(test_config)