Esempio n. 1
0
def client():
    """Yield a test client for ``app`` wired to a throwaway ``Hunter``.

    Flags the app as being under test, then stores a ``Hunter`` built from
    ``DUMMY_CONFIG``, a single ``CrawlImmowelt`` searcher and an
    ``IdMaintainer`` pointing at a named temporary file under
    ``app.config['HUNTER']``. The temporary file and the test client are
    both released when the generator finishes.

    NOTE(review): the generator shape suggests this is a pytest fixture;
    the decorator is not visible here -- confirm.
    """
    app.config['TESTING'] = True
    with tempfile.NamedTemporaryFile(mode='w+') as db_file:
        dummy_config = Config(string=DUMMY_CONFIG)
        searchers = [CrawlImmowelt()]
        id_watch = IdMaintainer(db_file.name)
        app.config['HUNTER'] = Hunter(dummy_config, searchers, id_watch)

        with app.test_client() as test_client:
            yield test_client
Esempio n. 2
0
class Config:
    """Class to represent flathunter configuration

    Wraps the dictionary parsed from a YAML document and exposes a small
    dictionary-like interface (iteration, ``[]`` and ``get``) on top of it.
    """

    __log__ = logging.getLogger('flathunt')
    # Class-level list of search plugins; shared by every Config instance
    # and replaced as a whole by set_searchers().
    __searchers__ = [
        CrawlImmobilienscout(),
        CrawlWgGesucht(),
        CrawlEbayKleinanzeigen(),
        CrawlImmowelt()
    ]

    def __init__(self, filename=None, string=None):
        """Load the configuration from a YAML string or a YAML file.

        :param filename: path of the YAML file to read; when omitted,
            ``config.yaml`` in the parent of this module's directory is used.
        :param string: YAML document given directly as text; when it is not
            ``None`` it takes precedence and no file is read.
        """
        if string is not None:
            self.config = yaml.safe_load(string)
            return
        if filename is None:
            filename = os.path.dirname(
                os.path.abspath(__file__)) + "/../config.yaml"
        self.__log__.info("Using config %s", filename)
        # Decode explicitly as UTF-8 instead of the locale default, so that
        # non-ASCII text in the config is read the same way on every platform.
        with open(filename, encoding="utf-8") as file:
            self.config = yaml.safe_load(file)

    def __iter__(self):
        """Emulate dictionary"""
        return self.config.__iter__()

    def __getitem__(self, value):
        """Emulate dictionary"""
        return self.config[value]

    def get(self, key, value=None):
        """Emulate dictionary"""
        return self.config.get(key, value)

    def database_location(self):
        """Return the location of the database folder

        Uses the ``database_location`` config entry when the key is present,
        otherwise the parent of this module's directory.
        """
        if "database_location" in self.config:
            return self.config["database_location"]
        return os.path.abspath(
            os.path.dirname(os.path.abspath(__file__)) + "/..")

    @staticmethod
    def set_searchers(searchers):
        """Update the active search plugins"""
        Config.__searchers__ = searchers

    @staticmethod
    def searchers():
        """Get the list of search plugins"""
        return Config.__searchers__

    def get_filter(self):
        """Read the configured filter"""
        builder = Filter.builder()
        builder.read_config(self.config)
        return builder.build()
Esempio n. 3
0
class ImmoweltCrawlerTest(unittest.TestCase):
    """Checks that ``CrawlImmowelt`` extracts entries from a search URL.

    NOTE(review): ``get_page`` presumably fetches ``TEST_URL`` over the
    network, which would make this an online test -- confirm.
    """

    TEST_URL = 'https://www.immowelt.de/liste/berlin/wohnungen/mieten?roomi=2&prima=1500&wflmi=70&sort=createdate%2Bdesc'

    def setUp(self):
        """Create the crawler used by the test."""
        self.crawler = CrawlImmowelt()

    def test(self):
        """Load TEST_URL and sanity-check the first extracted entry."""
        soup = self.crawler.get_page(self.TEST_URL)
        self.assertIsNotNone(soup, "Should get a soup from the URL")
        entries = self.crawler.extract_data(soup)
        self.assertIsNotNone(entries, "Should parse entries from search URL")
        # assertGreater reports both operands on failure, unlike
        # assertTrue(a > b) which only reports "False is not true".
        self.assertGreater(len(entries), 0, "Should have at least one entry")
        first = entries[0]
        self.assertGreater(first['id'], 0, "Id should be parsed")
        self.assertTrue(
            first['url'].startswith("https://www.immowelt.de/expose"),
            u"URL should be an exposé link")
        for attr in ['title', 'price', 'size', 'rooms', 'address']:
            self.assertIsNotNone(first[attr], attr + " should be set")
Esempio n. 4
0
class Config:
    """Flathunter configuration loaded from YAML.

    Wraps the parsed dictionary and exposes a small dictionary-like
    interface (iteration, ``[]`` and ``get``) on top of it.
    """

    __log__ = logging.getLogger(__name__)
    # Class-level list of search plugins; shared by every Config instance
    # and replaced as a whole by set_searchers().
    __searchers__ = [
        CrawlImmobilienscout(),
        CrawlWgGesucht(),
        CrawlEbayKleinanzeigen(),
        CrawlImmowelt()
    ]

    def __init__(self, filename=None, string=None):
        """Load the configuration from a YAML string or a YAML file.

        :param filename: path of the YAML file to read; when omitted,
            ``config.yaml`` in the parent of this module's directory is used.
        :param string: YAML document given directly as text; when it is not
            ``None`` it takes precedence and no file is read.
        """
        if string is not None:
            self.config = yaml.safe_load(string)
            return
        if filename is None:
            filename = os.path.dirname(
                os.path.abspath(__file__)) + "/../config.yaml"
        # Pass the argument to the logger instead of pre-formatting, so the
        # message is only built when the record is actually emitted.
        self.__log__.info("Using config %s", filename)
        # Decode explicitly as UTF-8 instead of the locale default, so that
        # non-ASCII text in the config is read the same way on every platform.
        with open(filename, encoding="utf-8") as file:
            self.config = yaml.safe_load(file)

    def __iter__(self):
        """Iterate over the top-level configuration keys."""
        return self.config.__iter__()

    def __getitem__(self, value):
        """Return the configuration entry stored under key ``value``."""
        return self.config[value]

    def get(self, key, value=None):
        """Return the entry for ``key``, or ``value`` when it is missing."""
        return self.config.get(key, value)

    def database_location(self):
        """Return the folder in which the database is kept.

        Uses the ``database_location`` config entry when the key is present,
        otherwise the parent of this module's directory.
        """
        if "database_location" in self.config:
            return self.config["database_location"]
        return os.path.abspath(
            os.path.dirname(os.path.abspath(__file__)) + "/..")

    @staticmethod
    def set_searchers(searchers):
        """Replace the class-wide list of search plugins."""
        Config.__searchers__ = searchers

    @staticmethod
    def searchers():
        """Return the class-wide list of search plugins."""
        return Config.__searchers__

    def get_filter(self):
        """Build the filter described by the loaded configuration."""
        builder = Filter.builder()
        builder.read_config(self.config)
        return builder.build()
Esempio n. 5
0
 def __init__(self, filename=None, string=None):
     """Load the YAML configuration and create the search plugins.

     :param filename: path of the YAML file to read; when omitted,
         ``config.yaml`` in the parent of this module's directory is used.
     :param string: YAML document given directly as text; when it is not
         ``None`` it takes precedence and no file is read.
     """
     if string is not None:
         self.config = yaml.safe_load(string)
     else:
         if filename is None:
             filename = os.path.dirname(os.path.abspath(__file__)) + "/../config.yaml"
         self.__log__.info("Using config %s", filename)
         # Decode explicitly as UTF-8 instead of the locale default, so that
         # non-ASCII text in the config is read the same way everywhere.
         with open(filename, encoding="utf-8") as file:
             self.config = yaml.safe_load(file)
     # Each crawler receives this instance; self.config is already set above.
     self.__searchers__ = [
         CrawlImmobilienscout(self),
         CrawlWgGesucht(self),
         CrawlEbayKleinanzeigen(self),
         CrawlImmowelt(self),
         CrawlSubito(self),
         CrawlImmobiliare(self),
         CrawlIdealista(self),
     ]
Esempio n. 6
0
def launch_flat_hunt(config):
    """Run one flat hunt and keep repeating it while looping is enabled.

    The hunt is always executed once. Afterwards the ``loop`` section of
    ``config`` is consulted before every further round: while its ``active``
    entry is truthy, the function sleeps for ``sleeping_time`` seconds
    (default ``60 * 10``) and hunts again.

    :param config: mapping-like configuration offering ``get``.
    """
    searchers = [
        CrawlImmobilienscout(),
        CrawlWgGesucht(),
        CrawlEbayKleinanzeigen(),
        CrawlImmowelt()
    ]
    id_watch = IdMaintainer('%s/processed_ids.db' %
                            os.path.dirname(os.path.abspath(__file__)))

    hunter = Hunter()
    hunter.hunt_flats(config, searchers, id_watch)

    while True:
        # An empty ``loop:`` key in YAML parses to None, which the default
        # argument of ``get`` does not replace -- fall back to {} explicitly.
        loop_settings = config.get('loop') or {}
        if not loop_settings.get('active', False):
            break
        time.sleep(loop_settings.get('sleeping_time', 60 * 10))
        hunter.hunt_flats(config, searchers, id_watch)
Esempio n. 7
0
 def test_hunt_flats(self):
     """Hunting with only the Immowelt crawler should yield exposes."""
     config = Config(string=self.DUMMY_CONFIG)
     config.set_searchers([CrawlImmowelt(Config(string=self.DUMMY_CONFIG))])
     hunter = Hunter(config, IdMaintainer(":memory:"))
     exposes = hunter.hunt_flats()
     # assertGreater shows the actual count when the check fails.
     self.assertGreater(count(exposes), 0, "Expected to find exposes")
Esempio n. 8
0
from flathunter.crawl_ebaykleinanzeigen import CrawlEbayKleinanzeigen
from flathunter.crawl_immobilienscout import CrawlImmobilienscout
from flathunter.crawl_wggesucht import CrawlWgGesucht
from flathunter.crawl_immowelt import CrawlImmowelt
from flathunter.idmaintainer import IdMaintainer
from flathunter.googlecloud_idmaintainer import GoogleCloudIdMaintainer
from flathunter.hunter import Hunter
from flathunter.config import Config

from flathunter.web import app

# Search plugins handed to the Hunter below.
searchers = [
    CrawlImmobilienscout(),
    CrawlWgGesucht(),
    CrawlEbayKleinanzeigen(),
    CrawlImmowelt(),
]

# Executed directly (running locally): keep processed ids in the SQLite file
# next to this module. Imported (running on the cloud): use the Google Cloud
# backed store instead.
id_watch = (
    IdMaintainer('%s/processed_ids.db' %
                 os.path.dirname(os.path.abspath(__file__)))
    if __name__ == '__main__'
    else GoogleCloudIdMaintainer()
)

hunter = Hunter(Config(), searchers, id_watch)

# Publish the hunter through the app configuration.
app.config["HUNTER"] = hunter

if __name__ == '__main__':
    # Local run only: serve the app on the loopback interface.
    app.run(host='127.0.0.1', port=8080, debug=True)
Esempio n. 9
0
def crawler():
    """Build a ``CrawlImmowelt`` configured from ``DUMMY_CONFIG``."""
    dummy_config = Config(string=DUMMY_CONFIG)
    return CrawlImmowelt(dummy_config)
Esempio n. 10
0
 def setUp(self):
     """Create ``self.hunter`` from the dummy config, one Immowelt crawler
     and an ``IdMaintainer`` opened on ``":memory:"``."""
     config = Config(string=self.DUMMY_CONFIG)
     searchers = [CrawlImmowelt()]
     id_watch = IdMaintainer(":memory:")
     self.hunter = Hunter(config, searchers, id_watch)
Esempio n. 11
0
 def setUp(self):
     """Store a new ``CrawlImmowelt`` instance on ``self.crawler``."""
     fresh_crawler = CrawlImmowelt()
     self.crawler = fresh_crawler
Esempio n. 12
0
def crawler():
    """Return a new ``CrawlImmowelt`` built without arguments."""
    instance = CrawlImmowelt()
    return instance