예제 #1
0
def test_is_processed_works(mocker):
    """Every expose returned by a hunt must be reported as processed.

    The ``mocker`` argument is not used in the body; it is kept so the
    test signature stays unchanged.
    """
    cfg = Config(string=IdMaintainerTest.DUMMY_CONFIG)
    store = IdMaintainer(":memory:")
    flat_hunter = Hunter(cfg, [DummyCrawler()], store)
    found = flat_hunter.hunt_flats()
    assert count(found) > 4
    for item in found:
        assert store.is_processed(item['id'])
예제 #2
0
def test_exposes_are_saved_to_maintainer():
    """After a hunt, exposes can be read back from the store by timestamp."""
    cfg = Config(string=IdMaintainerTest.CONFIG_WITH_FILTERS)
    store = IdMaintainer(":memory:")
    flat_hunter = Hunter(cfg, [DummyCrawler()], store)
    found = flat_hunter.hunt_flats()
    assert count(found) > 4
    # Only look at exposes stored within the last ten seconds
    cutoff = datetime.datetime.now() - datetime.timedelta(seconds=10)
    stored = store.get_exposes_since(cutoff)
    assert len(stored) > 0
    # The hunt result is expected to be smaller than the set of stored exposes
    assert count(found) < len(stored)
예제 #3
0
def test_exposes_are_returned_with_limit():
    """Asking the store for 10 recent exposes must return exactly 10 entries."""
    cfg = Config(string=IdMaintainerTest.CONFIG_WITH_FILTERS)
    store = IdMaintainer(":memory:")
    flat_hunter = Hunter(cfg, [DummyCrawler()], store)
    flat_hunter.hunt_flats()
    recent = store.get_recent_exposes(10)
    assert len(recent) == 10
    first = recent[0]
    assert first['title'] is not None
예제 #4
0
def test_exposes_are_returned_as_dictionaries():
    """Stored exposes must be subscriptable by 'title' and 'created_at'."""
    cfg = Config(string=IdMaintainerTest.CONFIG_WITH_FILTERS)
    store = IdMaintainer(":memory:")
    flat_hunter = Hunter(cfg, [DummyCrawler()], store)
    flat_hunter.hunt_flats()
    cutoff = datetime.datetime.now() - datetime.timedelta(seconds=10)
    stored = store.get_exposes_since(cutoff)
    assert len(stored) > 0
    first = stored[0]
    for key in ('title', 'created_at'):
        assert first[key] is not None
예제 #5
0
def test_exposes_are_returned_filtered():
    """Recent exposes fetched with a max-size filter of 70 must all be <= 70."""
    cfg = Config(string=IdMaintainerTest.CONFIG_WITH_FILTERS)
    store = IdMaintainer(":memory:")
    flat_hunter = Hunter(cfg, [DummyCrawler()], store)
    # Hunt twice before querying the store
    for _ in range(2):
        flat_hunter.hunt_flats()
    size_filter = Filter.builder().max_size_filter(70).build()
    recent = store.get_recent_exposes(10, filter_set=size_filter)
    assert len(recent) == 10
    for item in recent:
        # The size is read from the leading digits of the 'size' field
        leading_digits = re.match(r'\d+', item['size'])
        assert int(leading_digits[0]) <= 70
def test_all_filters_can_be_loaded():
    """Filters saved for two users must both appear in the store's user settings."""
    cfg = Config(string=IdMaintainerTest.CONFIG_WITH_FILTERS)
    store = IdMaintainer(":memory:")
    user_filter = {'fish': 'cat'}
    web_hunter = WebHunter(cfg, store)
    user_ids = (123, 124)
    for user_id in user_ids:
        web_hunter.set_filters_for_user(user_id, user_filter)
    expected = [(user_id, {'filters': user_filter}) for user_id in user_ids]
    assert store.get_user_settings() == expected
def test_filters_for_user_are_saved():
    """A filter set for a user must be returned unchanged when read back."""
    cfg = Config(string=IdMaintainerTest.CONFIG_WITH_FILTERS)
    store = IdMaintainer(":memory:")
    user_filter = {'fish': 'cat'}
    web_hunter = WebHunter(cfg, store)
    web_hunter.set_filters_for_user(123, user_filter)
    loaded = web_hunter.get_filters_for_user(123)
    assert loaded == user_filter
예제 #8
0
 def test_addresses_are_processed_by_hunter(self):
     """Addresses the crawler supplies as links must not start with 'http' after a hunt."""
     cfg = Config(string=self.DUMMY_CONFIG)
     link_crawler = DummyCrawler(addresses_as_links=True)
     store = IdMaintainer(":memory:")
     flat_hunter = Hunter(cfg, [link_crawler], store)
     found = flat_hunter.hunt_flats()
     self.assertTrue(count(found) > 4, "Expected to find exposes")
     for item in found:
         is_link = item['address'].startswith('http')
         self.assertFalse(is_link, "Expected addresses to be processed by default")
예제 #9
0
 def test_hunt_flats(self):
     """A hunt with the Immowelt crawler must yield at least one expose."""
     cfg = Config(string=self.DUMMY_CONFIG)
     # The crawler is given its own Config built from the same string
     crawler = CrawlImmowelt(Config(string=self.DUMMY_CONFIG))
     store = IdMaintainer(":memory:")
     flat_hunter = Hunter(cfg, [crawler], store)
     found = flat_hunter.hunt_flats()
     self.assertTrue(count(found) > 0, "Expected to find exposes")
예제 #10
0
def test_ids_are_added_to_maintainer(mocker):
    """A hunt must call ``mark_processed`` on the ID store exactly 24 times."""
    cfg = Config(string=IdMaintainerTest.DUMMY_CONFIG)
    store = IdMaintainer(":memory:")
    # Install the spy before the hunter is built so every call is counted
    mark_spy = mocker.spy(store, "mark_processed")
    flat_hunter = Hunter(cfg, [DummyCrawler()], store)
    found = flat_hunter.hunt_flats()
    assert count(found) > 4
    assert mark_spy.call_count == 24
예제 #11
0
class IdMaintainerTest(unittest.TestCase):
    """Tests for ID storage and last-run-time tracking on IdMaintainer."""

    TEST_URL = 'https://www.immowelt.de/liste/berlin/wohnungen/mieten?roomi=2&prima=1500&wflmi=70&sort=createdate%2Bdesc'

    def setUp(self):
        """Give each test its own maintainer created with ":memory:"."""
        self.maintainer = IdMaintainer(":memory:")

    def test_read_from_empty_db(self):
        """A new maintainer must return no IDs."""
        stored_ids = self.maintainer.get()
        self.assertEqual(0, len(stored_ids),
                         "Expected empty db to return empty array")

    def test_read_after_write(self):
        """An added ID must be the first entry returned by get()."""
        self.maintainer.add(12345)
        first_id = self.maintainer.get()[0]
        self.assertEqual(12345, first_id, "Expected ID to be saved")

    def test_get_last_run_time_none_by_default(self):
        """The last run time must be None before any update."""
        last_run = self.maintainer.get_last_run_time()
        self.assertIsNone(last_run, "Expected last run time to be none")

    def test_get_list_run_time_is_updated(self):
        """update_last_run_time() must return the value later read back."""
        updated_at = self.maintainer.update_last_run_time()
        self.assertIsNotNone(updated_at, "Expected time not to be none")
        self.assertEqual(updated_at, self.maintainer.get_last_run_time(),
                         "Expected last run time to be updated")
예제 #12
0
def launch_flat_hunt(config):
    """Hunt once, then keep hunting for as long as the config's loop is active.

    The 'loop' section of ``config`` is re-read on every iteration: 'active'
    (default False) decides whether to continue, 'sleeping_time'
    (default 600) is the pause in seconds between hunts.
    """
    db_path = '%s/processed_ids.db' % config.database_location()
    flat_hunter = Hunter(config, IdMaintainer(db_path))
    flat_hunter.hunt_flats()

    while True:
        if not config.get('loop', dict()).get('active', False):
            break
        pause = config.get('loop', dict()).get('sleeping_time', 60 * 10)
        time.sleep(pause)
        flat_hunter.hunt_flats()
예제 #13
0
def client():
    """Yield a test client for ``app`` with a Hunter using a temporary DB file.

    The temporary file stays open while the client is in use and is
    removed when the generator finishes.
    """
    app.config['TESTING'] = True
    with tempfile.NamedTemporaryFile(mode='w+') as db_file:
        hunter_config = Config(string=DUMMY_CONFIG)
        crawlers = [CrawlImmowelt()]
        id_store = IdMaintainer(db_file.name)
        app.config['HUNTER'] = Hunter(hunter_config, crawlers, id_store)

        with app.test_client() as test_client:
            yield test_client
예제 #14
0
def launch_flat_hunt(config):
    """Hunt once, then keep hunting for as long as the config's loop is active.

    Processed IDs are kept in 'processed_ids.db' in this file's directory.
    The 'loop' section of ``config`` is re-read on every iteration: 'active'
    (default False) decides whether to continue, 'sleeping_time'
    (default 600) is the pause in seconds between hunts.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    store = IdMaintainer('%s/processed_ids.db' % script_dir)

    flat_hunter = Hunter(config, store)
    flat_hunter.hunt_flats()

    while True:
        if not config.get('loop', dict()).get('active', False):
            break
        pause = config.get('loop', dict()).get('sleeping_time', 60 * 10)
        time.sleep(pause)
        flat_hunter.hunt_flats()
예제 #15
0
def launch_flat_hunt(config):
    """Hunt once with three crawlers, then repeat while the config's loop is active.

    Processed IDs are kept in 'processed_ids.db' in this file's directory.
    The 'loop' section of ``config`` is re-read on every iteration: 'active'
    (default False) decides whether to continue, 'sleeping_time'
    (default 600) is the pause in seconds between hunts.
    """
    crawlers = [
        CrawlImmobilienscout(),
        CrawlWgGesucht(),
        CrawlEbayKleinanzeigen(),
    ]
    script_dir = os.path.dirname(os.path.abspath(__file__))
    store = IdMaintainer('%s/processed_ids.db' % script_dir)

    flat_hunter = Hunter()
    flat_hunter.hunt_flats(config, crawlers, store)

    while True:
        if not config.get('loop', dict()).get('active', False):
            break
        pause = config.get('loop', dict()).get('sleeping_time', 60 * 10)
        time.sleep(pause)
        flat_hunter.hunt_flats(config, crawlers, store)
예제 #16
0
def launch_flat_hunt(config):
    """Start the crawler loop.

    Hunts once, then repeats while the 'loop' section of ``config`` reports
    'active' (default False), sleeping 'sleeping_time' seconds (default 600)
    between hunts. The loop settings are re-read on every iteration.
    """
    db_path = '%s/processed_ids.db' % config.database_location()
    store = IdMaintainer(db_path)

    crawlers = all_searchers(config)
    flat_hunter = Hunter(config, crawlers, store, RedisPubsub(config))
    flat_hunter.hunt_flats()

    while True:
        if not config.get('loop', dict()).get('active', False):
            break
        pause = config.get('loop', dict()).get('sleeping_time', 60 * 10)
        time.sleep(pause)
        flat_hunter.hunt_flats()
예제 #17
0
def hunt_client():
    """Yield a test client for ``app`` with a WebHunter using a temporary DB file.

    Also sets a dummy bot token and a fixed session key on the app. The
    temporary file stays open while the client is in use.
    """
    app.config['TESTING'] = True
    with tempfile.NamedTemporaryFile(mode='w+') as db_file:
        hunter_config = Config(string=DUMMY_CONFIG)
        hunter_config.set_searchers([DummyCrawler()])
        id_store = IdMaintainer(db_file.name)
        app.config['HUNTER'] = WebHunter(hunter_config, id_store)
        app.config['BOT_TOKEN'] = "1234xxx.12345"
        app.secret_key = b'test_session_key'

        with app.test_client() as test_client:
            yield test_client
예제 #18
0
 def test_filter_min_rooms(self):
     """No expose returned by the hunt may report fewer than 2 rooms."""
     min_rooms = 2
     cfg = Config(string=self.FILTER_MIN_ROOMS_CONFIG)
     cfg.set_searchers([DummyCrawler()])
     flat_hunter = Hunter(cfg, IdMaintainer(":memory:"))
     found = flat_hunter.hunt_flats()
     self.assertTrue(count(found) > 4, "Expected to find exposes")
     # Collect exposes whose first number in 'rooms' is below the minimum
     too_few_rooms = [
         flat for flat in found
         if float(re.search(r'\d+([\.,]\d+)?', flat['rooms'])[0]) < min_rooms
     ]
     # Print offenders so a failure shows which exposes slipped through
     for flat in too_few_rooms:
         print("Got unfiltered expose: ", flat)
     self.assertTrue(len(too_few_rooms) == 0, "Expected flats with too few rooms to be filtered")
예제 #19
0
 def test_filter_max_size(self):
     """No expose returned by the hunt may report a size above 80."""
     max_size = 80
     cfg = Config(string=self.FILTER_MAX_SIZE_CONFIG)
     cfg.set_searchers([DummyCrawler()])
     flat_hunter = Hunter(cfg, IdMaintainer(":memory:"))
     found = flat_hunter.hunt_flats()
     self.assertTrue(count(found) > 4, "Expected to find exposes")
     # Collect exposes whose first number in 'size' exceeds the maximum
     too_big = [
         flat for flat in found
         if float(re.search(r'\d+([\.,]\d+)?', flat['size'])[0]) > max_size
     ]
     # Print offenders so a failure shows which exposes slipped through
     for flat in too_big:
         print("Got unfiltered expose: ", flat)
     self.assertTrue(len(too_big) == 0, "Expected big flats to be filtered")
예제 #20
0
class IdMaintainerTest(unittest.TestCase):
    """Tests for processed-ID and last-run-time tracking on IdMaintainer.

    Also holds the config strings used by other tests in this file.
    """

    TEST_URL = 'https://www.immowelt.de/liste/berlin/wohnungen/mieten?roomi=2&prima=1500&wflmi=70&sort=createdate%2Bdesc'

    # Config text with a single search URL
    DUMMY_CONFIG = """
urls:
  - https://www.example.com/liste/berlin/wohnungen/mieten?roomi=2&prima=1500&wflmi=70&sort=createdate%2Bdesc
    """

    # Same search URL plus a max_price filter
    CONFIG_WITH_FILTERS = """
urls:
  - https://www.example.com/liste/berlin/wohnungen/mieten?roomi=2&prima=1500&wflmi=70&sort=createdate%2Bdesc

filters:
  max_price: 1000
    """

    def setUp(self):
        """Give each test its own maintainer created with ":memory:"."""
        self.maintainer = IdMaintainer(":memory:")

    def test_read_after_write(self):
        """An ID marked as processed must be reported as processed."""
        expose_id = 12345
        self.maintainer.mark_processed(expose_id)
        was_saved = self.maintainer.is_processed(expose_id)
        self.assertTrue(was_saved, "Expected ID to be saved")

    def test_get_last_run_time_none_by_default(self):
        """The last run time must be None before any update."""
        last_run = self.maintainer.get_last_run_time()
        self.assertIsNone(last_run, "Expected last run time to be none")

    def test_get_list_run_time_is_updated(self):
        """update_last_run_time() must return the value later read back."""
        updated_at = self.maintainer.update_last_run_time()
        self.assertIsNotNone(updated_at, "Expected time not to be none")
        self.assertEqual(updated_at, self.maintainer.get_last_run_time(), "Expected last run time to be updated")
예제 #21
0
 def test_filter_titles(self):
     """No expose returned by the hunt may contain a filtered word in its title."""
     crawler_words = ["wg", "tausch", "flat", "ruhig", "gruen"]
     banned_words = ["wg", "tausch", "wochenendheimfahrer", "pendler", "zwischenmiete"]
     cfg = Config(string=self.FILTER_TITLES_CONFIG)
     cfg.set_searchers([DummyCrawler(crawler_words)])
     flat_hunter = Hunter(cfg, IdMaintainer(":memory:"))
     found = flat_hunter.hunt_flats()
     self.assertTrue(count(found) > 4, "Expected to find exposes")
     # Collect exposes whose title still contains any banned word
     offenders = [
         flat for flat in found
         if any(word in flat['title'] for word in banned_words)
     ]
     # Print offenders so a failure shows which exposes slipped through
     for flat in offenders:
         print("Got unfiltered expose: ", flat)
     self.assertTrue(len(offenders) == 0, "Expected words to be filtered")
예제 #22
0
def launch_flat_hunt(config, heartbeat=None):
    """Starts the crawler / notification loop

    Hunts once, then repeats while the 'loop' section of ``config`` reports
    'active' (default False), sleeping 'sleeping_time' seconds (default 600)
    between hunts. The loop settings are re-read on every iteration.

    Args:
        config: configuration object providing ``database_location()`` and
            a dict-style ``get``.
        heartbeat: optional object with ``send_heartbeat(counter)``, whose
            return value replaces the loop counter. When omitted (None),
            no heartbeat is sent.
    """
    id_watch = IdMaintainer('%s/processed_ids.db' % config.database_location())

    hunter = Hunter(config, id_watch)
    hunter.hunt_flats()
    counter = 0

    while config.get('loop', dict()).get('active', False):
        counter += 1
        # The default heartbeat is None; calling it unguarded would raise
        # AttributeError on the first iteration.
        if heartbeat is not None:
            counter = heartbeat.send_heartbeat(counter)
        time.sleep(config.get('loop', dict()).get('sleeping_time', 60 * 10))
        hunter.hunt_flats()
예제 #23
0
 def test_filter_min_price(self):
     """No expose returned by the hunt may report a price below 700."""
     min_price = 700
     cfg = Config(string=self.FILTER_MIN_PRICE_CONFIG)
     flat_hunter = Hunter(cfg, [DummyCrawler()], IdMaintainer(":memory:"))
     found = flat_hunter.hunt_flats()
     self.assertTrue(count(found) > 4, "Expected to find exposes")
     # Collect exposes whose first number in 'price' is below the minimum
     too_cheap = [
         flat for flat in found
         if float(re.search(r'\d+([\.,]\d+)?', flat['price'])[0]) < min_price
     ]
     # Print offenders so a failure shows which exposes slipped through
     for flat in too_cheap:
         print("Got unfiltered expose: ", flat)
     self.assertTrue(
         len(too_cheap) == 0, "Expected cheap flats to be filtered")
 def test_resolve_durations(self, m):
     """Every expose returned by the hunt must carry a 'durations' entry.

     ``m`` is used to register a canned response for the Google distance
     matrix URL before the hunt runs.
     """
     cfg = Config(string=self.DUMMY_CONFIG)
     flat_hunter = Hunter(cfg, [DummyCrawler()], IdMaintainer(":memory:"))
     distance_api = re.compile(
         'maps.googleapis.com/maps/api/distancematrix/json')
     canned_response = '{"status": "OK", "rows": [ { "elements": [ { "distance": { "text": "far", "value": 123 }, "duration": { "text": "days", "value": 123 } } ] } ]}'
     m.get(distance_api, text=canned_response)
     found = flat_hunter.hunt_flats()
     self.assertTrue(count(found) > 4, "Expected to find exposes")
     missing_durations = [flat for flat in found if 'durations' not in flat]
     # Print offenders so a failure shows which exposes lack durations
     for flat in missing_durations:
         print("Got expose: ", flat)
     self.assertTrue(
         len(missing_durations) == 0, "Expected durations to be calculated")
예제 #25
0
 def setUp(self):
     """Give each test its own maintainer created with ":memory:"."""
     fresh_store = IdMaintainer(":memory:")
     self.maintainer = fresh_store
예제 #26
0
    def test_invalid_config(self):
        """Building a Hunter from a plain dict must raise an 'Invalid config' error."""
        with self.assertRaises(Exception) as raised:
            Hunter(dict(), IdMaintainer(":memory:"))

        error_text = str(raised.exception)
        self.assertTrue('Invalid config' in error_text)
예제 #27
0
 def setUp(self):
     """Build the Hunter under test with the dummy config and an Immowelt crawler."""
     cfg = Config(string=self.DUMMY_CONFIG)
     crawlers = [CrawlImmowelt()]
     store = IdMaintainer(":memory:")
     self.hunter = Hunter(cfg, crawlers, store)
예제 #28
0
from flathunter.crawl_immobilienscout import CrawlImmobilienscout
from flathunter.crawl_wggesucht import CrawlWgGesucht
from flathunter.crawl_immowelt import CrawlImmowelt
from flathunter.idmaintainer import IdMaintainer
from flathunter.googlecloud_idmaintainer import GoogleCloudIdMaintainer
from flathunter.hunter import Hunter
from flathunter.config import Config

from flathunter.web import app

# NOTE(review): `os` and `CrawlEbayKleinanzeigen` are used below but are not
# imported in the visible part of this file - confirm they are imported above.
searchers = [
    CrawlImmobilienscout(),
    CrawlWgGesucht(),
    CrawlEbayKleinanzeigen(),
    CrawlImmowelt(),
]

if __name__ != '__main__':
    # Use Google Cloud DB if we run on the cloud
    id_watch = GoogleCloudIdMaintainer()
else:
    # Use the SQLite DB file (next to this script) if we are running locally
    id_watch = IdMaintainer(
        '%s/processed_ids.db' % os.path.dirname(os.path.abspath(__file__))
    )

# Build the hunter and hand it to the web app through its config
hunter = Hunter(Config(), searchers, id_watch)
app.config["HUNTER"] = hunter

if __name__ == '__main__':
    # Local run: start the app on localhost with debug enabled
    app.run(host='127.0.0.1', port=8080, debug=True)
예제 #29
0
# Startup file for Google Cloud deployment
##
import os

from flathunter.idmaintainer import IdMaintainer
from flathunter.googlecloud_idmaintainer import GoogleCloudIdMaintainer
from flathunter.web_hunter import WebHunter
from flathunter.config import Config

from flathunter.web import app

config = Config()

if __name__ != '__main__':
    # Use Google Cloud DB if we run on the cloud
    id_watch = GoogleCloudIdMaintainer()
else:
    # Use the SQLite DB file if we are running locally
    id_watch = IdMaintainer('%s/processed_ids.db' % config.database_location())

# Build the web hunter and hand it to the web app through its config
hunter = WebHunter(config, id_watch)
app.config["HUNTER"] = hunter

if 'website' not in config:
    # No website section: fall back to a fixed placeholder session key
    app.secret_key = b'Not a secret'
else:
    app.secret_key = config['website']['session_key']
    app.config["DOMAIN"] = config['website']['domain']
    app.config["BOT_NAME"] = config['website']['bot_name']

# The telegram bot token is read unconditionally
app.config["BOT_TOKEN"] = config['telegram']['bot_token']