Ejemplo n.º 1
0
 def test_defaults_fields(self):
     """Check that ``database_location()`` resolves to the parent of this file's directory."""
     cfg = Config(string=self.FILTERS_CONFIG)
     self.assertIsNotNone(cfg)
     actual_location = cfg.database_location()
     # The expected location is one level above the directory holding this file
     this_dir = os.path.dirname(os.path.abspath(__file__))
     expected_location = os.path.abspath(this_dir + "/..")
     self.assertEqual(actual_location, expected_location)
Ejemplo n.º 2
0
 def test_hunt_flats(self):
     """Hunt with a single Immowelt crawler and expect at least one expose."""
     hunter_config = Config(string=self.DUMMY_CONFIG)
     # The crawler gets its own Config instance built from the same string
     crawler = CrawlImmowelt(Config(string=self.DUMMY_CONFIG))
     id_watch = IdMaintainer(":memory:")
     hunter = Hunter(hunter_config, [crawler], id_watch)
     found = hunter.hunt_flats()
     self.assertTrue(count(found) > 0, "Expected to find exposes")
Ejemplo n.º 3
0
 def test_loads_config_at_file(self):
     """Write the dummy config to a temporary file and load it by path."""
     with tempfile.NamedTemporaryFile(mode='w+') as config_file:
         config_file.write(self.DUMMY_CONFIG)
         # Flush so the content is on disk before Config reads the path
         config_file.flush()
         loaded = Config(config_file.name)
     urls = loaded.get('urls')
     self.assertTrue(len(urls) > 0, "Expected URLs in config file")
Ejemplo n.º 4
0
def main():
    """Processes command-line arguments, loads the config, launches the flathunter.

    Only ``--config``/``-c`` is declared; any other command-line arguments are
    ignored because ``parse_known_args`` is used. The default config path is
    ``config.yaml`` next to this file.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    parser = argparse.ArgumentParser(
        description="Searches for flats on Immobilienscout24.de and wg-gesucht.de and sends "
                    "results to Telegram User",
        epilog="Designed by Nody")
    parser.add_argument(
        '--config',
        '-c',
        type=argparse.FileType('r', encoding='UTF-8'),
        default='%s/config.yaml' % script_dir,
        help="Config file to use. If not set, try to use '%s/config.yaml' " %
        script_dir)
    args = parser.parse_known_args()[0]

    # load config
    # argparse already opened the file; only its path is passed on, so make
    # sure the handle is closed instead of leaking it for the process lifetime
    with args.config as config_handle:
        config = Config(config_handle.name)

    # check config
    if not config.urls():
        __log__.warning("No urls configured. No crawling will be done.")

    # adjust log level, if required
    if config.get('verbose'):
        __log__.setLevel(logging.DEBUG)
        __log__.debug("Settings from config: %s", pformat(config))

    # start hunting for flats
    launch_flat_hunt(config)
def test_is_processed_works(id_watch):
    """Every expose returned by a hunt is reported as processed by ``id_watch``."""
    config = Config(string=CONFIG_WITH_FILTERS)
    config.set_searchers([DummyCrawler()])
    found = Hunter(config, id_watch).hunt_flats()
    assert count(found) > 4
    for flat in found:
        flat_id = flat['id']
        assert id_watch.is_processed(flat_id)
def test_exposes_are_returned_with_limit(id_watch):
    """Requesting 10 recent exposes after a hunt returns exactly 10; the first has a title."""
    config = Config(string=CONFIG_WITH_FILTERS)
    config.set_searchers([DummyCrawler()])
    # The hunt result itself is not needed; only what ends up in id_watch
    Hunter(config, id_watch).hunt_flats()
    recent = id_watch.get_recent_exposes(10)
    assert len(recent) == 10
    first = recent[0]
    assert first['title'] is not None
def test_exposes_are_saved_to_maintainer(id_watch):
    """A hunt saves exposes to ``id_watch``, and more are saved than are returned."""
    config = Config(string=CONFIG_WITH_FILTERS)
    config.set_searchers([DummyCrawler()])
    found = Hunter(config, id_watch).hunt_flats()
    assert count(found) > 4
    # Look back ten seconds, which should cover what the hunt above saved
    cutoff = datetime.datetime.now() - datetime.timedelta(seconds=10)
    stored = id_watch.get_exposes_since(cutoff)
    assert len(stored) > 0
    assert count(found) < len(stored)
def test_ids_are_added_to_maintainer(mocker):
    """``mark_processed`` is called 24 times on the maintainer during a single hunt."""
    config = Config(string=IdMaintainerTest.DUMMY_CONFIG)
    config.set_searchers([DummyCrawler()])
    maintainer = IdMaintainer(":memory:")
    # Install the spy before the hunter is built so every call is recorded
    mark_spy = mocker.spy(maintainer, "mark_processed")
    found = Hunter(config, maintainer).hunt_flats()
    assert count(found) > 4
    assert mark_spy.call_count == 24
def test_is_processed_works(mocker):
    """Every expose returned by a hunt is reported as processed by the maintainer."""
    config = Config(string=IdMaintainerTest.DUMMY_CONFIG)
    config.set_searchers([DummyCrawler()])
    maintainer = IdMaintainer(":memory:")
    found = Hunter(config, maintainer).hunt_flats()
    assert count(found) > 4
    for flat in found:
        flat_id = flat['id']
        assert maintainer.is_processed(flat_id)
Ejemplo n.º 10
0
 def test_addresses_are_processed_by_hunter(self):
     """Addresses that the crawler delivers as links no longer start with 'http' after a hunt."""
     link_crawler = DummyCrawler(addresses_as_links=True)
     config = Config(string=self.DUMMY_CONFIG)
     config.set_searchers([link_crawler])
     id_watch = IdMaintainer(":memory:")
     found = Hunter(config, id_watch).hunt_flats()
     self.assertTrue(count(found) > 4, "Expected to find exposes")
     for flat in found:
         is_link = flat['address'].startswith('http')
         self.assertFalse(is_link,
                          "Expected addresses to be processed by default")
def test_exposes_are_returned_as_dictionaries(id_watch):
    """Saved exposes can be indexed by key and carry 'title' and 'created_at'."""
    config = Config(string=CONFIG_WITH_FILTERS)
    config.set_searchers([DummyCrawler()])
    Hunter(config, id_watch).hunt_flats()
    # Look back ten seconds, which should cover what the hunt above saved
    cutoff = datetime.datetime.now() - datetime.timedelta(seconds=10)
    stored = id_watch.get_exposes_since(cutoff)
    assert len(stored) > 0
    first = stored[0]
    assert first['title'] is not None
    assert first['created_at'] is not None
Ejemplo n.º 12
0
def hunt_client():
    """Yield a test client for ``app`` wired to a WebHunter with a dummy crawler.

    The hunter's IdMaintainer is backed by a named temporary file that stays
    open for as long as the yielded client is in use.
    """
    app.config['TESTING'] = True
    with tempfile.NamedTemporaryFile(mode='w+') as db_file:
        hunter_config = Config(string=DUMMY_CONFIG)
        hunter_config.set_searchers([DummyCrawler()])
        id_watch = IdMaintainer(db_file.name)
        app.config['HUNTER'] = WebHunter(hunter_config, id_watch)
        app.config['BOT_TOKEN'] = "1234xxx.12345"
        app.secret_key = b'test_session_key'

        with app.test_client() as test_client:
            yield test_client
def test_exposes_are_returned_filtered(id_watch):
    """Recent exposes fetched with a max-size filter of 70 all have a size of at most 70."""
    config = Config(string=CONFIG_WITH_FILTERS)
    config.set_searchers([DummyCrawler()])
    hunter = Hunter(config, id_watch)
    # Hunt twice, as the original test does, before querying the store
    for _ in range(2):
        hunter.hunt_flats()
    size_filter = Filter.builder().max_size_filter(70).build()
    recent = id_watch.get_recent_exposes(10, filter=size_filter)
    assert len(recent) == 10
    for flat in recent:
        # The size field starts with its numeric part
        leading_digits = re.match(r'\d+', flat['size'])[0]
        assert int(leading_digits) <= 70
Ejemplo n.º 14
0
 def test_filter_min_rooms(self):
     """Exposes with fewer than ``min_rooms`` rooms must not survive a hunt.

     The room count is parsed from the leading number of ``expose['rooms']``,
     which may use either a dot or a comma as the decimal separator.
     """
     min_rooms = 2
     config = Config(string=self.FILTER_MIN_ROOMS_CONFIG)
     config.set_searchers([DummyCrawler()])
     hunter = Hunter(config, IdMaintainer(":memory:"))
     exposes = hunter.hunt_flats()
     self.assertTrue(count(exposes) > 4, "Expected to find exposes")

     def room_count(expose):
         # The pattern accepts "2,5" as well as "2.5", but float() only
         # understands a dot, so normalise the separator before converting.
         number = re.search(r'\d+([\.,]\d+)?', expose['rooms'])[0]
         return float(number.replace(',', '.'))

     unfiltered = [expose for expose in exposes if room_count(expose) < min_rooms]
     # Print the offenders so a failing run shows which exposes slipped through
     for expose in unfiltered:
         print("Got unfiltered expose: ", expose)
     self.assertTrue(len(unfiltered) == 0, "Expected flats with too few rooms to be filtered")
Ejemplo n.º 15
0
 def test_loads_config(self):
     """Load ``Config()`` with no arguments and expect it to contain URLs.

     If no ``config.yaml`` exists in the working directory, one is written
     from ``DUMMY_CONFIG`` for the duration of the test and removed again
     afterwards, whether or not the test passes.
     """
     created = False
     if not os.path.isfile("config.yaml"):
         # The context manager closes the handle even if the write fails
         with open("config.yaml", "w") as config_file:
             config_file.write(self.DUMMY_CONFIG)
         created = True
     try:
         config = Config()
         self.assertTrue(len(config.get('urls')) > 0, "Expected URLs in config file")
     finally:
         # Only delete the file if this test created it; never touch a real config
         if created:
             os.remove("config.yaml")
Ejemplo n.º 16
0
 def test_filter_max_size(self):
     """Exposes larger than ``max_size`` must not survive a hunt.

     The size is parsed from the leading number of ``expose['size']``, which
     may use either a dot or a comma as the decimal separator.
     """
     max_size = 80
     config = Config(string=self.FILTER_MAX_SIZE_CONFIG)
     config.set_searchers([DummyCrawler()])
     hunter = Hunter(config, IdMaintainer(":memory:"))
     exposes = hunter.hunt_flats()
     self.assertTrue(count(exposes) > 4, "Expected to find exposes")

     def flat_size(expose):
         # The pattern accepts "80,5" as well as "80.5", but float() only
         # understands a dot, so normalise the separator before converting.
         number = re.search(r'\d+([\.,]\d+)?', expose['size'])[0]
         return float(number.replace(',', '.'))

     unfiltered = [expose for expose in exposes if flat_size(expose) > max_size]
     # Print the offenders so a failing run shows which exposes slipped through
     for expose in unfiltered:
         print("Got unfiltered expose: ", expose)
     self.assertTrue(len(unfiltered) == 0, "Expected big flats to be filtered")
Ejemplo n.º 17
0
 def test_filter_titles(self):
     """No expose returned by a hunt may contain one of the filtered words in its title."""
     # Words handed to the dummy crawler
     titlewords = ["wg", "tausch", "flat", "ruhig", "gruen"]
     # Words that must not appear in any surviving title
     filteredwords = ["wg", "tausch", "wochenendheimfahrer", "pendler", "zwischenmiete"]
     config = Config(string=self.FILTER_TITLES_CONFIG)
     config.set_searchers([DummyCrawler(titlewords)])
     id_watch = IdMaintainer(":memory:")
     found = Hunter(config, id_watch).hunt_flats()
     self.assertTrue(count(found) > 4, "Expected to find exposes")
     leaked = [
         flat for flat in found
         if any(word in flat['title'] for word in filteredwords)
     ]
     for flat in leaked:
         print("Got unfiltered expose: ", flat)
     self.assertTrue(len(leaked) == 0, "Expected words to be filtered")
Ejemplo n.º 18
0
 def test_address_processor(self):
     """Crawled addresses start as links and stop starting with 'http' after the chain runs."""
     link_crawler = DummyCrawler(addresses_as_links=True)
     crawler_config = Config(string=self.DUMMY_CONFIG)
     crawler_config.set_searchers([link_crawler])
     raw = link_crawler.get_results("https://www.example.com/search")
     for flat in raw:
         self.assertTrue(flat['address'].startswith('http'),
                         "Expected addresses not yet to be processed")
     # The chain is built from a fresh Config, not the one holding the searcher
     builder = ProcessorChain.builder(Config(string=self.DUMMY_CONFIG))
     chain = builder.resolve_addresses().build()
     processed = chain.process(raw)
     for flat in processed:
         self.assertFalse(flat['address'].startswith('http'),
                          "Expected addresses to be processed")
def test_filters_for_user_are_saved():
    """Filters stored for a user id are returned unchanged when read back."""
    config = Config(string=IdMaintainerTest.CONFIG_WITH_FILTERS)
    maintainer = IdMaintainer(":memory:")
    user_filter = {'fish': 'cat'}
    web_hunter = WebHunter(config, maintainer)
    web_hunter.set_filters_for_user(123, user_filter)
    stored = web_hunter.get_filters_for_user(123)
    assert stored == user_filter
def test_all_filters_can_be_loaded(id_watch):
    """Filters set for two users appear in ``get_user_settings()`` as (id, settings) pairs."""
    user_filter = {'fish': 'cat'}
    config = Config(string=CONFIG_WITH_FILTERS)
    web_hunter = WebHunter(config, id_watch)
    for user_id in (123, 124):
        web_hunter.set_filters_for_user(user_id, user_filter)
    expected = [
        (123, {'filters': user_filter}),
        (124, {'filters': user_filter}),
    ]
    assert id_watch.get_user_settings() == expected
 def __init__(self):
     """Initialise the Firebase app and keep a Firestore client on ``self.db``.

     The project id is read from the ``google_cloud_project_id`` config key.

     Raises:
         Exception: if ``google_cloud_project_id`` is not set in the config.
     """
     project_id = Config().get('google_cloud_project_id')
     if project_id is None:
         # Message corrected: the original read "Need to project a ..."
         raise Exception(
             "Need to provide a google_cloud_project_id in config.yaml")
     firebase_admin.initialize_app(credentials.ApplicationDefault(),
                                   {'projectId': project_id})
     self.db = firestore.client()
Ejemplo n.º 22
0
def client():
    """Yield a test client for ``app`` wired to a Hunter with an Immowelt crawler.

    The hunter's IdMaintainer is backed by a named temporary file that stays
    open for as long as the yielded client is in use.
    """
    app.config['TESTING'] = True
    with tempfile.NamedTemporaryFile(mode='w+') as db_file:
        hunter_config = Config(string=DUMMY_CONFIG)
        crawlers = [CrawlImmowelt()]
        id_watch = IdMaintainer(db_file.name)
        app.config['HUNTER'] = Hunter(hunter_config, crawlers, id_watch)

        with app.test_client() as test_client:
            yield test_client
Ejemplo n.º 23
0
def test_exposes_are_returned_with_limit():
    """Requesting 10 recent exposes after a hunt returns exactly 10; the first has a title."""
    config = Config(string=IdMaintainerTest.CONFIG_WITH_FILTERS)
    maintainer = IdMaintainer(":memory:")
    # The hunt result itself is not needed; only what ends up in the maintainer
    Hunter(config, [DummyCrawler()], maintainer).hunt_flats()
    recent = maintainer.get_recent_exposes(10)
    assert len(recent) == 10
    first = recent[0]
    assert first['title'] is not None
Ejemplo n.º 24
0
def test_exposes_are_saved_to_maintainer():
    """A hunt saves exposes to the maintainer, and more are saved than are returned."""
    config = Config(string=IdMaintainerTest.CONFIG_WITH_FILTERS)
    maintainer = IdMaintainer(":memory:")
    found = Hunter(config, [DummyCrawler()], maintainer).hunt_flats()
    assert count(found) > 4
    # Look back ten seconds, which should cover what the hunt above saved
    cutoff = datetime.datetime.now() - datetime.timedelta(seconds=10)
    stored = maintainer.get_exposes_since(cutoff)
    assert len(stored) > 0
    assert count(found) < len(stored)
Ejemplo n.º 25
0
def test_exposes_are_returned_as_dictionaries():
    """Saved exposes can be indexed by key and carry 'title' and 'created_at'."""
    config = Config(string=IdMaintainerTest.CONFIG_WITH_FILTERS)
    maintainer = IdMaintainer(":memory:")
    Hunter(config, [DummyCrawler()], maintainer).hunt_flats()
    # Look back ten seconds, which should cover what the hunt above saved
    cutoff = datetime.datetime.now() - datetime.timedelta(seconds=10)
    stored = maintainer.get_exposes_since(cutoff)
    assert len(stored) > 0
    first = stored[0]
    assert first['title'] is not None
    assert first['created_at'] is not None
Ejemplo n.º 26
0
def test_exposes_are_returned_filtered():
    """Recent exposes fetched with a max-size filter of 70 all have a size of at most 70."""
    config = Config(string=IdMaintainerTest.CONFIG_WITH_FILTERS)
    maintainer = IdMaintainer(":memory:")
    hunter = Hunter(config, [DummyCrawler()], maintainer)
    # Hunt twice, as the original test does, before querying the store
    for _ in range(2):
        hunter.hunt_flats()
    size_filter = Filter.builder().max_size_filter(70).build()
    recent = maintainer.get_recent_exposes(10, filter_set=size_filter)
    assert len(recent) == 10
    for flat in recent:
        # The size field starts with its numeric part
        leading_digits = re.match(r'\d+', flat['size'])[0]
        assert int(leading_digits) <= 70
Ejemplo n.º 27
0
 def test_resolve_durations(self, m):
     """Every hunted expose carries a 'durations' key when the distance-matrix call is stubbed.

     ``m`` is the HTTP mock injected into the test; a canned JSON body is
     registered on it for any GET matching the distance-matrix URL pattern.
     NOTE(review): presumably a requests_mock mocker; the decorator is outside this block.
     """
     config = Config(string=self.DUMMY_CONFIG)
     config.set_searchers([DummyCrawler()])
     hunter = Hunter(config, IdMaintainer(":memory:"))
     distance_api = re.compile(
         'maps.googleapis.com/maps/api/distancematrix/json')
     canned_body = '{"status": "OK", "rows": [ { "elements": [ { "distance": { "text": "far", "value": 123 }, "duration": { "text": "days", "value": 123 } } ] } ]}'
     m.get(distance_api, text=canned_body)
     found = hunter.hunt_flats()
     self.assertTrue(count(found) > 4, "Expected to find exposes")
     missing = [flat for flat in found if 'durations' not in flat]
     # Print the offenders so a failing run shows which exposes lack durations
     for flat in missing:
         print("Got expose: ", flat)
     self.assertTrue(
         len(missing) == 0, "Expected durations to be calculated")
def test_all_filters_can_be_loaded():
    """Filters set for two users appear in ``get_user_settings()`` as (id, settings) pairs."""
    config = Config(string=IdMaintainerTest.CONFIG_WITH_FILTERS)
    maintainer = IdMaintainer(":memory:")
    user_filter = {'fish': 'cat'}
    web_hunter = WebHunter(config, maintainer)
    for user_id in (123, 124):
        web_hunter.set_filters_for_user(user_id, user_filter)
    expected = [
        (123, {'filters': user_filter}),
        (124, {'filters': user_filter}),
    ]
    assert maintainer.get_user_settings() == expected
Ejemplo n.º 29
0
def main():
    """Processes command-line arguments, loads the config, launches the flathunter.

    Declares ``--config``/``-c`` (default: ``config.yaml`` next to this file)
    and ``--heartbeat``/``-hb``. Returns early, after logging an error, when a
    configured notifier lacks its credentials or when no URLs are configured.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    parser = argparse.ArgumentParser(
        description="Searches for flats on Immobilienscout24.de and wg-gesucht.de and sends "
                    "results to Telegram User",
        epilog="Designed by Nody")
    parser.add_argument('--config', '-c',
                        type=argparse.FileType('r', encoding='UTF-8'),
                        default='%s/config.yaml' % script_dir,
                        help="Config file to use. If not set, try to use '%s/config.yaml' " %
                             script_dir
                        )
    # The two help fragments need a separating space, otherwise the text
    # renders as "the bot isalive"
    parser.add_argument('--heartbeat', '-hb',
                        action='store',
                        default=None,
                        help='Set the interval time to receive heartbeat messages to check that the bot is '
                             'alive. Accepted strings are "hour", "day", "week". Defaults to None.'
                        )
    args = parser.parse_args()

    # load config
    # argparse already opened the file; only its path is passed on, so make
    # sure the handle is closed instead of leaking it for the process lifetime
    with args.config as config_handle:
        config = Config(config_handle.name)

    # check config
    notifiers = config.get('notifiers', [])
    if 'mattermost' in notifiers \
            and not config.get('mattermost', {}).get('webhook_url'):
        __log__.error("No mattermost webhook configured. Starting like this would be pointless...")
        return
    if 'telegram' in notifiers:
        telegram_config = config.get('telegram', {})
        if not telegram_config.get('bot_token'):
            __log__.error("No telegram bot token configured. Starting like this would be pointless...")
            return
        # Missing receivers is only a warning: the hunt still runs
        if not telegram_config.get('receiver_ids'):
            __log__.warning("No telegram receivers configured - nobody will get notifications.")
    if not config.get('urls'):
        __log__.error("No urls configured. Starting like this would be meaningless...")
        return

    # get heartbeat instructions
    heartbeat_interval = args.heartbeat
    heartbeat = Heartbeat(config, heartbeat_interval)

    # adjust log level, if required
    if config.get('verbose'):
        __log__.setLevel(logging.DEBUG)
        __log__.debug("Settings from config: %s", pformat(config))

    # start hunting for flats
    launch_flat_hunt(config, heartbeat)
Ejemplo n.º 30
0
def main():
    """Processes command-line arguments, loads the config, starts the Telegram sender.

    Only ``--config``/``-c`` is declared; any other command-line arguments are
    ignored because ``parse_known_args`` is used. Returns early, after logging
    an error, when no Telegram bot token is configured.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    parser = argparse.ArgumentParser(
        description="Searches for flats on Immobilienscout24.de and wg-gesucht.de and sends "
                    "results to Telegram User",
        epilog="Designed by Nody")
    parser.add_argument(
        '--config',
        '-c',
        type=argparse.FileType('r', encoding='UTF-8'),
        default='%s/config.yaml' % script_dir,
        help="Config file to use. If not set, try to use '%s/config.yaml' " %
        script_dir)
    args = parser.parse_known_args()[0]

    # load config
    # argparse already opened the file; only its path is passed on, so make
    # sure the handle is closed instead of leaking it for the process lifetime
    with args.config as config_handle:
        config = Config(config_handle.name)

    # check config
    telegram_config = config.get('telegram', {})
    if not telegram_config.get('bot_token'):
        __log__.error(
            "No telegram bot token configured. Starting like this would be pointless..."
        )
        return
    # Missing receivers is only a warning: the sender still starts
    if not telegram_config.get('receiver_ids'):
        __log__.warning(
            "No telegram receivers configured - nobody will get notifications."
        )

    # adjust log level, if required
    if config.get('verbose'):
        __log__.setLevel(logging.DEBUG)
        __log__.debug("Settings from config: %s", pformat(config))

    # start sending messages
    telegram_sender = SenderTelegram(config, RedisPubsub(config))
    telegram_sender.wait_and_process()