Ejemplo n.º 1
0
    def setUpClass(cls):
        """Build the class-level fixtures shared by every test in this case.

        Creates the SQLite ``DBHandler`` for ``TEST_DB_NAME``, obtains a
        browser from a new ``BrowserHandler`` and constructs a
        ``PlatformRegistry`` on which the Karriere.at and StepStone handlers
        are registered.
        """
        cls.dbms = DBHandler(DBHandler.SQLITE, db_name=TEST_DB_NAME)

        handler = BrowserHandler()
        cls.browser_handler = handler
        cls.browser = handler.get_browser()

        registry = PlatformRegistry(browser=cls.browser, dbms=cls.dbms)
        cls.platform_registry = registry
        for platform_cls in (KarriereATHandler, StepStoneHandler):
            registry.register_new_platform(platform_cls)
Ejemplo n.º 2
0
    def setUpClass(cls):
        """Build the class-level fixtures shared by every test in this case.

        Sets up the SQLite test database (including its tables), obtains a
        browser from a new ``BrowserHandler`` and prepares a
        ``PlatformRegistry`` that has exactly the Karriere.at and StepStone
        handlers registered, then delegates to the parent ``setUpClass``.
        """
        database = DBHandler(DBHandler.SQLITE, db_name=TEST_DB_NAME)
        cls.dbms = database
        database.create_database_and_tables()

        handler = BrowserHandler()
        cls.browser_handler = handler
        cls.browser = handler.get_browser()

        registry = PlatformRegistry(browser=cls.browser, dbms=cls.dbms)
        cls.platform_registry = registry

        # Start from an empty registration list; it may already have been
        # populated by other tests.
        registry.de_register_all_platforms()

        for platform_cls in (KarriereATHandler, StepStoneHandler):
            registry.register_new_platform(platform_cls)

        super().setUpClass()
Ejemplo n.º 3
0
    def setUpClass(cls):
        """Build the class-level fixtures shared by every test in this case.

        Sets up the SQLite test database and its tables, obtains a browser,
        registers ``KarriereATHandler`` on a ``PlatformRegistry`` and asks the
        registry to create the platform entries in the database. It also
        stores a ``KarriereATHandler`` instance, the search topic and one
        sample vacancy entry on the class before delegating to the parent
        ``setUpClass``.
        """
        database = DBHandler(DBHandler.SQLITE, db_name=TEST_DB_NAME)
        cls.dbms = database
        database.create_database_and_tables()

        handler = BrowserHandler()
        cls.browser_handler = handler
        cls.browser = handler.get_browser()

        registry = PlatformRegistry(browser=cls.browser, dbms=cls.dbms)
        cls.platform_registry = registry
        registry.register_new_platform(KarriereATHandler)

        # The registered platform needs its own rows in the database.
        registry.create_platform_entries_in_database()

        cls.karriere_at_handler = KarriereATHandler(browser=cls.browser,
                                                    dbms=cls.dbms)

        cls.search_topic = "Python"
        sample_vacancy = {
            "platform": KarriereATHandler.platform_name,
            "company": "reynholm industries",
            "url": "https://www.reynholm.co.uk/",
            "title": "Head of Everything",
            "search_topic": cls.search_topic,
            "date": datetime(2019, 3, 30).date(),
            "location": "London",
        }
        cls.vacancy_entries = [sample_vacancy]

        super().setUpClass()
Ejemplo n.º 4
0
    def test_disabled_retention_in_days(self):
        """Test that a "disabled" retention setting turns off auto-deletion.

        Points ``ConfigHandler`` at ``config_disabled_retention_days.json``,
        runs the config validation, checks that
        ``ConfigHandler.POSTING_RETENTION_IN_DAYS`` ends up as ``None`` and
        verifies that the database clean-up keeps both a vacancy dated today
        and one dated a full retention period in the past.
        """
        # Reset the value to 30 (it might be None due to other tests) so the
        # date arithmetic below has a number to work with.
        ConfigHandler.POSTING_RETENTION_IN_DAYS = 30

        today = datetime.now()
        # NOTE(review): this date is exactly the retention period old, not
        # older - confirm it would be deleted if retention were enabled.
        very_old_post_date = today - timedelta(
            days=ConfigHandler.POSTING_RETENTION_IN_DAYS)

        ConfigHandler.CONFIG_PATH = os.path.join(
            ConfigHandler.ROOT_DIR, 'tests', 'test_data', 'config_jsons',
            'config_disabled_retention_days.json')
        ConfigHandler.validate_config_file_base_variables()

        browser_handler = BrowserHandler()
        browser = browser_handler.get_browser()
        try:
            platform_registry = PlatformRegistry(browser=browser,
                                                 dbms=self.dbms)
            platform_registry.register_new_platform(KarriereATHandler)
            platform_registry.create_platform_entries_in_database()

            ConfigHandler.validate_search_topics(
                platform_registry=platform_registry)
        finally:
            # Close the browser even if registration or validation raises,
            # so a failing test does not leave a browser behind.
            browser_handler.close_browser()

        # Check that posting-retention-in-days was correctly set to None
        self.assertIsNone(ConfigHandler.POSTING_RETENTION_IN_DAYS)

        with session_scope(self.dbms) as session:
            vacancy_list = [
                Vacancies(platform=self.platform.name,
                          date=post_date,
                          url="http://",
                          title="title",
                          company="aha",
                          search_topic='Java',
                          location="Wien")
                for post_date in (today, very_old_post_date)
            ]

            for v in vacancy_list:
                session.add(v)

        self.dbms.cleanup_job_postings_in_database()

        with session_scope(self.dbms) as session:
            vacancies_instances_list = session.query(Vacancies).all()

            # Auto-deletion should be disabled - both entries should be present
            self.assertEqual(len(vacancies_instances_list), 2)

            # The query specifies no ordering, so compare the stored dates
            # without depending on the order in which rows come back.
            date_list = [row.date for row in vacancies_instances_list]
            self.assertCountEqual(
                date_list,
                [today.date(), very_old_post_date.date()])
Ejemplo n.º 5
0
def main():
    """Run the job scraper from the command line.

    Parses the command-line flags, validates the configuration, prepares
    the SQLite database, a browser and the platform registry, then either
    lists the registered platforms (``--platforms``) and exits, or scrapes
    every configured platform/search topic and prints the results to HTML.

    The browser is closed in all cases once it has been obtained, including
    the ``--platforms`` early exit and failures during scraping.

    Raises:
        SystemExit: with code 0 after listing platforms when ``--platforms``
            is given.
    """
    # Initialize CL-Parser
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--platforms',
        action='store_true',
        help=
        'Prints all available platforms for scraping. Each platform can be added in the '
        'config.json-File for scraping.')
    parser.add_argument(
        '-v',
        action='store_true',
        help='Verbose-mode. Prints all generated messages. Used for debugging.'
    )

    args = parser.parse_args()
    # NOTE(review): looks like leftover debug output - confirm whether the
    # parsed namespace should really be printed on every run.
    print(args)
    # Parse and set Config-Values from config.json
    ConfigHandler.validate_config_file_base_variables()
    ConfigHandler.set_verbosity_level(args.v)

    # Initialize Base Classes
    dbms = DBHandler(DBHandler.SQLITE, db_name='job_scraper.sqlite')
    dbms.create_database_and_tables()

    browser_handler = BrowserHandler()
    browser = browser_handler.get_browser()

    try:
        platform_registry = PlatformRegistry(browser=browser, dbms=dbms)

        _register_platforms(platform_registry=platform_registry)

        # If --platforms flag is set, display all registered platforms and
        # exit. sys.exit raises SystemExit, so the finally-clause below
        # still closes the browser.
        if args.platforms:
            print("\nRegistered Platforms:\n-----------------")
            for platform in PlatformRegistry.registered_platforms:
                print(platform.platform_name)
            sys.exit(0)

        # Validate Search-Topics and URLs in config-file comparing it to the
        # registered platforms
        ConfigHandler.validate_search_topics(
            platform_registry=platform_registry)

        # Clean up database and remove job-postings that are older than the
        # retention-range
        dbms.cleanup_job_postings_in_database()

        # Check if all registered platforms exist as entries in the database
        platform_registry.create_platform_entries_in_database()

        # Perform web-scraping
        for platform_name, searches in \
                ConfigHandler.search_types_and_urls.items():
            platform = platform_registry.get_platform_instance(platform_name)

            for search_topic, search_url in searches.items():
                platform.run(search_topic=search_topic, search_url=search_url)
    finally:
        # Always release the browser, whether scraping finished, failed or
        # the --platforms listing exited early.
        browser_handler.close_browser()

    # Print fetched vacancies from db to HTML
    result_printer = ResultPrinter(dbms=dbms,
                                   platform_registry=platform_registry)
    result_printer.print_result_to_html(
        seach_topic_list=ConfigHandler.search_topics,
        open_html_after_finish=True)
Ejemplo n.º 6
0
def _register_platforms(platform_registry: PlatformRegistry):
    """Register all implemented, stable platform handlers on the registry.

    Args:
        platform_registry: Registry that receives each handler class via
            ``register_new_platform``.
    """
    stable_handlers = (
        KarriereATHandler,
        StepStoneHandler,
        MonsterATHandler,
        JobsATHandler,
    )
    for handler_cls in stable_handlers:
        platform_registry.register_new_platform(handler_cls)
Ejemplo n.º 7
0
    def test_singleton_restriction(self):
        """Check that constructing PlatformRegistry again yields the fixture's registry.

        A second ``PlatformRegistry`` built from the same browser and database
        handler must compare equal to ``self.platform_registry``.
        """
        constructor_kwargs = {"browser": self.browser, "dbms": self.dbms}
        duplicate_registry = PlatformRegistry(**constructor_kwargs)

        self.assertEqual(self.platform_registry, duplicate_registry)