Example #1
0
    def test_auto_increment(self):
        """Check that two vacancies inserted one after another get the ids 1 and 2.

        NOTE(review): presumably the vacancies table is empty when this test
        starts (fixture not visible here) - otherwise the expected ids differ.
        """
        shared_fields = dict(platform=self.platform.name,
                             url="http://",
                             title="title",
                             company="aha",
                             search_topic='Java',
                             location="Wien")

        with session_scope(self.dbms) as session:
            for posting_date in (datetime(2019, 1, 2), datetime(2019, 1, 3)):
                session.add(Vacancies(date=posting_date, **shared_fields))
                # Flush after every add so the rows are written one at a time,
                # in the order they were created.
                session.flush()

        with session_scope(self.dbms) as session:
            # Order explicitly: without ORDER BY the database is free to return
            # the rows in any order, which would make the list comparisons
            # below depend on backend behaviour.
            stored_vacancies = session.query(Vacancies).order_by(Vacancies.id).all()

            id_list = [entry.id for entry in stored_vacancies]
            self.assertEqual(id_list, [1, 2])

            title_list = [entry.title for entry in stored_vacancies]
            self.assertEqual(title_list, ['title', 'title'])
Example #2
0
def load_data_scripture(dbms, platform_registry):
    """Load the platform and vacancy JSON fixtures into the database.

    Platforms from ``tests/test_data/platform.json`` are added first and
    committed, then the vacancies from ``tests/test_data/vacancies.json`` are
    added to the same session. Both paths are resolved relative to
    ``ConfigHandler.ROOT_DIR``.

    Args:
        dbms: Database handle passed on to ``session_scope``.
        platform_registry: Registry on which ``KarriereATHandler`` and
            ``StepStoneHandler`` are registered.
    """
    platform_json_file = os.path.join(ConfigHandler.ROOT_DIR, "tests/test_data/platform.json")
    vacancies_json_file = os.path.join(ConfigHandler.ROOT_DIR, "tests/test_data/vacancies.json")

    with session_scope(dbms=dbms) as session:

        # JSON is UTF-8 by specification; do not rely on the locale default.
        with open(platform_json_file, 'r', encoding='utf-8') as json_file:
            platform_rows = json.load(json_file)

        for row in platform_rows:
            session.add(
                Platform(name=row['name'], base_address=row['base_address'])
            )

        platform_registry.register_new_platform(KarriereATHandler)
        platform_registry.register_new_platform(StepStoneHandler)

        # Persist the platforms before the vacancies are added. commit()
        # already flushes pending changes, so no extra flush() is needed.
        session.commit()

        with open(vacancies_json_file, 'r', encoding='utf-8') as json_file:
            vacancy_rows = json.load(json_file)

        for row in vacancy_rows:
            date = datetime.strptime(row['date'], '%Y-%m-%d')
            # The fixture key is 'search_type'; it maps to the model's
            # 'search_topic'. The location is always stored as an empty string.
            session.add(
                Vacancies(platform=row['platform'], search_topic=row['search_type'], date=date,
                          url=row['url'], company=row['company'], title=row['title'], location="")
            )
    def _save_vacancy_entries_to_database(self, vacancy_entries: list,
                                          search_topic: str) -> bool:
        """Insert the given vacancy entries unless an identical row already exists.

        An entry counts as already stored when a row matches it on platform,
        company, url, title, date, location and the given search topic.

        Args:
            vacancy_entries: Entries indexable by the keys 'platform',
                'company', 'url', 'title', 'date' and 'location'.
            search_topic: Search topic stored with (and compared against)
                every entry.

        Returns:
            True when the loop finished without raising.

        Raises:
            Exception: Any error raised while querying or adding is printed
                and then re-raised unchanged.
        """
        with session_scope(self.dbms) as session:
            try:
                for entry in vacancy_entries:
                    # first() instead of scalar(): scalar() raises
                    # MultipleResultsFound as soon as more than one matching
                    # row is stored, while only existence matters here.
                    already_stored = session.query(Vacancies.id).filter(
                        Vacancies.platform == entry['platform'],
                        Vacancies.company == entry['company'],
                        Vacancies.url == entry['url'],
                        Vacancies.title == entry['title'],
                        Vacancies.search_topic == search_topic,
                        Vacancies.date == entry['date'],
                        Vacancies.location == entry['location'],
                    ).first() is not None

                    if already_stored:
                        continue

                    session.add(Vacancies(platform=entry['platform'],
                                          company=entry['company'],
                                          url=entry['url'],
                                          title=entry['title'],
                                          search_topic=search_topic,
                                          date=entry['date'],
                                          location=entry['location']))

            except Exception as e:
                print(
                    f"{self.header}: ERROR: Could not insert new entries! Msg.: {str(e)}"
                )
                raise

        return True
Example #4
0
    def test_auto_delete_postings_after_x_days(self):
        """Check that the cleanup job removes the old posting and keeps today's."""
        # Set the retention to 30 days (might be None due to other tests)
        ConfigHandler.POSTING_RETENTION_IN_DAYS = 30

        now = datetime.now()
        retention = timedelta(days=ConfigHandler.POSTING_RETENTION_IN_DAYS)
        expired_date = now - retention

        # Both postings are identical apart from their date.
        common_fields = dict(platform=self.platform.name,
                             url="http://",
                             title="title",
                             company="aha",
                             search_topic='Java',
                             location="Wien")

        with session_scope(self.dbms) as session:
            session.add_all([
                Vacancies(date=now, **common_fields),
                Vacancies(date=expired_date, **common_fields),
            ])

        self.dbms.cleanup_job_postings_in_database()

        with session_scope(self.dbms) as session:
            remaining = session.query(Vacancies).all()

            # Exactly one posting must survive the cleanup ...
            self.assertEqual(len(remaining), 1)

            # ... and it has to be the one dated today.
            self.assertEqual(remaining[0].date, now.date())
Example #5
0
    def test_insert_row_in_articles(self):
        """Insert one vacancy and check it is stored with id 1 and platform 'test.at'."""
        with session_scope(self.dbms) as session:
            session.add(
                Vacancies(platform=self.platform.name,
                          date=datetime(2019, 1, 1),
                          url="http://",
                          title="title",
                          company="aha",
                          search_topic='Java',
                          location="Wien"))
            session.flush()

        with session_scope(self.dbms) as session:
            stored_rows = session.query(Vacancies).all()

            # Exactly the one row added above is expected.
            self.assertEqual(len(stored_rows), 1)

            only_row = stored_rows[0]
            self.assertEqual([only_row.id, only_row.platform], [1, 'test.at'])
    def test_inserting_an_already_existing_vacancy(self):
        """ Saving entries whose first item is already stored must not add a row """
        first_entry = self.vacancy_entries[0]

        # Seed the database with the first entry.
        with session_scope(dbms=self.dbms) as session:
            session.add(Vacancies(**first_entry))

        with session_scope(dbms=self.dbms) as session:
            rows_before = session.query(Vacancies).all()

            # Only the seeded row is present
            self.assertEqual(len(rows_before), 1)

        # Run the handler's save routine with the full entry list.
        self.karriere_at_handler._save_vacancy_entries_to_database(
            vacancy_entries=self.vacancy_entries,
            search_topic=self.search_topic)

        with session_scope(dbms=self.dbms) as session:
            rows_after = session.query(Vacancies).all()

            # The row count must still be 1
            self.assertEqual(len(rows_after), 1)
Example #7
0
    def test_disabled_retention_in_days(self):
        """Test if value of "disabled" disables auto-deletion"""
        # Start from a retention of 30 days (might be None due to other tests)
        ConfigHandler.POSTING_RETENTION_IN_DAYS = 30

        today = datetime.now()
        very_old_post_date = today - timedelta(
            days=ConfigHandler.POSTING_RETENTION_IN_DAYS)

        # NOTE(review): CONFIG_PATH is not restored afterwards - confirm that
        # other tests do not depend on its previous value.
        ConfigHandler.CONFIG_PATH = os.path.join(
            ConfigHandler.ROOT_DIR, 'tests', 'test_data', 'config_jsons',
            'config_disabled_retention_days.json')
        ConfigHandler.validate_config_file_base_variables()

        browser_handler = BrowserHandler()
        browser = browser_handler.get_browser()
        try:
            platform_registry = PlatformRegistry(browser=browser, dbms=self.dbms)
            platform_registry.register_new_platform(KarriereATHandler)
            platform_registry.create_platform_entries_in_database()

            ConfigHandler.validate_search_topics(
                platform_registry=platform_registry)
        finally:
            # Close the browser even when registration or validation fails,
            # so a failing test does not leave a browser behind.
            browser_handler.close_browser()

        # Check if Posting-retention-in-days was correctly set to None
        self.assertEqual(ConfigHandler.POSTING_RETENTION_IN_DAYS, None)

        shared_fields = dict(platform=self.platform.name,
                             url="http://",
                             title="title",
                             company="aha",
                             search_topic='Java',
                             location="Wien")

        with session_scope(self.dbms) as session:
            for posting_date in (today, very_old_post_date):
                session.add(Vacancies(date=posting_date, **shared_fields))

        self.dbms.cleanup_job_postings_in_database()

        with session_scope(self.dbms) as session:
            # Order by id so the date comparison below does not depend on the
            # unspecified row order of an unordered SELECT.
            vacancies_instances_list = session.query(Vacancies).order_by(
                Vacancies.id).all()

            # Auto-Deletion should be disabled - both entries should be present
            self.assertEqual(len(vacancies_instances_list), 2)

            # Both dates must still be stored, in insertion order
            date_list = [row.date for row in vacancies_instances_list]
            self.assertEqual(
                date_list,
                [today.date(), very_old_post_date.date()])