def test_auto_increment(self):
    """Insert two vacancies and verify they are read back with ids 1 and 2."""
    posting_dates = (datetime(2019, 1, 2), datetime(2019, 1, 3))

    with session_scope(self.dbms) as session:
        # Build both rows first, then hand them to the session in order.
        new_rows = [
            Vacancies(platform=self.platform.name,
                      date=posting_date,
                      url="http://",
                      title="title",
                      company="aha",
                      search_topic='Java',
                      location="Wien")
            for posting_date in posting_dates
        ]
        for row in new_rows:
            session.add(row)
        session.flush()

    with session_scope(self.dbms) as session:
        stored_rows = session.query(Vacancies).all()
        # The ids are expected to count up from 1 in insertion order.
        self.assertEqual([row.id for row in stored_rows], [1, 2])
        self.assertEqual([row.title for row in stored_rows],
                         ['title', 'title'])
def load_data_scripture(dbms, platform_registry):
    """Seed the database from the platform and vacancy JSON test fixtures.

    Platform rows are added and committed first, then the vacancy rows are
    added within the same session scope. In between, the KarriereAT and
    StepStone handlers are registered on ``platform_registry``.

    :param dbms: database handle passed on to ``session_scope``
    :param platform_registry: registry on which the two platform handlers
        are registered
    """
    platform_json_file = os.path.join(ConfigHandler.ROOT_DIR,
                                      "tests/test_data/platform.json")
    vacancies_json_file = os.path.join(ConfigHandler.ROOT_DIR,
                                       "tests/test_data/vacancies.json")

    with session_scope(dbms=dbms) as session:
        # Read the fixtures as UTF-8 explicitly: the platform default
        # encoding would mis-decode non-ASCII text on some systems.
        with open(platform_json_file, 'r', encoding='utf-8') as json_file:
            platform_rows = json.load(json_file)
        for row in platform_rows:
            session.add(
                Platform(name=row['name'], base_address=row['base_address'])
            )

        platform_registry.register_new_platform(KarriereATHandler)
        platform_registry.register_new_platform(StepStoneHandler)

        # Persist the platforms before any vacancy rows are added.
        session.commit()
        session.flush()

        with open(vacancies_json_file, 'r', encoding='utf-8') as json_file:
            vacancy_rows = json.load(json_file)
        for row in vacancy_rows:
            # The fixture stores dates as 'YYYY-MM-DD' strings.
            date = datetime.strptime(row['date'], '%Y-%m-%d')
            session.add(
                Vacancies(platform=row['platform'],
                          search_topic=row['search_type'],
                          date=date,
                          url=row['url'],
                          company=row['company'],
                          title=row['title'],
                          location="")
            )
def _save_vacancy_entries_to_database(self, vacancy_entries: list,
                                      search_topic: str) -> bool:
    """Store the given vacancy entries, skipping those already in the database.

    An entry counts as already stored when a row with the same platform,
    company, url, title, search topic, date and location exists.

    :param vacancy_entries: list of dicts providing the keys ``platform``,
        ``company``, ``url``, ``title``, ``date`` and ``location``
    :param search_topic: search topic saved with every new row
    :return: ``True`` when all entries were processed without an exception
    :raises Exception: any error raised while querying or adding is printed
        and then re-raised unchanged
    """
    with session_scope(self.dbms) as session:
        try:
            for entry in vacancy_entries:
                # first() rather than scalar(): scalar() raises
                # MultipleResultsFound when more than one matching row is
                # already stored, which would abort the whole save.
                already_stored = session.query(Vacancies.id).filter(
                    Vacancies.platform == entry['platform'],
                    Vacancies.company == entry['company'],
                    Vacancies.url == entry['url'],
                    Vacancies.title == entry['title'],
                    Vacancies.search_topic == search_topic,
                    Vacancies.date == entry['date'],
                    Vacancies.location == entry['location'],
                ).first() is not None
                if already_stored:
                    continue

                session.add(
                    Vacancies(platform=entry['platform'],
                              company=entry['company'],
                              url=entry['url'],
                              title=entry['title'],
                              search_topic=search_topic,
                              date=entry['date'],
                              location=entry['location'])
                )
        except Exception as e:
            print(
                f"{self.header}: ERROR: Could not insert new entries! Msg.: {str(e)}"
            )
            raise
    return True
def test_auto_delete_postings_after_x_days(self):
    """Check that cleanup removes a posting dated one retention period ago."""
    # Pin the retention period to 30 days (other tests may leave it as None)
    ConfigHandler.POSTING_RETENTION_IN_DAYS = 30
    now = datetime.now()
    expired_date = now - timedelta(
        days=ConfigHandler.POSTING_RETENTION_IN_DAYS)

    with session_scope(self.dbms) as session:
        # One posting dated now, one dated exactly one retention period ago.
        postings = [
            Vacancies(platform=self.platform.name,
                      date=posting_date,
                      url="http://",
                      title="title",
                      company="aha",
                      search_topic='Java',
                      location="Wien")
            for posting_date in (now, expired_date)
        ]
        for posting in postings:
            session.add(posting)

    self.dbms.cleanup_job_postings_in_database()

    with session_scope(self.dbms) as session:
        remaining = session.query(Vacancies).all()
        # Exactly one posting survives the cleanup ...
        self.assertEqual(len(remaining), 1)
        # ... and it is the one dated today.
        self.assertEqual(remaining[0].date, now.date())
def test_insert_row_in_articles(self):
    """Insert one vacancy and verify its id and platform when read back."""
    with session_scope(self.dbms) as session:
        session.add(
            Vacancies(platform=self.platform.name,
                      date=datetime(2019, 1, 1),
                      url="http://",
                      title="title",
                      company="aha",
                      search_topic='Java',
                      location="Wien")
        )
        session.flush()

    with session_scope(self.dbms) as session:
        stored_rows = session.query(Vacancies).all()
        self.assertEqual(len(stored_rows), 1)

        only_row = stored_rows[0]
        self.assertEqual([only_row.id, only_row.platform], [1, 'test.at'])
def test_inserting_an_already_existing_vacancy(self):
    """
    Saving entries that include an already stored vacancy must not add a
    second row for it.
    """
    # Pre-populate the table with the first entry.
    with session_scope(dbms=self.dbms) as session:
        existing_vacancy = Vacancies(**self.vacancy_entries[0])
        session.add(existing_vacancy)

    with session_scope(dbms=self.dbms) as session:
        rows_before = session.query(Vacancies).all()
        # Exactly the pre-populated row is present
        self.assertEqual(len(rows_before), 1)

    self.karriere_at_handler._save_vacancy_entries_to_database(
        vacancy_entries=self.vacancy_entries,
        search_topic=self.search_topic)

    with session_scope(dbms=self.dbms) as session:
        rows_after = session.query(Vacancies).all()
        # The row count must still be 1 after saving
        self.assertEqual(len(rows_after), 1)
def test_disabled_retention_in_days(self):
    """Test if value of "disabled" disables auto-deletion.

    Loads ``config_disabled_retention_days.json``, expects
    ``ConfigHandler.POSTING_RETENTION_IN_DAYS`` to become ``None`` after
    validation, and then checks that the cleanup job keeps both the current
    and the old posting.
    """
    # Start from a 30-day retention period (other tests may leave it as None)
    ConfigHandler.POSTING_RETENTION_IN_DAYS = 30
    today = datetime.now()
    very_old_post_date = today - timedelta(
        days=ConfigHandler.POSTING_RETENTION_IN_DAYS)

    ConfigHandler.CONFIG_PATH = os.path.join(
        ConfigHandler.ROOT_DIR, 'tests', 'test_data', 'config_jsons',
        'config_disabled_retention_days.json')
    ConfigHandler.validate_config_file_base_variables()

    browser_handler = BrowserHandler()
    browser = browser_handler.get_browser()
    try:
        platform_registry = PlatformRegistry(browser=browser, dbms=self.dbms)
        platform_registry.register_new_platform(KarriereATHandler)
        platform_registry.create_platform_entries_in_database()
        ConfigHandler.validate_search_topics(
            platform_registry=platform_registry)
    finally:
        # Close the browser even when one of the setup steps above fails,
        # so a failing test does not leave a browser behind.
        browser_handler.close_browser()

    # Check if Posting-retention-in-days was correctly set to None
    self.assertIsNone(ConfigHandler.POSTING_RETENTION_IN_DAYS)

    with session_scope(self.dbms) as session:
        vacancy_list = [
            Vacancies(platform=self.platform.name,
                      date=today,
                      url="http://",
                      title="title",
                      company="aha",
                      search_topic='Java',
                      location="Wien"),
            Vacancies(platform=self.platform.name,
                      date=very_old_post_date,
                      url="http://",
                      title="title",
                      company="aha",
                      search_topic='Java',
                      location="Wien"),
        ]
        for v in vacancy_list:
            session.add(v)

    self.dbms.cleanup_job_postings_in_database()

    with session_scope(self.dbms) as session:
        vacancies_instances_list = session.query(Vacancies).all()
        # Auto-Deletion should be disabled - both entries should be present
        self.assertEqual(len(vacancies_instances_list), 2)

        # Both postings keep their dates, in insertion order
        date_list = [row.date for row in vacancies_instances_list]
        self.assertEqual(
            date_list,
            [today.date(), very_old_post_date.date()])