def setUp(self) -> None:
    """Setting up the persona for a test."""
    # Zero-argument super() (Python 3) instead of the legacy
    # super(TrainingAdviceScoringModelTestCase, self) — consistent with the
    # other setUp variant in this file and immune to class renames.
    super().setUp()
    self.persona = self._random_persona().clone()
    # Three empty trainings: enough for scorers that only count results.
    self._many_trainings = [
        training_pb2.Training(),
        training_pb2.Training(),
        training_pb2.Training(),
    ]
def setUp(self) -> None:
    """Setting up the persona for a test."""
    super().setUp()
    self.persona = self._random_persona().clone()
    # A batch of three distinct empty trainings, for tests that only need
    # "several trainings" without caring about their content.
    self._many_trainings = [training_pb2.Training() for _ in range(3)]
def test_with_trainings(self) -> None:
    """With trainings."""
    self.project.advices.add(advice_id='training')
    # Four trainings returned by the (mocked) trainings source; the email
    # template is expected to keep only the first three.
    mocked_trainings = [
        training_pb2.Training(name='Fedback 101', city_name='Lyon'),
        training_pb2.Training(name='Customer Care', city_name='Brussels'),
        training_pb2.Training(name='Drongo', city_name='Paris'),
        training_pb2.Training(name='Cheveux de riche', city_name='Paris'),
    ]
    self.mock_get_trainings.return_value = mocked_trainings
    self._assert_user_receives_focus()
    self._assert_has_unsubscribe_link()
    self._assert_has_unsubscribe_url(
        'changeEmailSettingsUrl',
        coachingEmailFrequency='EMAIL_ONCE_A_MONTH')
    self._assert_has_status_update_link('statusUpdateUrl')
    # pop() both checks the value and removes the key so that
    # _assert_remaining_variables below sees only what is left.
    self.assertEqual('en Haute-Garonne', self._variables.pop('inDepartement'))
    self.assertEqual('de steward', self._variables.pop('ofJobName'))
    self._assert_has_logged_url('loginUrl', '/projet/0')
    self._assert_has_logged_url(
        'deepTrainingAdviceUrl', '/projet/0/methode/training')
    self._assert_remaining_variables({
        'firstName': 'Patrick',
        'gender': 'MASCULINE',
        'numTrainings': 3,
        'trainings': [
            {'cityName': 'Lyon', 'name': 'Fedback 101'},
            {'cityName': 'Brussels', 'name': 'Customer Care'},
            {'cityName': 'Paris', 'name': 'Drongo'},
        ],
    })
def _sort_filter_courses(
        courses: list[dict[str, Any]]) -> list[training_pb2.Training]:
    """Keep only the popular courses, most popular first.

    Args:
        courses: dicts with at least a 'name' key and an optional
            'percentage' key (share of workers that mentioned the course).

    Returns:
        One Training per course mentioned by at least 10% of the workers,
        sorted by decreasing percentage, or NO_TRAININGS when none qualify.
    """
    # Filter before sorting: no point in sorting courses we are about to
    # drop, and the final order is identical (sorting is total on the
    # surviving elements either way).
    popular_courses = [
        course for course in courses if course.get('percentage', 0) >= 10
    ]
    popular_courses.sort(
        key=lambda course: course.get('percentage', 0), reverse=True)
    filtered_trainings = [
        training_pb2.Training(name=course['name'])
        for course in popular_courses
    ]
    if not filtered_trainings:
        # NOTE(review): the wording says "failed" although the request itself
        # succeeded — kept byte-identical in case log monitoring depends on it.
        logging.warning(
            'Request for hesa courses failed, there is no training above the threshold.'
        )
        return NO_TRAININGS
    return filtered_trainings
def test_run_all(self, mock_carif_get_trainings):
    """Run all scoring models on all personas."""
    # The CARIF API is mocked out: scorers only need "some" trainings, so
    # three empty protos are enough.
    mock_carif_get_trainings.return_value = [
        training_pb2.Training(),
        training_pb2.Training(),
        training_pb2.Training(),
    ]
    # In-memory Mongo seeded with every fixture collection the scoring
    # models may read.
    database = mongomock.MongoClient().test
    _load_json_to_mongo(database, 'job_group_info')
    _load_json_to_mongo(database, 'local_diagnosis')
    _load_json_to_mongo(database, 'associations')
    _load_json_to_mongo(database, 'volunteering_missions')
    _load_json_to_mongo(database, 'hiring_cities')
    _load_json_to_mongo(database, 'cities')
    _load_json_to_mongo(database, 'departements')
    _load_json_to_mongo(database, 'seasonal_jobbing')
    _load_json_to_mongo(database, 'specific_to_job_advice')
    _load_json_to_mongo(database, 'reorient_jobbing')
    _load_json_to_mongo(database, 'reorient_to_close')
    # scores[model_name][persona_name] -> score.
    scores = collections.defaultdict(
        lambda: collections.defaultdict(float))
    # Mock the "now" date so that scoring models that are based on time
    # (like "Right timing") are deterministic.
    now = datetime.datetime(2016, 9, 27)
    for model_name in list(scoring.SCORING_MODELS.keys()):
        model = scoring.get_scoring_model(model_name)
        self.assertTrue(model, msg=model_name)
        # Replace the defaultdict value by a plain dict so missing personas
        # would be an error rather than a silent 0.
        scores[model_name] = {}
        for name, persona in _PERSONAS.items():
            scoring_project = persona.scoring_project(database, now=now)
            try:
                score, explanations = model.score_and_explain(
                    scoring_project)
            except scoring.NotEnoughDataException:
                # -1 is a sentinel meaning "not enough data to score".
                score = -1
                explanations = []
            scores[model_name][name] = score
            self.assertIsInstance(
                scores[model_name][name], numbers.Number,
                msg='while using the model "{}" to score "{}"'.format(
                    model_name, name))
            self._assert_proper_explanations(
                explanations, scoring_project,
                msg='while using the model "{}" to explain the score of "{}"'
                .format(model_name, name))
    # Sanity check: each persona should be scored differently by at least
    # two models (negative sentinels are clamped to 0 first).
    for name in _PERSONAS:
        persona_scores = [
            max(model_scores[name], 0)
            for model_scores in scores.values()
        ]
        self.assertLess(
            1, len(set(persona_scores)),
            msg='Persona "{}" has the same score across all models.'.
            format(name))
    # Sanity check: no two (non-constant) models should produce the exact
    # same score vector across all personas — that would mean one of them
    # is redundant. The JSON dump of the per-persona scores is used as a
    # hashable fingerprint of a model's behavior.
    model_scores_hashes = collections.defaultdict(list)
    for model_name, model_scores in scores.items():
        model = scoring.SCORING_MODELS[model_name]
        if isinstance(model, scoring.ConstantScoreModel):
            # Constant models score everyone the same by design.
            continue
        self.assertLess(
            1, len(set(model_scores.values())),
            msg='Model "{}" has the same score for all personas.'.format(
                model_name))
        scores_hash = json.dumps(model_scores, sort_keys=True)
        model_scores_hashes[scores_hash].append(model_name)
    models_with_same_score = \
        [models for models in model_scores_hashes.values() if len(models) > 1]
    self.assertFalse(models_with_same_score,
                     msg='Some models always have the same scores')
def get_trainings(rome_id: str, departement_id: str) -> list[training_pb2.Training]:
    """Helper function to get trainings from the CARIF API.

    Carif sends us multiple trainings that have the same city and title, this
    function only return one training per city/title.

    Args:
        rome_id: ID of the job group (ROME) to search trainings for.
        departement_id: ID of the French département to search in.

    Returns:
        A list of deduplicated trainings, or an empty list on any error
        (errors are logged, never raised).
    """
    no_trainings = []
    try:
        # A timeout is mandatory here: without one an unresponsive CARIF
        # server would block the caller forever.
        xml = requests.get(
            _CARIF_URL,
            params={
                'idsMetiers': rome_id,
                'code-departement': departement_id
            },
            timeout=60)
    # RequestException covers ConnectionError as well as the Timeout the
    # timeout= parameter above may now raise.
    except requests.exceptions.RequestException as error:
        logging.warning('XML request for intercarif failed:\n%s', error)
        return no_trainings

    trainings = []
    if xml.status_code != 200:
        logging.warning('XML request for intercarif failed with error code %d',
                        xml.status_code)
        return no_trainings

    if not xml.text:
        logging.warning(
            'XML request for intercarif failed, there is no text in the response.'
        )
        return no_trainings

    # Intercarif does not provide an encoding in the response header which misleads the xmltodict
    # module.
    xml.encoding = 'utf-8'
    info = xmltodict.parse(xml.text)

    offers = []
    try:
        offers = info['lheo-index']['resumes-offres']['resume-offre']
    except KeyError:
        return no_trainings
    # xmltodict yields a single dict (not a list) when there is exactly one
    # offer; iterating a dict would loop over its keys and crash below.
    if not isinstance(offers, list):
        offers = [offers]

    # Since our goal is not to give a super tool to find all the precise training and their
    # differences, we just show one, and dedup them on a key composed of city and name.
    trainings_keys = set()
    for offer in offers:
        try:
            formacodes = offer['domaine-formation']['code-FORMACODE']
            # Same single-element quirk of xmltodict as for offers above.
            if not isinstance(formacodes, list):
                formacodes = [formacodes]
            name = offer['intitule-formation'].replace('\n', ' ')
            city_name = offer['ville']
            url = offer['@href']
            key = _make_key(name, city_name)
            if key in trainings_keys:
                # Already got a training with the same city and name.
                continue
            training = training_pb2.Training(
                name=name, city_name=city_name, url=url, formacodes=formacodes)
            trainings_keys.add(key)
            trainings.append(training)
        except KeyError:
            # If an important field is missing, we skip this training.
            logging.info(
                'Skipping the offer from CARIF, an important field is missing: %s',
                offer)
            continue
    return trainings
def test_run_all(self, mock_carif_get_trainings: mock.MagicMock) -> None:
    """Run all scoring models on all personas."""
    # The CARIF API is mocked out: scorers only need "some" trainings, so
    # three empty protos are enough.
    mock_carif_get_trainings.return_value = [
        training_pb2.Training(),
        training_pb2.Training(),
        training_pb2.Training(),
    ]
    # In-memory Mongo seeded (alphabetically) with every fixture collection
    # the scoring models may read.
    database = mongomock.MongoClient().test
    _load_json_to_mongo(database, 'associations')
    _load_json_to_mongo(database, 'cities')
    _load_json_to_mongo(database, 'departements')
    _load_json_to_mongo(database, 'hiring_cities')
    _load_json_to_mongo(database, 'job_group_info')
    _load_json_to_mongo(database, 'local_diagnosis')
    _load_json_to_mongo(database, 'online_salons')
    _load_json_to_mongo(database, 'reorient_jobbing')
    _load_json_to_mongo(database, 'reorient_to_close')
    _load_json_to_mongo(database, 'seasonal_jobbing')
    _load_json_to_mongo(database, 'skills_for_future')
    _load_json_to_mongo(database, 'specific_to_job_advice')
    _load_json_to_mongo(database, 'volunteering_missions')
    # scores[model_name][persona_name] -> score.
    scores: Dict[str, Dict[str, float]] = \
        collections.defaultdict(lambda: collections.defaultdict(float))
    # Mock the "now" date so that scoring models that are based on time
    # (like "Right timing") are deterministic.
    now = datetime.datetime(2016, 9, 27)
    for model_name in list(scoring.SCORING_MODELS.keys()):
        model = scoring.get_scoring_model(model_name)
        if not model:  # pragma: no-cover
            raise KeyError(f'No scoring model with name "{model_name}".')
        # NOTE(review): redundant after the raise above, kept as-is.
        self.assertTrue(model, msg=model_name)
        # Replace the defaultdict value by a plain dict so missing personas
        # would be an error rather than a silent 0.
        scores[model_name] = {}
        for name, persona in _PERSONAS.items():
            scoring_project = persona.scoring_project(database, now=now)
            try:
                score, explanations = model.score_and_explain(
                    scoring_project)
            except scoring.NotEnoughDataException:
                # -1 is a sentinel meaning "not enough data to score".
                score = -1
                explanations = []
            scores[model_name][name] = score
            self.assertIsInstance(
                scores[model_name][name], numbers.Number,
                msg=
                f'while using the model "{model_name}" to score "{name}"')
            self._assert_proper_explanations(
                explanations, scoring_project,
                msg=
                f'while using the model "{model_name}" to explain the score of "{name}"'
            )
    # Sanity check: each persona should be scored differently by at least
    # two models (negative sentinels are clamped to 0 first).
    for name in _PERSONAS:
        persona_scores = [
            max(model_scores[name], 0)
            for model_scores in scores.values()
        ]
        self.assertLess(
            1, len(set(persona_scores)),
            msg=f'Persona "{name}" has the same score across all models.')
    model_scores_hashes: Dict[str, Set[str]] = collections.defaultdict(set)
    # A mapping of renamings in progress.
    renamings = {
        'for-exact-experienced(internship)': 'for-exact-experienced(intern)',
    }
    # A renamed model and its new name must score all personas identically;
    # the old name is popped so it is not double-counted below.
    for base_name, target_name in renamings.items():
        self.assertEqual(
            json.dumps(scores.pop(base_name), sort_keys=True),
            json.dumps(scores[target_name], sort_keys=True),
            msg=
            f'The model "{base_name}" is not consistent with its renaming "{target_name}"'
        )
    # Sanity check: no two (non-constant) models should produce the exact
    # same score vector across all personas — that would mean one of them
    # is redundant. The JSON dump of the per-persona scores is used as a
    # hashable fingerprint of a model's behavior.
    for model_name, model_scores in scores.items():
        model = scoring.SCORING_MODELS[model_name]
        if isinstance(model, scoring.ConstantScoreModel):
            # Constant models score everyone the same by design.
            continue
        self.assertLess(
            1, len(set(model_scores.values())),
            msg=f'Model "{model_name}" has the same score for all personas.'
        )
        scores_hash = json.dumps(model_scores, sort_keys=True)
        model_scores_hashes[scores_hash].add(model_name)
    models_with_same_score = \
        [models for models in model_scores_hashes.values() if len(models) > 1]
    self.assertFalse(models_with_same_score,
                     msg='Some models always have the same scores')