Beispiel #1
0
 def setUp(self):
     """Prepare the fixtures shared by the tests: a persona and a list of trainings."""
     super(TrainingAdviceScoringModelTestCase, self).setUp()
     random_persona = self._random_persona()
     # Work on a clone of the persona returned by the helper.
     self.persona = random_persona.clone()
     # Three separate, default-constructed Training messages.
     self._many_trainings = [training_pb2.Training() for _ in range(3)]
Beispiel #2
0
    def setUp(self) -> None:
        """Prepare the fixtures shared by the tests: a persona and a list of trainings."""

        super().setUp()
        random_persona = self._random_persona()
        # Work on a clone of the persona returned by the helper.
        self.persona = random_persona.clone()
        # Three separate, default-constructed Training messages.
        self._many_trainings = [training_pb2.Training() for _ in range(3)]
Beispiel #3
0
    def test_with_trainings(self) -> None:
        """Check the produced variables when trainings are available."""

        self.project.advices.add(advice_id='training')
        # Four trainings are offered by the mock, while the expectations at the
        # end of this test list only the first three of them.
        available_trainings = [
            training_pb2.Training(name=name, city_name=city_name)
            for name, city_name in (
                ('Fedback 101', 'Lyon'),
                ('Customer Care', 'Brussels'),
                ('Drongo', 'Paris'),
                ('Cheveux de riche', 'Paris'),
            )
        ]
        self.mock_get_trainings.return_value = available_trainings

        self._assert_user_receives_focus()

        self._assert_has_unsubscribe_link()
        self._assert_has_unsubscribe_url(
            'changeEmailSettingsUrl',
            coachingEmailFrequency='EMAIL_ONCE_A_MONTH')
        self._assert_has_status_update_link('statusUpdateUrl')

        # Each checked variable is popped so that only the unchecked ones are
        # left for the final comparison.
        in_departement = self._variables.pop('inDepartement')
        self.assertEqual('en Haute-Garonne', in_departement)
        of_job_name = self._variables.pop('ofJobName')
        self.assertEqual('de steward', of_job_name)
        self._assert_has_logged_url('loginUrl', '/projet/0')
        self._assert_has_logged_url(
            'deepTrainingAdviceUrl', '/projet/0/methode/training')

        expected_trainings = [
            {'cityName': 'Lyon', 'name': 'Fedback 101'},
            {'cityName': 'Brussels', 'name': 'Customer Care'},
            {'cityName': 'Paris', 'name': 'Drongo'},
        ]
        expected_variables = {
            'firstName': 'Patrick',
            'gender': 'MASCULINE',
            'numTrainings': 3,
            'trainings': expected_trainings,
        }
        self._assert_remaining_variables(expected_variables)
Beispiel #4
0
def _sort_filter_courses(
        courses: list[dict[str, Any]]) -> list[training_pb2.Training]:
    """Turn raw course records into trainings, highest percentage first.

    Args:
        courses: course records; each one is read through its 'percentage' key
            (treated as 0 when absent) and its 'name' key.

    Returns:
        One Training per course whose percentage is at least 10, ordered by
        decreasing percentage, or NO_TRAININGS (after logging a warning) when
        no course reaches that threshold.
    """

    def _percentage(course: dict[str, Any]) -> Any:
        return course.get('percentage', 0)

    ranked_courses = sorted(courses, key=_percentage, reverse=True)

    # We only consider courses that at least 10% of the workers mentioned.
    kept_trainings = []
    for course in ranked_courses:
        if _percentage(course) >= 10:
            kept_trainings.append(training_pb2.Training(name=course['name']))

    if kept_trainings:
        return kept_trainings

    logging.warning(
        'Request for hesa courses failed, there is no training above the threshold.'
    )
    return NO_TRAININGS
Beispiel #5
0
    def test_run_all(self, mock_carif_get_trainings):
        """Run every scoring model on every persona and sanity-check the scores.

        The checks are: each score is a number, each persona gets at least two
        different scores across models, each non-constant model gives at least
        two different scores across personas, and no two non-constant models
        give exactly the same scores to all personas.
        """

        mock_carif_get_trainings.return_value = [
            training_pb2.Training() for _ in range(3)
        ]
        database = mongomock.MongoClient().test
        fixture_collections = (
            'job_group_info',
            'local_diagnosis',
            'associations',
            'volunteering_missions',
            'hiring_cities',
            'cities',
            'departements',
            'seasonal_jobbing',
            'specific_to_job_advice',
            'reorient_jobbing',
            'reorient_to_close',
        )
        for collection_name in fixture_collections:
            _load_json_to_mongo(database, collection_name)

        scores = collections.defaultdict(
            lambda: collections.defaultdict(float))
        # Mock the "now" date so that scoring models that are based on time
        # (like "Right timing") are deterministic.
        now = datetime.datetime(2016, 9, 27)
        # Iterate over a snapshot of the model names rather than the live dict.
        for model_name in list(scoring.SCORING_MODELS):
            model = scoring.get_scoring_model(model_name)
            self.assertTrue(model, msg=model_name)
            scores_by_persona = {}
            scores[model_name] = scores_by_persona
            for name, persona in _PERSONAS.items():
                scoring_project = persona.scoring_project(database, now=now)
                try:
                    score, explanations = model.score_and_explain(
                        scoring_project)
                except scoring.NotEnoughDataException:
                    # Models lacking data are recorded with a sentinel score.
                    score, explanations = -1, []
                scores_by_persona[name] = score
                score_msg = 'while using the model "{}" to score "{}"'.format(
                    model_name, name)
                self.assertIsInstance(score, numbers.Number, msg=score_msg)
                explain_msg = \
                    'while using the model "{}" to explain the score of "{}"'.format(
                        model_name, name)
                self._assert_proper_explanations(
                    explanations, scoring_project, msg=explain_msg)

        for name in _PERSONAS:
            # Negative scores are clamped to 0 before comparing.
            distinct_scores = {
                max(model_scores[name], 0) for model_scores in scores.values()
            }
            self.assertLess(
                1,
                len(distinct_scores),
                msg='Persona "{}" has the same score across all models.'.format(
                    name))

        # Group model names by the JSON dump of their scores to spot models
        # that score every persona identically.
        model_scores_hashes = collections.defaultdict(list)
        for model_name, model_scores in scores.items():
            if isinstance(scoring.SCORING_MODELS[model_name],
                          scoring.ConstantScoreModel):
                continue
            distinct_scores = set(model_scores.values())
            self.assertLess(
                1,
                len(distinct_scores),
                msg='Model "{}" has the same score for all personas.'.format(
                    model_name))
            scores_hash = json.dumps(model_scores, sort_keys=True)
            model_scores_hashes[scores_hash].append(model_name)
        models_with_same_score = [
            model_names for model_names in model_scores_hashes.values()
            if len(model_names) > 1
        ]
        self.assertFalse(models_with_same_score,
                         msg='Some models always have the same scores')
0
def get_trainings(rome_id, departement_id):
    """Helper function to get trainings from the CARIF API.

    Carif sends us multiple trainings that have the same city and title, this function only
    returns one training per city/title.

    This is a best-effort helper: any failure to fetch or parse the response is logged (or, for
    a response without offers, silently ignored) and yields an empty list instead of raising.

    Args:
        rome_id: value sent as the "idsMetiers" query parameter.
        departement_id: value sent as the "code-departement" query parameter.

    Returns:
        A list of training_pb2.Training messages, at most one per (name, city) key, in the order
        they appear in the response. The list is empty when the request failed, the response was
        empty or not valid XML, or it contained no offers.
    """
    # Imported locally: only needed to recognise malformed XML payloads.
    from xml.parsers.expat import ExpatError

    try:
        response = requests.get(_CARIF_URL,
                                params={
                                    'idsMetiers': rome_id,
                                    'code-departement': departement_id
                                })
    except requests.exceptions.ConnectionError as error:
        logging.warning('XML request for intercarif failed:\n%s', error)
        return []

    if response.status_code != 200:
        logging.warning('XML request for intercarif failed with error code %d',
                        response.status_code)
        return []

    # Intercarif does not provide an encoding in the response header which misleads the xmltodict
    # module. Set it before the first access to `.text` so that the body is decoded only once,
    # and as UTF-8.
    response.encoding = 'utf-8'
    text = response.text

    if not text:
        logging.warning(
            'XML request for intercarif failed, there is no text in the response.'
        )
        return []

    try:
        info = xmltodict.parse(text)
    except ExpatError as error:
        logging.warning(
            'XML request for intercarif failed, the response is not valid XML:\n%s',
            error)
        return []

    try:
        offers = info['lheo-index']['resumes-offres']['resume-offre']
    except (KeyError, TypeError):
        # KeyError: an expected element is absent. TypeError: a parent element is empty and was
        # therefore parsed as None.
        return []

    # xmltodict yields a single mapping instead of a list when there is only one offer; iterating
    # over that mapping would walk its keys rather than the offers.
    if not isinstance(offers, list):
        offers = [offers]

    # Since our goal is not to give a super tool to find all the precise training and their
    # differences, we just show one, and dedup them on a key composed of city and name.
    trainings_keys = set()
    trainings = []

    for offer in offers:
        try:
            formacodes = offer['domaine-formation']['code-FORMACODE']
            if not isinstance(formacodes, list):
                formacodes = [formacodes]

            name = offer['intitule-formation'].replace('\n', ' ')
            city_name = offer['ville']
            url = offer['@href']

            key = _make_key(name, city_name)
            if key in trainings_keys:
                continue

            training = training_pb2.Training(name=name,
                                             city_name=city_name,
                                             url=url,
                                             formacodes=formacodes)
        except (KeyError, TypeError, AttributeError):
            # If an important field is missing, empty (parsed as None) or of an unexpected
            # shape, we skip this training.
            logging.info(
                'Skipping the offer from CARIF, an important field is missing: %s',
                offer)
            continue

        trainings_keys.add(key)
        trainings.append(training)

    return trainings
Beispiel #7
0
    def test_run_all(self, mock_carif_get_trainings: mock.MagicMock) -> None:
        """Run every scoring model on every persona and sanity-check the scores.

        The checks are: each score is a number, each persona gets at least two
        different scores across models, models being renamed score like their
        new name, each non-constant model gives at least two different scores
        across personas, and no two non-constant models give exactly the same
        scores to all personas.
        """

        mock_carif_get_trainings.return_value = [
            training_pb2.Training() for _ in range(3)
        ]
        database = mongomock.MongoClient().test
        fixture_collections = (
            'associations',
            'cities',
            'departements',
            'hiring_cities',
            'job_group_info',
            'local_diagnosis',
            'online_salons',
            'reorient_jobbing',
            'reorient_to_close',
            'seasonal_jobbing',
            'skills_for_future',
            'specific_to_job_advice',
            'volunteering_missions',
        )
        for collection_name in fixture_collections:
            _load_json_to_mongo(database, collection_name)

        scores: Dict[str, Dict[str, float]] = \
            collections.defaultdict(lambda: collections.defaultdict(float))
        # Mock the "now" date so that scoring models that are based on time
        # (like "Right timing") are deterministic.
        now = datetime.datetime(2016, 9, 27)
        # Iterate over a snapshot of the model names rather than the live dict.
        for model_name in list(scoring.SCORING_MODELS):
            model = scoring.get_scoring_model(model_name)
            if not model:  # pragma: no-cover
                raise KeyError(f'No scoring model with name "{model_name}".')
            self.assertTrue(model, msg=model_name)
            scores_by_persona: Dict[str, float] = {}
            scores[model_name] = scores_by_persona
            for name, persona in _PERSONAS.items():
                scoring_project = persona.scoring_project(database, now=now)
                try:
                    score, explanations = model.score_and_explain(
                        scoring_project)
                except scoring.NotEnoughDataException:
                    # Models lacking data are recorded with a sentinel score.
                    score, explanations = -1, []
                scores_by_persona[name] = score
                score_msg = \
                    f'while using the model "{model_name}" to score "{name}"'
                self.assertIsInstance(score, numbers.Number, msg=score_msg)
                explain_msg = \
                    f'while using the model "{model_name}" to explain the score of "{name}"'
                self._assert_proper_explanations(
                    explanations, scoring_project, msg=explain_msg)

        for name in _PERSONAS:
            # Negative scores are clamped to 0 before comparing.
            distinct_scores = {
                max(model_scores[name], 0) for model_scores in scores.values()
            }
            self.assertLess(
                1,
                len(distinct_scores),
                msg=f'Persona "{name}" has the same score across all models.')

        model_scores_hashes: Dict[str, Set[str]] = collections.defaultdict(set)
        # A mapping of renamings in progress.
        renamings = {
            'for-exact-experienced(internship)':
            'for-exact-experienced(intern)',
        }
        for base_name, target_name in renamings.items():
            # The old name is removed from the scores so that it is not
            # reported below as a duplicate of its new name.
            base_dump = json.dumps(scores.pop(base_name), sort_keys=True)
            target_dump = json.dumps(scores[target_name], sort_keys=True)
            self.assertEqual(
                base_dump,
                target_dump,
                msg=
                f'The model "{base_name}" is not consistent with its renaming "{target_name}"'
            )
        # Group model names by the JSON dump of their scores to spot models
        # that score every persona identically.
        for model_name, model_scores in scores.items():
            if isinstance(scoring.SCORING_MODELS[model_name],
                          scoring.ConstantScoreModel):
                continue
            distinct_scores = set(model_scores.values())
            self.assertLess(
                1,
                len(distinct_scores),
                msg=f'Model "{model_name}" has the same score for all personas.'
            )
            scores_hash = json.dumps(model_scores, sort_keys=True)
            model_scores_hashes[scores_hash].add(model_name)
        models_with_same_score = [
            model_names for model_names in model_scores_hashes.values()
            if len(model_names) > 1
        ]
        self.assertFalse(models_with_same_score,
                         msg='Some models always have the same scores')