def _group_filter_fields(self, record, record_name, field='filters', others=None): """Group multiple fields to specify filters. Args: record: the record to convert. record_name: the name of the type of record for error messages. field: the main field for filters, it should contain an array of filter IDs. others: a list of fields which, if not empty, create extra fields by combining the field name and their content, e.g. "for-departement" with value "75,69" would add a filter "for-departement(75,69)". Returns: A list of valid filters. Raises: ValueError: if one the filter is not implemented. """ filters = record['fields'].get(field, []) if others: for filter_type in others: filter_value = record['fields'].get(filter_type) if filter_value: filters.append('%s(%s)' % (filter_type, filter_value)) for one_filter in filters: if not scoring.get_scoring_model(one_filter): raise ValueError( '%s uses the filter "%s" that is not implemented yet' % (record_name, one_filter)) return filters
def compute_advices_for_project(user, project, database):
    """Advise on a user project.

    Every advice module is scored against the project, then the modules with
    a non-zero score are added to the result from the highest score down.

    Args:
        user: the user's data, mainly used for their profile and
            features_enabled.
        project: the project data. It will not be modified.
        database: access to the MongoDB with market data.
    Returns:
        an Advices protobuffer containing a list of recommendations.
    """
    scoring_project = scoring.ScoringProject(
        project, user.profile, user.features_enabled, database, now=now.get())
    advice_modules = _advice_modules(database)
    advice = project_pb2.Advices()

    # First pass: compute a score for each module that can be scored.
    scores = {}
    for module in advice_modules:
        if not (module.is_ready_for_prod or user.features_enabled.alpha):
            continue
        model_name = module.trigger_scoring_model
        scoring_model = scoring.get_scoring_model(model_name)
        if scoring_model is None:
            logging.warning(
                'Not able to score advice "%s", the scoring model "%s" is unknown.',
                module.advice_id, model_name)
            continue
        if user.features_enabled.all_modules:
            # The all_modules feature forces a score of 3 without scoring.
            scores[module.advice_id] = 3
            continue
        try:
            scores[module.advice_id] = scoring_model.score(scoring_project)
        except Exception:  # pylint: disable=broad-except
            # A crashing model leaves its module without a score, which the
            # second pass treats as a score of 0.
            logging.exception(
                'Scoring "%s" crashed for:\n%s\n%s',
                module.trigger_scoring_model,
                scoring_project.user_profile,
                scoring_project.details)

    def _by_score_then_id(one_module):
        return scores.get(one_module.advice_id, 0), one_module.advice_id

    # Second pass: emit the advice pieces, best score first.
    incompatible_modules = set()
    for module in sorted(advice_modules, key=_by_score_then_id, reverse=True):
        num_stars = scores.get(module.advice_id)
        if not num_stars:
            # We can break as others will have 0 score as well.
            break
        is_incompatible = module.airtable_id in incompatible_modules
        if is_incompatible and not user.features_enabled.all_modules:
            continue
        piece_of_advice = advice.advices.add()
        piece_of_advice.advice_id = module.advice_id
        piece_of_advice.num_stars = num_stars
        incompatible_modules.update(module.incompatible_advice_ids)
        _compute_extra_data(piece_of_advice, module, scoring_project)
        _maybe_override_advice_data(piece_of_advice, module, scoring_project)

    return advice
def convert_record(self, airtable_record):
    """Convert an AirTable record to a dict proto-Json ready.

    On top of the parent conversion, this checks that the module's trigger
    scoring model is known and splits the "emailFacts" text into a list with
    one item per line.

    Raises:
        ValueError: if the trigger scoring model is not implemented.
    """
    fields = super(_AdviceModuleConverter, self).convert_record(airtable_record)

    model_name = fields.get('triggerScoringModel')
    model = scoring.get_scoring_model(model_name)
    if not model:
        raise ValueError(
            'Advice module "%s" uses the scoring model "%s" that is not implemented yet'
            % (fields['_id'], model_name))

    if 'emailFacts' in fields:
        email_facts = fields['emailFacts']
        fields['emailFacts'] = email_facts.split('\n')
    return fields
def _maybe_override_advice_data(piece_of_advice, module, scoring_project):
    """Merge model-specific override data into a piece of advice, if any.

    The module's trigger scoring model may expose a get_advice_override
    method; when it does and that method returns something truthy, the result
    is merged into piece_of_advice with MergeFrom. Otherwise the piece of
    advice is left untouched.
    """
    model = scoring.get_scoring_model(module.trigger_scoring_model)
    try:
        compute_override = model.get_advice_override
    except AttributeError:
        # The scoring model has no get_advice_override method.
        return

    override = compute_override(scoring_project, piece_of_advice)
    if override:
        piece_of_advice.MergeFrom(override)
def _get_expanded_card_data(user_proto, project, advice_id):
    """Compute the expanded card data of an advice module for a project.

    Calls flask.abort with a 404 when the advice module is unknown or has no
    trigger scoring model, and when its scoring model is unknown or does not
    provide a get_expanded_card_data method.

    Returns:
        Whatever the scoring model's get_expanded_card_data returns for the
        scoring project built from the user and the project.
    """
    module = advisor.get_advice_module(advice_id, _DB)
    if not (module and module.trigger_scoring_model):
        flask.abort(404, 'Le module "{}" n\'existe pas'.format(advice_id))

    model = scoring.get_scoring_model(module.trigger_scoring_model)
    if not (model and hasattr(model, 'get_expanded_card_data')):
        message = 'Le module "{}" n\'a pas de données supplémentaires'.format(advice_id)
        flask.abort(404, message)

    scoring_project = scoring.ScoringProject(
        project, user_proto.profile, user_proto.features_enabled, _DB, now=now.get())
    return model.get_expanded_card_data(scoring_project)
def convert_record(self, airtable_record):
    """Convert an AirTable record to a dict proto-Json ready.

    Before delegating to the parent conversion, the URL of the first image
    (if any) is copied to an "image_url" field of the record. After it, the
    link and the filters of the action template are validated.

    Raises:
        ValueError: if the link does not match _LINK_REGEXP or if one of the
            filters is not an implemented scoring model.
    """
    raw_fields = airtable_record['fields']
    if raw_fields.get('image'):
        first_image = raw_fields['image'][0]
        raw_fields['image_url'] = first_image['url']

    fields = super(_ActionTemplateConverter, self).convert_record(airtable_record)
    template_id_and = 'Action template "%s" '

    link = fields.get('link')
    if link and not _LINK_REGEXP.match(link):
        raise ValueError(
            (template_id_and + 'has an irregular link: %s.') % (fields['_id'], link))

    for action_filter in fields.get('filters', []):
        if scoring.get_scoring_model(action_filter):
            continue
        raise ValueError(
            (template_id_and + 'uses the filter "%s" that is not implemented yet')
            % (fields['_id'], action_filter))
    return fields
def _chantiers():
    """Return the known chantiers as protos.

    The result is whatever proto.cache_mongo_collection returns for the
    chantiers collection. When the _CHANTIERS cache was empty before this
    call, the scoring models referenced by the chantiers and by
    scoring.GROUP_SCORING_MODELS are checked against the implemented ones,
    and any mismatch is logged as a warning.
    """
    cache_was_empty = not _CHANTIERS
    all_chantiers = proto.cache_mongo_collection(
        _DB.chantiers.find, _CHANTIERS, chantier_pb2.Chantier)
    if not cache_was_empty:
        return all_chantiers

    # Validate chantiers.
    chantier_models = {chantier.scoring_model for chantier in all_chantiers.values()}
    group_models = set(scoring.GROUP_SCORING_MODELS.values())
    required_models = chantier_models | group_models

    registered_models = set(scoring.SCORING_MODELS)
    resolvable_models = {
        name for name in required_models if scoring.get_scoring_model(name)}
    existing_models = registered_models | resolvable_models

    if required_models - existing_models:
        logging.warning(
            'Some scoring models will be random: %s',
            required_models - existing_models)
    if existing_models - required_models:
        logging.warning(
            'Some scoring models are unused: %s',
            existing_models - required_models)
    return all_chantiers
def _compute_extra_data(piece_of_advice, module, scoring_project): if not module.extra_data_field_name: return scoring_model = scoring.get_scoring_model(module.trigger_scoring_model) try: compute_extra_data = scoring_model.compute_extra_data except AttributeError: logging.warning( 'The scoring model %s has no compute_extra_data method', module.trigger_scoring_model) return extra_data = compute_extra_data(scoring_project) if not extra_data: return try: data_field = getattr(piece_of_advice, module.extra_data_field_name) except NameError: logging.warning( 'The Advice proto does not have a %s field as requested by the module %s', module.extra_data_field_name, module.advice_id) return data_field.CopyFrom(extra_data)
def _maybe_recommend_advice(user, project, database):
    """Add recommended pieces of advice to a project that has none yet.

    Nothing is done when the advisor feature is not ACTIVE for the user or
    when the project already has advices. Otherwise each advice module is
    scored and the ones with a non-zero score are added to project.advices,
    highest score first, skipping the modules declared incompatible with an
    advice that was already added.

    Returns:
        False if the project was left untouched, True otherwise.
    """
    advisor_is_inactive = user.features_enabled.advisor != user_pb2.ACTIVE
    if advisor_is_inactive or project.advices:
        return False

    scoring_project = scoring.ScoringProject(
        project, user.profile, user.features_enabled, database)
    advice_modules = _advice_modules(database)

    # First pass: compute a score for each module that can be scored.
    scores = {}
    for module in advice_modules:
        if not (module.is_ready_for_prod or user.features_enabled.alpha):
            continue
        model_name = module.trigger_scoring_model
        scoring_model = scoring.get_scoring_model(model_name)
        if scoring_model is None:
            logging.warning(
                'Not able to score advice "%s", the scoring model "%s" is unknown.',
                module.advice_id, model_name)
            continue
        scores[module.advice_id] = scoring_model.score(scoring_project).score

    def _by_score_then_id(one_module):
        return scores.get(one_module.advice_id, 0), one_module.advice_id

    # Second pass: add the advice pieces to the project, best score first.
    incompatible_modules = set()
    for module in sorted(advice_modules, key=_by_score_then_id, reverse=True):
        num_stars = scores.get(module.advice_id)
        if not num_stars:
            # We can break as others will have 0 score as well.
            break
        if module.airtable_id in incompatible_modules:
            continue
        piece_of_advice = project.advices.add()
        piece_of_advice.advice_id = module.advice_id
        piece_of_advice.status = project_pb2.ADVICE_RECOMMENDED
        piece_of_advice.num_stars = num_stars
        incompatible_modules.update(module.incompatible_advice_ids)
        _compute_extra_data(piece_of_advice, module, scoring_project)

    return True
def setUpClass(cls):
    """Store the ID of a scoring model and the model itself on the class.

    NOTE(review): `model_id` and `_TestCase` are not defined in this method;
    presumably they come from an enclosing scope - confirm against the
    surrounding code.
    """
    super(_TestCase, cls).setUpClass()
    cls.model_id = model_id
    cls.model = scoring.get_scoring_model(model_id)
def test_run_all(self, mock_carif_get_trainings):
    """Run all scoring models on all personas.

    Checks that every score is a number, that no persona gets the same score
    from all the models, that no model (except constant ones) gives the same
    score to all the personas, and that no two models give exactly the same
    scores to all the personas.
    """
    mock_carif_get_trainings.return_value = [
        training_pb2.Training() for unused_index in range(3)]

    database = mongomock.MongoClient().test
    for collection_name in (
            'job_group_info',
            'local_diagnosis',
            'associations',
            'volunteering_missions',
            'hiring_cities',
            'cities',
            'seasonal_jobbing',
            'specific_to_job_advice',
    ):
        _load_json_to_mongo(database, collection_name)

    scores = collections.defaultdict(
        lambda: collections.defaultdict(float))
    # Mock the "now" date so that scoring models that are based on time
    # (like "Right timing") are deterministic.
    now = datetime.datetime(2016, 9, 27)

    # Score every persona with every model.
    for model_name in list(scoring.SCORING_MODELS.keys()):
        model = scoring.get_scoring_model(model_name)
        self.assertTrue(model, msg=model_name)
        scores[model_name] = {}
        for name, persona in _PERSONAS.items():
            persona_score = model.score(
                persona.scoring_project(database, now=now))
            scores[model_name][name] = persona_score
            self.assertIsInstance(
                persona_score, numbers.Number,
                msg='while using the model "{}" to score "{}"'.format(
                    model_name, name))

    # A persona should not get the same (non-negative) score everywhere.
    for name in _PERSONAS:
        distinct_scores = {
            max(model_scores[name], 0) for model_scores in scores.values()}
        self.assertLess(
            1, len(distinct_scores),
            msg='Persona "{}" has the same score across all models.'.format(name))

    # A non-constant model should tell personas apart, and two models should
    # not score all the personas identically.
    model_scores_hashes = collections.defaultdict(list)
    for model_name, model_scores in scores.items():
        if isinstance(scoring.SCORING_MODELS[model_name], scoring.ConstantScoreModel):
            continue
        distinct_scores = set(model_scores.values())
        self.assertLess(
            1, len(distinct_scores),
            msg='Model "{}" has the same score for all personas.'.format(model_name))
        scores_hash = json.dumps(model_scores, sort_keys=True)
        model_scores_hashes[scores_hash].append(model_name)

    models_with_same_score = [
        same_models for same_models in model_scores_hashes.values()
        if len(same_models) > 1]
    self.assertFalse(
        models_with_same_score,
        msg='Some models always have the same scores')