def test_get_item_similarity(self):
    """Checks similarity scores before and after unpublishing exp_id_2.

    exp_id_1 is compared against exp_id_2 and exp_id_4 against itself;
    exp_id_2 is then unpublished and its similarity to exp_id_1 is
    expected to be 0.0.
    """

    def _similarity(summaries, reference_id, compared_id):
        """Returns the similarity score of two summaries, looked up by
        exploration id in the given summaries dict.
        """
        reference = summaries[reference_id]
        compared = summaries[compared_id]
        return recommendations_services.get_item_similarity(
            reference.category,
            reference.language_code,
            reference.owner_ids,
            compared.category,
            compared.language_code,
            compared.exploration_model_last_updated,
            compared.owner_ids,
            compared.status)

    exp_summaries = exp_services.get_all_exploration_summaries()
    self.assertEqual(
        _similarity(exp_summaries, 'exp_id_1', 'exp_id_2'), 4.5)
    self.assertEqual(
        _similarity(exp_summaries, 'exp_id_4', 'exp_id_4'), 9.0)

    rights_manager.unpublish_exploration(self.admin, 'exp_id_2')

    # The summaries are re-fetched so the status change is visible.
    exp_summaries = exp_services.get_all_exploration_summaries()
    self.assertEqual(
        _similarity(exp_summaries, 'exp_id_1', 'exp_id_2'), 0.0)
def map(item):
    """Yields similarity entries between ``item`` and other explorations.

    Args:
        item: object exposing ``status`` and ``id``. Presumably an
            exploration summary model -- confirm against the job's
            entity class.

    Yields:
        (str, dict). The id of ``item`` paired with a dict holding
        'similarity_score' and 'exp_id' for every other non-private
        exploration whose score is at least SIMILARITY_SCORE_THRESHOLD.
    """
    # Private explorations are never processed.
    if item.status == rights_manager.ACTIVITY_STATUS_PRIVATE:
        return

    reference_id = item.id
    summaries_by_id = exp_services.get_non_private_exploration_summaries()

    # The fetched summaries sometimes differ from what is in the
    # datastore (especially when new explorations are added), so the
    # reference exploration may be missing from them.
    if reference_id not in summaries_by_id:
        return

    reference = summaries_by_id[reference_id]
    # NOTE(review): dict.iteritems() only exists on Python 2.
    for other_id, other in summaries_by_id.iteritems():
        if other_id == reference_id:
            continue
        score = recommendations_services.get_item_similarity(
            reference.category,
            reference.language_code,
            reference.owner_ids,
            other.category,
            other.language_code,
            other.exploration_model_last_updated,
            other.owner_ids,
            other.status)
        if score >= SIMILARITY_SCORE_THRESHOLD:
            yield (reference_id, {
                'similarity_score': score,
                'exp_id': other_id
            })
def map(item):
    """Emits, for a non-private exploration, its sufficiently similar peers.

    Args:
        item: object exposing ``status`` and ``id``. Presumably an
            exploration summary model -- confirm against the job's
            entity class.

    Yields:
        (str, dict). ``item.id`` together with a dict of the form
        {'similarity_score': <score>, 'exp_id': <other exploration id>},
        one per other exploration scoring at least
        SIMILARITY_SCORE_THRESHOLD.
    """
    is_private = item.status == rights_manager.ACTIVITY_STATUS_PRIVATE
    if is_private:
        # Only non-private explorations are processed.
        return

    current_id = item.id
    non_private_summaries = (
        exp_services.get_non_private_exploration_summaries())
    if current_id not in non_private_summaries:
        # The fetched summaries are sometimes different from the
        # datastore contents, especially when new explorations are added.
        return

    current_summary = non_private_summaries[current_id]

    def _score_against(candidate):
        """Returns the similarity of the current summary to candidate."""
        return recommendations_services.get_item_similarity(
            current_summary.category,
            current_summary.language_code,
            current_summary.owner_ids,
            candidate.category,
            candidate.language_code,
            candidate.exploration_model_last_updated,
            candidate.owner_ids,
            candidate.status)

    # NOTE(review): dict.iteritems() only exists on Python 2.
    for candidate_id, candidate in non_private_summaries.iteritems():
        if candidate_id == current_id:
            continue
        candidate_score = _score_against(candidate)
        if candidate_score >= SIMILARITY_SCORE_THRESHOLD:
            entry = {
                'similarity_score': candidate_score,
                'exp_id': candidate_id
            }
            yield (current_id, entry)
def test_get_item_similarity(self):
    """Checks similarity scores before and after unpublishing exp_id_2."""
    get_similarity = recommendations_services.get_item_similarity

    summaries = exp_services.get_all_exploration_summaries()
    score_1_vs_2 = get_similarity(
        summaries['exp_id_1'], summaries['exp_id_2'])
    self.assertEqual(score_1_vs_2, 4.5)

    score_4_vs_4 = get_similarity(
        summaries['exp_id_4'], summaries['exp_id_4'])
    self.assertEqual(score_4_vs_4, 9.0)

    # Unpublish exp_id_2 as the system user, then re-fetch the summaries
    # so the new status is reflected in them.
    system_user = user_services.get_system_user()
    rights_manager.unpublish_exploration(system_user, 'exp_id_2')
    summaries = exp_services.get_all_exploration_summaries()

    score_after_unpublish = get_similarity(
        summaries['exp_id_1'], summaries['exp_id_2'])
    self.assertEqual(score_after_unpublish, 0.0)
def process(
    self,
    ref_exp_summary_model: exp_models.ExpSummaryModel,
    compared_exp_summary_models: Iterable[exp_models.ExpSummaryModel]
) -> Iterable[Tuple[str, Dict[str, Union[str, float]]]]:
    """Compute similarities between explorations.

    Args:
        ref_exp_summary_model: ExpSummaryModel. Reference exploration
            summary. We are trying to find explorations similar to this
            reference summary.
        compared_exp_summary_models: iterable(ExpSummaryModel). Other
            exploration summaries against which the reference summary is
            compared. An entry with the same ID as the reference is
            skipped.

    Yields:
        (str, dict(str, str|float)). Tuple, the first element is the
        exploration ID of the reference exploration summary. The second
        is a dictionary. The structure of the dictionary is:
            exp_id: str. The ID of the similar exploration.
            similarity_score: float. The similarity score for
                the exploration.
        Only pairs scoring at least SIMILARITY_SCORE_THRESHOLD are
        yielded.
    """
    with datastore_services.get_ndb_context():
        for candidate_model in compared_exp_summary_models:
            candidate_id = candidate_model.id
            if candidate_id != ref_exp_summary_model.id:
                score = recommendations_services.get_item_similarity(
                    ref_exp_summary_model, candidate_model)
                if score >= SIMILARITY_SCORE_THRESHOLD:
                    similarity_entry = {
                        'similarity_score': score,
                        'exp_id': candidate_id
                    }
                    yield (ref_exp_summary_model.id, similarity_entry)
def test_get_item_similarity(self):
    """Checks similarity scores across publicize and unpublish changes.

    The exp_id_4 self-similarity is checked before and after exp_id_4 is
    publicized, and the exp_id_1/exp_id_2 similarity before and after
    exp_id_2 is unpublished.
    """

    def _assert_similarity(
            summaries, reference_id, compared_id, expected_score):
        """Asserts the similarity score of two summaries taken from the
        given summaries dict.
        """
        reference = summaries[reference_id]
        compared = summaries[compared_id]
        self.assertEqual(
            recommendations_services.get_item_similarity(
                reference.category,
                reference.language_code,
                reference.owner_ids,
                compared.category,
                compared.language_code,
                compared.exploration_model_last_updated,
                compared.owner_ids,
                compared.status),
            expected_score)

    exp_summaries = exp_services.get_all_exploration_summaries()
    _assert_similarity(exp_summaries, 'exp_id_1', 'exp_id_2', 4.5)
    _assert_similarity(exp_summaries, 'exp_id_4', 'exp_id_4', 9.0)

    # Summaries are re-fetched after every rights change.
    rights_manager.publicize_exploration(self.admin_id, 'exp_id_4')
    exp_summaries = exp_services.get_all_exploration_summaries()
    _assert_similarity(exp_summaries, 'exp_id_4', 'exp_id_4', 10.0)

    rights_manager.unpublish_exploration(self.admin_id, 'exp_id_2')
    exp_summaries = exp_services.get_all_exploration_summaries()
    _assert_similarity(exp_summaries, 'exp_id_1', 'exp_id_2', 0.0)
def test_get_item_similarity(self) -> None:
    """Checks similarity scores before and after unpublishing exp_id_2."""
    summaries = exp_services.get_all_exploration_summaries()  # type: ignore[no-untyped-call]

    score_1_vs_2 = recommendations_services.get_item_similarity(
        summaries['exp_id_1'], summaries['exp_id_2'])
    self.assertEqual(score_1_vs_2, 4.5)

    score_4_vs_4 = recommendations_services.get_item_similarity(
        summaries['exp_id_4'], summaries['exp_id_4'])
    self.assertEqual(score_4_vs_4, 9.0)

    # Unpublish exp_id_2 as the system user, then re-fetch the summaries
    # so the new status is reflected in them.
    system_user = user_services.get_system_user()  # type: ignore[no-untyped-call]
    rights_manager.unpublish_exploration(system_user, 'exp_id_2')  # type: ignore[no-untyped-call]
    summaries = exp_services.get_all_exploration_summaries()  # type: ignore[no-untyped-call]

    score_after_unpublish = recommendations_services.get_item_similarity(
        summaries['exp_id_1'], summaries['exp_id_2'])
    self.assertEqual(score_after_unpublish, 0.0)
def map(item):
    """Yields similarity entries between ``item`` and other explorations.

    Args:
        item: object exposing ``id``. Presumably an exploration summary
            model -- confirm against the job's entity class.

    Yields:
        (str, dict). ``item.id`` paired with a dict holding
        'similarity_score' and 'exp_id' for each other non-private
        exploration whose score reaches the threshold.
    """
    from core.domain import exp_services
    from core.domain import recommendations_services

    # Note: There is a threshold so that bad recommendations will be
    # discarded even if an exploration has few similar explorations.
    SIMILARITY_SCORE_THRESHOLD = 3.0

    reference_id = item.id
    non_private_summaries = (
        exp_services.get_non_private_exploration_summaries())

    # Only the exploration ids are needed: similarity is computed from
    # the two ids.
    for other_id in non_private_summaries:
        if other_id == reference_id:
            continue
        score = recommendations_services.get_item_similarity(
            reference_id, other_id)
        if score >= SIMILARITY_SCORE_THRESHOLD:
            yield (
                reference_id,
                {"similarity_score": score, "exp_id": other_id})
def map(item):
    """Yields similarity entries between ``item`` and other explorations.

    Args:
        item: object exposing ``status`` and ``id``. Presumably an
            exploration summary model -- confirm against the job's
            entity class.

    Yields:
        (str, dict). ``item.id`` paired with a dict holding
        'similarity_score' and 'exp_id' for each other non-private
        exploration whose score reaches the threshold.
    """
    from core.domain import exp_services
    from core.domain import recommendations_services
    from core.domain import rights_manager

    # Private explorations are never processed.
    if item.status == rights_manager.EXPLORATION_STATUS_PRIVATE:
        return

    # Note: There is a threshold so that bad recommendations will be
    # discarded even if an exploration has few similar explorations.
    SIMILARITY_SCORE_THRESHOLD = 3.0

    reference_id = item.id
    summaries_by_id = exp_services.get_non_private_exploration_summaries()

    # The fetched summaries sometimes differ from what is in the
    # datastore (especially when new explorations are added), so the
    # reference exploration may be missing from them.
    if reference_id not in summaries_by_id:
        return

    reference = summaries_by_id[reference_id]
    # NOTE(review): dict.iteritems() only exists on Python 2.
    for other_id, other in summaries_by_id.iteritems():
        if other_id == reference_id:
            continue
        score = recommendations_services.get_item_similarity(
            reference.category,
            reference.language_code,
            reference.owner_ids,
            other.category,
            other.language_code,
            other.exploration_model_last_updated,
            other.owner_ids,
            other.status)
        if score >= SIMILARITY_SCORE_THRESHOLD:
            yield (reference_id, {
                'similarity_score': score,
                'exp_id': other_id
            })
def test_get_item_similarity(self):
    """Checks id-based similarity scores, including the invalid-id error.

    Scores are checked for exp_id_1/exp_id_2 and for exp_id_4 against
    itself, then again after exp_id_4 is publicized and after exp_id_2
    is unpublished.
    """
    get_similarity = recommendations_services.get_item_similarity

    # An unknown reference id is reported through an exception.
    # NOTE(review): assertRaisesRegexp is the Python 2 spelling of
    # assertRaisesRegex.
    with self.assertRaisesRegexp(
        Exception, 'Invalid reference_exp_id fake_exp_id'):
        get_similarity('fake_exp_id', 'fake_exp_id_2')

    self.assertEqual(get_similarity('exp_id_1', 'exp_id_2'), 4.5)
    self.assertEqual(get_similarity('exp_id_4', 'exp_id_4'), 9.0)

    rights_manager.publicize_exploration(self.ADMIN_ID, 'exp_id_4')
    self.assertEqual(get_similarity('exp_id_4', 'exp_id_4'), 10.0)

    rights_manager.unpublish_exploration(self.ADMIN_ID, 'exp_id_2')
    self.assertEqual(get_similarity('exp_id_1', 'exp_id_2'), 0.0)
def map(item):
    """Emits, for a non-private exploration, its sufficiently similar peers.

    Args:
        item: object exposing ``status`` and ``id``. Presumably an
            exploration summary model -- confirm against the job's
            entity class.

    Yields:
        (str, dict). ``item.id`` together with a dict of the form
        {"similarity_score": <score>, "exp_id": <other exploration id>},
        one per other exploration scoring at least the threshold.
    """
    from core.domain import exp_services
    from core.domain import recommendations_services
    from core.domain import rights_manager

    is_private = item.status == rights_manager.EXPLORATION_STATUS_PRIVATE
    if is_private:
        # Only non-private explorations are processed.
        return

    # Note: There is a threshold so that bad recommendations will be
    # discarded even if an exploration has few similar explorations.
    SIMILARITY_SCORE_THRESHOLD = 3.0

    current_id = item.id
    non_private_summaries = (
        exp_services.get_non_private_exploration_summaries())
    if current_id not in non_private_summaries:
        # The fetched summaries are sometimes different from the
        # datastore contents, especially when new explorations are added.
        return

    current_summary = non_private_summaries[current_id]

    def _score_against(candidate):
        """Returns the similarity of the current summary to candidate."""
        return recommendations_services.get_item_similarity(
            current_summary.category,
            current_summary.language_code,
            current_summary.owner_ids,
            candidate.category,
            candidate.language_code,
            candidate.exploration_model_last_updated,
            candidate.owner_ids,
            candidate.status,
        )

    # NOTE(review): dict.iteritems() only exists on Python 2.
    for candidate_id, candidate in non_private_summaries.iteritems():
        if candidate_id == current_id:
            continue
        candidate_score = _score_against(candidate)
        if candidate_score >= SIMILARITY_SCORE_THRESHOLD:
            entry = {
                "similarity_score": candidate_score,
                "exp_id": candidate_id,
            }
            yield (current_id, entry)
def test_get_item_similarity(self):
    """Checks similarity scores across publicize and unpublish changes.

    The exp_id_4 self-similarity is checked before and after exp_id_4 is
    publicized, and the exp_id_1/exp_id_2 similarity before and after
    exp_id_2 is unpublished.
    """

    def _similarity_args(reference, compared):
        """Returns the positional arguments for get_item_similarity built
        from a reference summary and a compared summary.
        """
        return (
            reference.category,
            reference.language_code,
            reference.owner_ids,
            compared.category,
            compared.language_code,
            compared.exploration_model_last_updated,
            compared.owner_ids,
            compared.status,
        )

    def _score(summaries, reference_id, compared_id):
        """Returns the similarity score for two exploration ids."""
        call_args = _similarity_args(
            summaries[reference_id], summaries[compared_id])
        return recommendations_services.get_item_similarity(*call_args)

    exp_summaries = exp_services.get_all_exploration_summaries()
    self.assertEqual(_score(exp_summaries, "exp_id_1", "exp_id_2"), 4.5)
    self.assertEqual(_score(exp_summaries, "exp_id_4", "exp_id_4"), 9.0)

    # Summaries are re-fetched after every rights change.
    rights_manager.publicize_exploration(self.ADMIN_ID, "exp_id_4")
    exp_summaries = exp_services.get_all_exploration_summaries()
    self.assertEqual(_score(exp_summaries, "exp_id_4", "exp_id_4"), 10.0)

    rights_manager.unpublish_exploration(self.ADMIN_ID, "exp_id_2")
    exp_summaries = exp_services.get_all_exploration_summaries()
    self.assertEqual(_score(exp_summaries, "exp_id_1", "exp_id_2"), 0.0)