def reduce(key, stringified_values):
    """Implements the reduce function.

    This function creates or updates the UserStatsModel instance for the
    given user. It updates the impact score, total plays of all
    explorations, number of ratings across all explorations and average
    rating.

    Args:
        key: str. The unique id of the user.
        stringified_values: list(str). A list of information regarding all
            the explorations that this user contributes to or owns. Each
            entry is a stringified dict having the following keys:
                exploration_impact_score: float. The impact score of all the
                    explorations contributed to by the user.
                total_plays_for_owned_exp: int. Total plays of all
                    explorations owned by the user.
                average_rating_for_owned_exp: float. Average of average
                    ratings of all explorations owned by the user.
                num_ratings_for_owned_exp: int. Total number of ratings of
                    all explorations owned by the user.
    """
    values = [ast.literal_eval(v) for v in stringified_values]
    exponent = python_utils.divide(2.0, 3)

    # Find the final score and round to a whole number.
    user_impact_score = int(python_utils.ROUND(
        sum(value['exploration_impact_score'] for value in values
            if value.get('exploration_impact_score')) ** exponent))

    # Sum up the total plays for all explorations.
    total_plays = sum(
        value['total_plays_for_owned_exp'] for value in values
        if value.get('total_plays_for_owned_exp'))

    # Sum of ratings across all explorations.
    sum_of_ratings = 0
    # Number of ratings across all explorations.
    num_ratings = 0

    for value in values:
        if value.get('num_ratings_for_owned_exp'):
            num_ratings += value['num_ratings_for_owned_exp']
            sum_of_ratings += (
                value['average_rating_for_owned_exp'] *
                value['num_ratings_for_owned_exp'])

    mr_model = user_models.UserStatsModel.get_or_create(key)
    mr_model.impact_score = user_impact_score
    mr_model.total_plays = total_plays
    mr_model.num_ratings = num_ratings
    if sum_of_ratings != 0:
        average_ratings = python_utils.divide(
            sum_of_ratings, float(num_ratings))
        mr_model.average_ratings = average_ratings
    mr_model.update_timestamps()
    mr_model.put()
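# Hedged illustration (not Oppia code): a self-contained sketch of the
# aggregation arithmetic in the reduce step above, with hypothetical sample
# values, so the 2/3-exponent rounding and the ratings-weighted mean are easy
# to verify by hand.
def demo_reduce(values):
    # Sum the per-exploration impact scores, then apply the 2/3 exponent and
    # round, mirroring user_impact_score above.
    impact_sum = sum(
        v['exploration_impact_score'] for v in values
        if v.get('exploration_impact_score'))
    user_impact_score = int(round(impact_sum ** (2.0 / 3)))
    total_plays = sum(
        v['total_plays_for_owned_exp'] for v in values
        if v.get('total_plays_for_owned_exp'))
    # Weighted mean: each exploration's average rating is weighted by its
    # number of ratings before the final division.
    num_ratings = sum(
        v['num_ratings_for_owned_exp'] for v in values
        if v.get('num_ratings_for_owned_exp'))
    sum_of_ratings = sum(
        v['average_rating_for_owned_exp'] * v['num_ratings_for_owned_exp']
        for v in values if v.get('num_ratings_for_owned_exp'))
    average = sum_of_ratings / float(num_ratings) if num_ratings else None
    return user_impact_score, total_plays, average

# 8 + 19 = 27 impact, and 27 ** (2/3) = 9; ratings average to (4*2 + 3) / 3.
assert demo_reduce([
    {'exploration_impact_score': 8.0, 'total_plays_for_owned_exp': 3,
     'average_rating_for_owned_exp': 4.0, 'num_ratings_for_owned_exp': 2},
    {'exploration_impact_score': 19.0, 'total_plays_for_owned_exp': 2,
     'average_rating_for_owned_exp': 3.0, 'num_ratings_for_owned_exp': 1},
]) == (9, 5, 11 / 3.0)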
def _refresh_average_ratings_transactional(user_id, new_rating, old_rating):
    """Refreshes the average rating for a user.

    Args:
        user_id: str. The id of the user.
        new_rating: int. The new rating of the exploration.
        old_rating: int|None. The old rating of the exploration before
            refreshing, or None if the exploration hasn't been rated by the
            user yet.
    """
    user_stats_model = user_models.UserStatsModel.get(user_id, strict=False)
    if user_stats_model is None:
        user_models.UserStatsModel(
            id=user_id, average_ratings=new_rating, num_ratings=1).put()
        return

    num_ratings = user_stats_model.num_ratings
    average_ratings = user_stats_model.average_ratings
    if average_ratings is None:
        average_ratings = new_rating
        num_ratings += 1
    else:
        sum_of_ratings = (average_ratings * num_ratings) + new_rating
        if old_rating is None:
            num_ratings += 1
        else:
            sum_of_ratings -= old_rating
        average_ratings = python_utils.divide(
            sum_of_ratings, float(num_ratings))

    user_stats_model.average_ratings = average_ratings
    user_stats_model.num_ratings = num_ratings
    user_stats_model.update_timestamps()
    user_stats_model.put()
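# Hedged illustration (not Oppia code): the running-average update above in
# isolation. Recover the rating total from the stored mean, apply the new
# (and possibly replaced) rating, then divide again.
def demo_refresh_average(average, count, new_rating, old_rating=None):
    total = average * count + new_rating
    if old_rating is None:
        count += 1  # A brand-new rating grows the denominator.
    else:
        total -= old_rating  # A changed rating keeps the denominator fixed.
    return total / float(count), count

# Adding a 5 to two ratings averaging 3.0 gives (3*2 + 5) / 3.
assert demo_refresh_average(3.0, 2, 5) == (11 / 3.0, 3)
# Changing an existing 1 to a 5 gives (3*2 + 5 - 1) / 2.
assert demo_refresh_average(3.0, 2, 5, old_rating=1) == (5.0, 2)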
def test_failing_jobs(self):
    observed_log_messages = []

    def _mock_logging_function(msg, *args):
        """Mocks logging.error()."""
        observed_log_messages.append(msg % args)

    logging_swap = self.swap(logging, 'error', _mock_logging_function)

    # Mocks GoogleCloudStorageInputReader() to fail a job.
    _mock_input_reader = lambda _, __: python_utils.divide(1, 0)
    input_reader_swap = self.swap(
        input_readers, 'GoogleCloudStorageInputReader', _mock_input_reader)

    assert_raises_context_manager = self.assertRaisesRegexp(
        Exception,
        r'Invalid status code change for job '
        r'MockJobManagerOne-\w+-\w+: from new to failed')

    job_id = MockJobManagerOne.create_new()
    store_map_reduce_results = jobs.StoreMapReduceResults()

    with input_reader_swap, assert_raises_context_manager, logging_swap:
        store_map_reduce_results.run(
            job_id, 'core.jobs_test.MockJobManagerOne', 'output')

    expected_log_message = 'Job %s failed at' % job_id

    # The first log message is ignored as it is the traceback.
    self.assertEqual(len(observed_log_messages), 2)
    self.assertTrue(
        observed_log_messages[1].startswith(expected_log_message))
def test_get_time_in_millisecs(self):
    dt = datetime.datetime(2020, 6, 15)
    msecs = utils.get_time_in_millisecs(dt)
    self.assertEqual(
        dt,
        datetime.datetime.fromtimestamp(python_utils.divide(msecs, 1000.0)))
def _refresh_average_ratings(user_id, rating, old_rating):
    """Refreshes the average ratings in the given realtime layer.

    Args:
        user_id: str. The id of the user.
        rating: int. The new rating of the exploration.
        old_rating: int|None. The old rating of the exploration before
            refreshing, or None if the exploration hasn't been rated yet.
    """
    realtime_class = cls._get_realtime_datastore_class()
    realtime_model_id = realtime_class.get_realtime_id(
        active_realtime_layer, user_id)

    model = realtime_class.get(realtime_model_id, strict=False)
    if model is None:
        realtime_class(
            id=realtime_model_id, average_ratings=rating, num_ratings=1,
            realtime_layer=active_realtime_layer).put()
    else:
        num_ratings = model.num_ratings
        average_ratings = model.average_ratings
        num_ratings += 1
        if average_ratings is not None:
            sum_of_ratings = (
                average_ratings * (num_ratings - 1) + rating)
            if old_rating is not None:
                sum_of_ratings -= old_rating
                num_ratings -= 1
            model.average_ratings = python_utils.divide(
                sum_of_ratings, float(num_ratings))
        else:
            model.average_ratings = rating
        model.num_ratings = num_ratings
        model.put()
def get_question_skill_links_equidistributed_by_skill(
        cls, total_question_count, skill_ids):
    """Fetches a list containing a constant number of
    QuestionSkillLinkModels linked to each of the given skills.

    Args:
        total_question_count: int. The number of questions expected.
        skill_ids: list(str). The ids of skills for which the linked
            question ids are to be retrieved.

    Returns:
        list(QuestionSkillLinkModel). A list of QuestionSkillLinkModels
        corresponding to given skill_ids, with
        total_question_count/len(skill_ids) number of questions for
        each skill. If not evenly divisible, it will be rounded up.
        If not enough questions for a skill, just return all questions
        it links to. The order of questions will follow the order of
        given skill ids, but the order of questions for the same skill
        is random.
    """
    question_count_per_skill = int(
        math.ceil(python_utils.divide(
            float(total_question_count), float(len(skill_ids)))))
    question_skill_link_models = []
    for skill_id in skill_ids:
        question_skill_link_models.extend(
            cls.query(cls.skill_id == skill_id).fetch(
                question_count_per_skill))
    return question_skill_link_models
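# Hedged illustration (not Oppia code): the per-skill quota above is the
# ceiling of total_question_count / len(skill_ids), so the last few skills
# never get shortchanged by integer truncation.
import math

def demo_question_count_per_skill(total_question_count, skill_ids):
    return int(math.ceil(float(total_question_count) / len(skill_ids)))

# 20 questions over 3 skills rounds up to 7 per skill, not down to 6.
assert demo_question_count_per_skill(20, ['a', 'b', 'c']) == 7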
def test_get_time_in_millisecs_with_complicated_time(self):
    dt = datetime.datetime(2020, 6, 15, 5, 18, 23, microsecond=123456)
    msecs = utils.get_time_in_millisecs(dt)
    self.assertEqual(
        dt,
        datetime.datetime.fromtimestamp(python_utils.divide(msecs, 1000.0)))
def test_failing_jobs(self):
    # Mocks GoogleCloudStorageInputReader() to fail a job.
    _mock_input_reader = lambda _, __: python_utils.divide(1, 0)
    input_reader_swap = self.swap(
        input_readers, 'GoogleCloudStorageInputReader', _mock_input_reader)

    job_id = MockJobManagerOne.create_new()
    store_map_reduce_results = jobs.StoreMapReduceResults()

    with python_utils.ExitStack() as stack:
        captured_logs = stack.enter_context(
            self.capture_logging(min_level=logging.ERROR))
        stack.enter_context(input_reader_swap)
        stack.enter_context(
            self.assertRaisesRegexp(
                Exception,
                r'Invalid status code change for job '
                r'MockJobManagerOne-\w+-\w+: from new to failed'))

        store_map_reduce_results.run(
            job_id, 'core.jobs_test.MockJobManagerOne', 'output')

    # The first log message is ignored as it is the traceback.
    self.assertEqual(len(captured_logs), 1)
    self.assertTrue(
        captured_logs[0].startswith('Job %s failed at' % job_id))
def compress_image(image_content, scaling_factor):
    """Compresses the image by resizing the image with the scaling factor.

    Note that if the image's dimensions, after the scaling factor is
    applied, exceed 4000 then the scaling factor will be recomputed and
    applied such that the larger dimension of the image does not exceed 4000
    after resizing. This is due to an implementation limitation. See
    https://goo.gl/TJCbmE for context.

    Args:
        image_content: str. Content of the file to be compressed.
        scaling_factor: float. The number by which the dimensions of the
            image will be scaled. This is expected to be greater than zero.

    Returns:
        str. Returns the content of the compressed image.
    """
    if not constants.DEV_MODE:
        height, width = get_image_dimensions(image_content)
        new_width = int(width * scaling_factor)
        new_height = int(height * scaling_factor)
        if (new_width > MAX_RESIZE_DIMENSION_PX or
                new_height > MAX_RESIZE_DIMENSION_PX):
            # Recompute the scaling factor such that the larger dimension
            # does not exceed 4000 when scaled.
            new_scaling_factor = python_utils.divide(
                MAX_RESIZE_DIMENSION_PX, float(max(width, height)))
            new_width = int(width * new_scaling_factor)
            new_height = int(height * new_scaling_factor)
        return images.resize(
            image_data=image_content,
            width=min(new_width, MAX_RESIZE_DIMENSION_PX),
            height=min(new_height, MAX_RESIZE_DIMENSION_PX))
    else:
        return image_content
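# Hedged illustration (not Oppia code) of the dimension cap above, assuming
# the same 4000px limit: when the naive scale would overshoot, the factor is
# recomputed as cap / larger-side, which preserves the aspect ratio.
MAX_DIMENSION_PX = 4000

def demo_capped_dimensions(width, height, scaling_factor):
    new_width = int(width * scaling_factor)
    new_height = int(height * scaling_factor)
    if max(new_width, new_height) > MAX_DIMENSION_PX:
        scaling_factor = MAX_DIMENSION_PX / float(max(width, height))
        new_width = int(width * scaling_factor)
        new_height = int(height * scaling_factor)
    # Final clamp, mirroring the min(...) guards passed to images.resize().
    return min(new_width, MAX_DIMENSION_PX), min(new_height, MAX_DIMENSION_PX)

# Scaling 2000x1000 by 4.0 would give 8000x4000; the recomputed factor is
# 4000/2000 = 2.0, yielding 4000x2000 with the aspect ratio intact.
assert demo_capped_dimensions(2000, 1000, 4.0) == (4000, 2000)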
def test_multiple_plays_and_ratings_for_multiple_explorations(self):
    exploration_1 = self.save_new_default_exploration(
        self.EXP_ID_1, self.owner_id_1, title=self.EXP_TITLE_1)
    exploration_2 = self.save_new_default_exploration(
        self.EXP_ID_2, self.owner_id_1, title=self.EXP_TITLE_2)

    self.login(self.OWNER_EMAIL_1)
    response = self.get_json(feconf.CREATOR_DASHBOARD_DATA_URL)
    self.assertEqual(len(response['explorations_list']), 2)

    exp_version = self.EXP_DEFAULT_VERSION
    exp_id_1 = self.EXP_ID_1
    state_1 = exploration_1.init_state_name
    exp_id_2 = self.EXP_ID_2
    state_2 = exploration_2.init_state_name

    self._record_start(exp_id_1, exp_version, state_1)
    self._record_start(exp_id_2, exp_version, state_2)
    self._record_start(exp_id_2, exp_version, state_2)
    self._rate_exploration(exp_id_1, [4])
    self._rate_exploration(exp_id_2, [3, 3])
    self._run_user_stats_aggregator_job()

    user_model = user_models.UserStatsModel.get(self.owner_id_1)
    self.assertEqual(user_model.total_plays, 3)
    self.assertEqual(
        user_model.impact_score, self.USER_IMPACT_SCORE_DEFAULT)
    self.assertEqual(user_model.num_ratings, 3)
    self.assertEqual(
        user_model.average_ratings, python_utils.divide(10, 3.0))
    self.logout()
def test_get_time_in_millisecs(self) -> None:
    dt = datetime.datetime(2020, 6, 15)
    msecs = utils.get_time_in_millisecs(dt)
    self.assertEqual(
        dt,
        datetime.datetime.fromtimestamp(
            python_utils.divide(msecs, 1000.0)))  # type: ignore[no-untyped-call]
def get(self):
    """Handles GET requests."""
    comma_separated_topic_ids = (
        self.request.get('comma_separated_topic_ids'))
    topic_ids = comma_separated_topic_ids.split(',')
    topics = topic_fetchers.get_topics_by_ids(topic_ids)
    all_skill_ids = []
    subtopic_mastery_dict = {}

    for ind, topic in enumerate(topics):
        if not topic:
            raise self.InvalidInputException(
                'Invalid topic ID %s' % topic_ids[ind])
        all_skill_ids.extend(topic.get_all_skill_ids())

    all_skill_ids = list(set(all_skill_ids))
    all_skills_mastery_dict = skill_services.get_multi_user_skill_mastery(
        self.user_id, all_skill_ids)

    for topic in topics:
        subtopic_mastery_dict[topic.id] = {}
        for subtopic in topic.subtopics:
            skill_mastery_dict = {
                skill_id: mastery
                for skill_id, mastery in all_skills_mastery_dict.items()
                if mastery is not None and skill_id in subtopic.skill_ids
            }
            if skill_mastery_dict:
                # Subtopic mastery is the average of skill masteries.
                subtopic_mastery_dict[topic.id][subtopic.id] = (
                    python_utils.divide(
                        sum(skill_mastery_dict.values()),
                        len(skill_mastery_dict)))

    self.values.update({'subtopic_mastery_dict': subtopic_mastery_dict})
    self.render_json(self.values)
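# Hedged illustration (not Oppia code): subtopic mastery as computed above is
# a plain mean over the subtopic's skills, skipping skills for which the user
# has no recorded mastery.
def demo_subtopic_mastery(all_skills_mastery, subtopic_skill_ids):
    masteries = [
        mastery for skill_id, mastery in all_skills_mastery.items()
        if mastery is not None and skill_id in subtopic_skill_ids]
    return sum(masteries) / len(masteries) if masteries else None

# skill_c has no recorded mastery, so only skill_a and skill_b contribute.
assert demo_subtopic_mastery(
    {'skill_a': 0.25, 'skill_b': 0.75, 'skill_c': None},
    ['skill_a', 'skill_b', 'skill_c']) == 0.5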
def test_both_realtime_layer_and_batch_data(self):
    user_stats_aggregator_swap = self.swap(
        user_jobs_continuous, 'UserStatsAggregator',
        MockUserStatsAggregator)
    exploration_1 = self._create_exploration(self.EXP_ID_1, self.user_a_id)
    exploration_2 = self._create_exploration(self.EXP_ID_2, self.user_a_id)
    exp_id_1 = self.EXP_ID_1
    exp_id_2 = self.EXP_ID_2
    exp_version = self.EXP_DEFAULT_VERSION
    state_1 = exploration_1.init_state_name
    state_2 = exploration_2.init_state_name

    self._rate_exploration(exp_id_1, 2, 4)
    self._rate_exploration(exp_id_2, 4, 3)

    # Run the computation and check data from batch job.
    self._run_computation()
    user_stats_model = user_models.UserStatsModel.get(self.user_a_id)
    # The total plays is the sum of the number of starts of both the
    # exploration_1 and exploration_2 as defined in the
    # mock_get_statistics() method above.
    self.assertEqual(user_stats_model.total_plays, 14)
    self.assertEqual(user_stats_model.num_ratings, 6)
    self.assertEqual(
        user_stats_model.average_ratings, python_utils.divide(20, 6.0))

    # Stop the batch job. Fire up a few events and check data from realtime
    # job.
    with user_stats_aggregator_swap:
        MockUserStatsAggregator.stop_computation(self.user_a_id)
    self._record_start(exp_id_1, exp_version, state_1)
    self._record_start(exp_id_2, exp_version, state_2)
    self._record_exploration_rating(exp_id_1, [2, 5])
    self._record_exploration_rating(exp_id_2, [4, 1])

    user_stats = (
        user_jobs_continuous.UserStatsAggregator.get_dashboard_stats(
            self.user_a_id))
    # After recording two start events, the total plays is now increased by
    # two.
    self.assertEqual(user_stats['total_plays'], 16)
    self.assertEqual(user_stats['num_ratings'], 10)
    self.assertEqual(
        user_stats['average_ratings'], python_utils.divide(32, 10.0))
def test_swap_to_always_raise_with_error(self):
    obj = mock.Mock()
    obj.func = lambda: python_utils.divide(1, 0)
    self.assertRaisesRegexp(
        ZeroDivisionError, 'integer division or modulo by zero', obj.func)

    with self.swap_to_always_raise(obj, 'func', error=ValueError('abc')):
        self.assertRaisesRegexp(ValueError, 'abc', obj.func)
def get_time_in_millisecs(datetime_obj):
    # type: (datetime.datetime) -> float
    """Returns time in milliseconds since the Epoch.

    Args:
        datetime_obj: datetime. An object of type datetime.datetime.

    Returns:
        float. The time in milliseconds since the Epoch.
    """
    msecs = time.mktime(datetime_obj.timetuple()) * 1000.0
    return msecs + python_utils.divide(datetime_obj.microsecond, 1000.0)  # type: ignore[no-any-return, no-untyped-call]
def test_realtime_layer_batch_job_multiple_explorations_one_owner(self):
    self._create_exploration(self.EXP_ID_1, self.user_a_id)
    self._create_exploration(self.EXP_ID_2, self.user_a_id)
    self._record_exploration_rating(self.EXP_ID_1, [4, 5, 2])
    self._record_exploration_rating(self.EXP_ID_2, [5, 2])

    user_stats = (
        user_jobs_continuous.UserStatsAggregator.get_dashboard_stats(
            self.user_a_id))
    self.assertEqual(user_stats['total_plays'], 0)
    self.assertEqual(user_stats['num_ratings'], 5)
    self.assertEqual(
        user_stats['average_ratings'], python_utils.divide(18, 5.0))
def get_human_readable_time_string(time_msec: float) -> str:
    """Given a time in milliseconds since the epoch, get a human-readable
    time string for the admin dashboard.

    Args:
        time_msec: float. Time in milliseconds since the Epoch.

    Returns:
        str. A string representing the time.
    """
    # Ignoring arg-type because we are preventing direct usage of 'str' for
    # Python3 compatibility.
    return time.strftime(
        '%B %d %H:%M:%S',
        time.gmtime(python_utils.divide(time_msec, 1000.0)))  # type: ignore[arg-type, no-untyped-call]
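# Hedged usage sketch (not a test from the codebase): the millisecond helpers
# above multiply seconds by 1000 on the way out, so consumers divide by
# 1000.0 before handing the value back to the standard library.
import datetime
import time

dt = datetime.datetime(2020, 6, 15, 5, 18, 23)
# Mirrors get_time_in_millisecs(): mktime() yields local-time seconds.
msecs = time.mktime(dt.timetuple()) * 1000.0 + dt.microsecond / 1000.0
# fromtimestamp() expects seconds, hence the divide-by-1000.0 in the tests.
assert datetime.datetime.fromtimestamp(msecs / 1000.0) == dt
# get_human_readable_time_string() formats with gmtime(), i.e. in UTC.
print(time.strftime('%B %d %H:%M:%S', time.gmtime(msecs / 1000.0)))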
def get_question_skill_links_equidistributed_by_skill(
        cls, total_question_count, skill_ids):
    """Fetches a list containing a constant number of
    QuestionSkillLinkModels linked to each of the given skills.

    Args:
        total_question_count: int. The number of questions expected.
        skill_ids: list(str). The ids of skills for which the linked
            question ids are to be retrieved.

    Returns:
        list(QuestionSkillLinkModel). A list of QuestionSkillLinkModels
        corresponding to given skill_ids, with
        total_question_count/len(skill_ids) number of questions for
        each skill. If not evenly divisible, it will be rounded up.
        If not enough questions for a skill, just return all questions
        it links to. The order of questions will follow the order of
        given skill ids, but the order of questions for the same skill
        is random.
    """
    if len(skill_ids) > feconf.MAX_NUMBER_OF_SKILL_IDS:
        raise Exception('Please keep the number of skill IDs below 20.')

    question_count_per_skill = int(
        math.ceil(python_utils.divide(
            float(total_question_count), float(len(skill_ids)))))
    question_skill_link_models = []
    existing_question_ids = []

    for skill_id in skill_ids:
        query = cls.query(cls.skill_id == skill_id)

        # We fetch more questions here in order to try and ensure that the
        # eventual number of returned questions is sufficient to meet the
        # number requested, even after deduplication.
        new_question_skill_link_models = query.fetch(
            question_count_per_skill * 2)

        # Deduplicate if the same question is linked to multiple skills.
        # A filter is used instead of removing elements mid-iteration,
        # which would skip the element after each removal.
        new_question_skill_link_models = [
            model for model in new_question_skill_link_models
            if model.question_id not in existing_question_ids]

        question_skill_link_models.extend(
            new_question_skill_link_models[:question_count_per_skill])
        existing_question_ids.extend([
            model.question_id for model in new_question_skill_link_models
        ])

    return question_skill_link_models
def _validate_exploration_model_last_updated(
        cls, item, field_name_to_external_model_references):
    """Validate that item.exploration_model_last_updated matches the
    time when a last commit was made by a human contributor.

    Args:
        item: datastore_services.Model. ExpSummaryModel to validate.
        field_name_to_external_model_references:
            dict(str, (list(base_model_validators.ExternalModelReference))).
            A dict keyed by field name. The field name represents a unique
            identifier provided by the storage model to which the external
            model is associated. Each value contains a list of
            ExternalModelReference objects corresponding to the field_name.
            For example, all the external Exploration Models corresponding
            to a storage model can be associated with the field name
            'exp_ids'. This dict is used for validation of External Model
            properties linked to the storage model.
    """
    exploration_model_references = (
        field_name_to_external_model_references['exploration_ids'])

    for exploration_model_reference in exploration_model_references:
        exploration_model = exploration_model_reference.model_instance

        if exploration_model is None or exploration_model.deleted:
            model_class = exploration_model_reference.model_class
            model_id = exploration_model_reference.model_id
            cls._add_error(
                'exploration_ids %s' % (
                    base_model_validators.ERROR_CATEGORY_FIELD_CHECK),
                'Entity id %s: based on field exploration_ids having'
                ' value %s, expected model %s with id %s but it doesn\'t'
                ' exist' % (
                    item.id, model_id, model_class.__name__, model_id))
            continue

        last_human_update_ms = exp_services.get_last_updated_by_human_ms(
            exploration_model.id)
        last_human_update_time = datetime.datetime.fromtimestamp(
            python_utils.divide(last_human_update_ms, 1000.0))

        if item.exploration_model_last_updated != last_human_update_time:
            cls._add_error(
                'exploration model %s' % (
                    base_model_validators.ERROR_CATEGORY_LAST_UPDATED_CHECK
                ),
                'Entity id %s: The exploration_model_last_updated '
                'field: %s does not match the last time a commit was '
                'made by a human contributor: %s' % (
                    item.id, item.exploration_model_last_updated,
                    last_human_update_time))
def get_dashboard_stats(cls, user_id):
    """Returns the dashboard stats associated with the given user_id.

    Args:
        user_id: str. The id of the user.

    Returns:
        dict. Has the keys:
            total_plays: int. Number of times the user's explorations were
                played.
            num_ratings: int. Number of times the explorations have been
                rated.
            average_ratings: float. Average of average ratings across all
                explorations.
    """
    total_plays = 0
    num_ratings = 0
    average_ratings = None
    sum_of_ratings = 0

    mr_model = user_models.UserStatsModel.get(user_id, strict=False)
    if mr_model is not None:
        total_plays += mr_model.total_plays
        num_ratings += mr_model.num_ratings
        if mr_model.average_ratings is not None:
            sum_of_ratings += (
                mr_model.average_ratings * mr_model.num_ratings)

    realtime_model = cls._get_realtime_datastore_class().get(
        cls.get_active_realtime_layer_id(user_id), strict=False)

    if realtime_model is not None:
        total_plays += realtime_model.total_plays
        num_ratings += realtime_model.num_ratings
        if realtime_model.average_ratings is not None:
            sum_of_ratings += (
                realtime_model.average_ratings *
                realtime_model.num_ratings)

    if num_ratings > 0:
        average_ratings = python_utils.divide(
            sum_of_ratings, float(num_ratings))

    return {
        'total_plays': total_plays,
        'num_ratings': num_ratings,
        'average_ratings': average_ratings
    }
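# Hedged illustration (not Oppia code): merging the batch and realtime
# averages above is a ratings-weighted mean, not a mean of the two means.
def demo_merge_averages(batch_avg, batch_count, realtime_avg,
                        realtime_count):
    total_count = batch_count + realtime_count
    if total_count == 0:
        return None
    return (
        batch_avg * batch_count + realtime_avg * realtime_count
    ) / float(total_count)

# Two batch ratings averaging 3.5 and two realtime ratings averaging 2.5
# merge to (7 + 5) / 4 = 3.0, not (3.5 + 2.5) / 2.
assert demo_merge_averages(3.5, 2, 2.5, 2) == 3.0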
def test_realtime_layer_batch_job_single_exploration_multiple_owners(self):
    exploration = self._create_exploration(self.EXP_ID_1, self.user_a_id)

    rights_manager.assign_role_for_exploration(
        self.user_a, self.EXP_ID_1, self.user_b_id,
        rights_domain.ROLE_OWNER)

    exp_version = self.EXP_DEFAULT_VERSION
    exp_id = self.EXP_ID_1
    state = exploration.init_state_name

    self._record_start(exp_id, exp_version, state)
    self._record_start(exp_id, exp_version, state)
    self._record_exploration_rating(exp_id, [3, 4, 5])
    self._record_exploration_rating(exp_id, [1, 5, 4])

    expected_results = {
        'total_plays': 2,
        'num_ratings': 6,
        'average_ratings': python_utils.divide(22, 6.0)
    }

    user_stats_1 = (
        user_jobs_continuous.UserStatsAggregator.get_dashboard_stats(
            self.user_a_id))
    self.assertEqual(
        user_stats_1['total_plays'], expected_results['total_plays'])
    self.assertEqual(
        user_stats_1['num_ratings'], expected_results['num_ratings'])
    self.assertEqual(
        user_stats_1['average_ratings'],
        expected_results['average_ratings'])

    user_stats_2 = (
        user_jobs_continuous.UserStatsAggregator.get_dashboard_stats(
            self.user_b_id))
    self.assertEqual(
        user_stats_2['total_plays'], expected_results['total_plays'])
    self.assertEqual(
        user_stats_2['num_ratings'], expected_results['num_ratings'])
    self.assertEqual(
        user_stats_2['average_ratings'],
        expected_results['average_ratings'])
def record_user_has_seen_notifications(user_id, last_seen_msecs):
    """Updates the last_checked time for this user (which represents the
    time the user last saw the notifications in the dashboard page or the
    notifications dropdown).

    Args:
        user_id: str. The user ID of the subscriber.
        last_seen_msecs: float. The time (in msecs since the Epoch) when
            the user last saw the notifications in the dashboard page or
            the notifications dropdown.
    """
    subscriptions_model = user_models.UserSubscriptionsModel.get(
        user_id, strict=False)
    if not subscriptions_model:
        subscriptions_model = user_models.UserSubscriptionsModel(id=user_id)

    subscriptions_model.last_checked = datetime.datetime.utcfromtimestamp(
        python_utils.divide(last_seen_msecs, 1000.0))
    subscriptions_model.put()
def validate_deleted(cls, item):
    """Validate that the models marked as deleted are hard-deleted after
    eight weeks.

    Args:
        item: datastore_services.Model. Entity to validate.
    """
    cls.errors.clear()
    date_now = datetime.datetime.utcnow()
    date_before_which_models_should_be_deleted = (
        date_now - feconf.PERIOD_TO_HARD_DELETE_MODELS_MARKED_AS_DELETED)
    period_to_hard_delete_models_in_days = (
        feconf.PERIOD_TO_HARD_DELETE_MODELS_MARKED_AS_DELETED.days)

    if item.last_updated < date_before_which_models_should_be_deleted:
        cls._add_error(
            'entity %s' % ERROR_CATEGORY_STALE_CHECK,
            'Entity id %s: model marked as deleted is older than %s weeks'
            % (
                item.id,
                python_utils.divide(
                    period_to_hard_delete_models_in_days, 7)))
def get_question_skill_links_based_on_difficulty_equidistributed_by_skill(
        cls, total_question_count, skill_ids, difficulty_requested):
    """Fetches the list of constant number of random
    QuestionSkillLinkModels linked to the skills, sorted by the absolute
    value of the difference between skill difficulty and the requested
    difficulty.

    Args:
        total_question_count: int. The number of questions expected.
        skill_ids: list(str). The ids of skills for which the linked
            question ids are to be retrieved.
        difficulty_requested: float. The skill difficulty of the questions
            requested to be fetched.

    Returns:
        list(QuestionSkillLinkModel). A list of random
        QuestionSkillLinkModels corresponding to given skill_ids, with
        total_question_count/len(skill_ids) number of questions for
        each skill. If not evenly divisible, it will be rounded up.
        If not enough questions for a skill, just return all questions
        it links to.
    """
    if len(skill_ids) > feconf.MAX_NUMBER_OF_SKILL_IDS:
        raise Exception('Please keep the number of skill IDs below 20.')

    if not skill_ids:
        return []

    question_count_per_skill = int(
        math.ceil(python_utils.divide(
            float(total_question_count), float(len(skill_ids)))))

    question_skill_link_mapping = {}

    # For fetching the questions randomly we have used a random offset.
    # But this is a temporary solution since this method scales linearly.
    # Other alternative methods were:
    # 1) Using a random id in question id filter.
    # 2) Adding an additional column that can be filtered upon.
    # But these methods are not viable because google datastore limits
    # each query to have at most one inequality filter. So we can't filter
    # on both question_id and difficulty. Please see
    # https://github.com/oppia/oppia/pull/9061#issuecomment-629765809
    # for more details.
    def get_offset(query):
        """Helper function to get the offset."""
        question_count = query.count()
        if question_count > 2 * question_count_per_skill:
            return utils.get_random_int(
                question_count - (question_count_per_skill * 2))
        return 0

    for skill_id in skill_ids:
        query = cls.query(cls.skill_id == skill_id)

        equal_questions_query = query.filter(
            cls.skill_difficulty == difficulty_requested)

        # We fetch more questions here in order to try and ensure that the
        # eventual number of returned questions is sufficient to meet the
        # number requested, even after deduplication. Deduplication uses a
        # filter rather than removing elements mid-iteration, which would
        # skip the element after each removal.
        new_question_skill_link_models = equal_questions_query.fetch(
            limit=question_count_per_skill * 2,
            offset=get_offset(equal_questions_query))
        new_question_skill_link_models = [
            model for model in new_question_skill_link_models
            if model.question_id not in question_skill_link_mapping]

        if len(new_question_skill_link_models) >= question_count_per_skill:
            new_question_skill_link_models = random.sample(
                new_question_skill_link_models, question_count_per_skill)
        else:
            # Fetch QuestionSkillLinkModels with difficulty smaller than
            # requested difficulty.
            easier_questions_query = query.filter(
                cls.skill_difficulty < difficulty_requested)
            easier_question_skill_link_models = (
                easier_questions_query.fetch(
                    limit=question_count_per_skill * 2,
                    offset=get_offset(easier_questions_query)))
            easier_question_skill_link_models = [
                model for model in easier_question_skill_link_models
                if model.question_id not in question_skill_link_mapping]

            question_extra_count = (
                len(new_question_skill_link_models) +
                len(easier_question_skill_link_models) -
                question_count_per_skill)
            if question_extra_count >= 0:
                easier_question_skill_link_models = random.sample(
                    easier_question_skill_link_models,
                    question_count_per_skill -
                    len(new_question_skill_link_models))
                new_question_skill_link_models.extend(
                    easier_question_skill_link_models)
            else:
                # Fetch QuestionSkillLinkModels with difficulty larger than
                # requested difficulty.
                new_question_skill_link_models.extend(
                    easier_question_skill_link_models)
                harder_questions_query = query.filter(
                    cls.skill_difficulty > difficulty_requested)
                harder_question_skill_link_models = (
                    harder_questions_query.fetch(
                        limit=question_count_per_skill * 2,
                        offset=get_offset(harder_questions_query)))
                harder_question_skill_link_models = [
                    model for model in harder_question_skill_link_models
                    if model.question_id not in question_skill_link_mapping]

                question_extra_count = (
                    len(new_question_skill_link_models) +
                    len(harder_question_skill_link_models) -
                    question_count_per_skill)
                if question_extra_count >= 0:
                    harder_question_skill_link_models = random.sample(
                        harder_question_skill_link_models,
                        question_count_per_skill -
                        len(new_question_skill_link_models))
                new_question_skill_link_models.extend(
                    harder_question_skill_link_models)

        new_question_skill_link_models = (
            new_question_skill_link_models[:question_count_per_skill])
        for model in new_question_skill_link_models:
            if model.question_id not in question_skill_link_mapping:
                question_skill_link_mapping[model.question_id] = model

    return list(question_skill_link_mapping.values())
def map(item):
    """Implements the map function (generator). Computes exploration data
    for every contributor and owner of the exploration.

    Args:
        item: ExpSummaryModel. An instance of ExpSummaryModel.

    Yields:
        tuple(owner_id, exploration_data), where:
            owner_id: str. The unique id of the user.
            exploration_data: dict. Has the keys:
                exploration_impact_score: float. The impact score of all
                    the explorations contributed to by the user.
                total_plays_for_owned_exp: int. Total plays of all
                    explorations owned by the user.
                average_rating_for_owned_exp: float. Average of average
                    ratings of all explorations owned by the user.
                num_ratings_for_owned_exp: int. Total number of ratings
                    of all explorations owned by the user.
    """
    if item.deleted:
        return

    exponent = python_utils.divide(2.0, 3)

    # This is set to False only when the exploration impact score is not
    # valid to be calculated.
    calculate_exploration_impact_score = True

    # Get average rating and value per user.
    total_rating = 0
    for ratings_value in item.ratings:
        total_rating += item.ratings[ratings_value] * int(ratings_value)
    sum_of_ratings = sum(item.ratings.values())

    average_rating = (
        python_utils.divide(total_rating, sum_of_ratings)
        if sum_of_ratings else None)

    if average_rating is not None:
        value_per_user = average_rating - 2
        if value_per_user <= 0:
            calculate_exploration_impact_score = False
    else:
        calculate_exploration_impact_score = False

    exploration_stats = stats_services.get_exploration_stats(
        item.id, item.version)
    # For each state, find the number of first entries to the state.
    # This is considered to be approximately equal to the number of
    # users who answered the state because very few users enter a state
    # and leave without answering anything at all.
    answer_count = exploration_stats.get_sum_of_first_hit_counts()
    num_starts = exploration_stats.num_starts

    # Turn answer count into reach.
    reach = answer_count ** exponent

    exploration_summary = exp_fetchers.get_exploration_summary_by_id(
        item.id)
    contributors = exploration_summary.contributors_summary
    total_commits = sum(contributors.values())
    if total_commits == 0:
        calculate_exploration_impact_score = False

    mapped_owner_ids = []
    for contrib_id in contributors:
        exploration_data = {}

        # Set the value of exploration impact score only if it needs to
        # be calculated.
        if calculate_exploration_impact_score:
            # Find fractional contribution for each contributor.
            contribution = python_utils.divide(
                contributors[contrib_id], float(total_commits))

            # Find score for this specific exploration.
            exploration_data.update({
                'exploration_impact_score': (
                    value_per_user * reach * contribution)
            })

        # If the user is an owner for the exploration, then update dict
        # with 'average ratings' and 'total plays' as well.
        if contrib_id in exploration_summary.owner_ids:
            mapped_owner_ids.append(contrib_id)
            # Get number of starts (total plays) for the exploration.
            exploration_data.update({
                'total_plays_for_owned_exp': num_starts
            })
            # Update data with average rating only if it is not None.
            if average_rating is not None:
                exploration_data.update({
                    'average_rating_for_owned_exp': average_rating,
                    'num_ratings_for_owned_exp': sum_of_ratings
                })
        yield (contrib_id, exploration_data)

    for owner_id in exploration_summary.owner_ids:
        if owner_id not in mapped_owner_ids:
            mapped_owner_ids.append(owner_id)
            # Get number of starts (total plays) for the exploration.
            exploration_data = {
                'total_plays_for_owned_exp': num_starts
            }
            # Update data with average rating only if it is not None.
            if average_rating is not None:
                exploration_data.update({
                    'average_rating_for_owned_exp': average_rating,
                    'num_ratings_for_owned_exp': sum_of_ratings
                })
            yield (owner_id, exploration_data)
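# Hedged worked example (not Oppia code) of the per-exploration impact score
# computed in the map step above: value_per_user * reach * contribution,
# where reach is the answer count raised to the 2/3 power and contribution is
# the contributor's share of commits.
def demo_impact_score(average_rating, answer_count, user_commits,
                      total_commits):
    value_per_user = average_rating - 2
    if value_per_user <= 0 or total_commits == 0:
        # Mirrors calculate_exploration_impact_score being set to False.
        return None
    reach = answer_count ** (2.0 / 3)
    contribution = user_commits / float(total_commits)
    return value_per_user * reach * contribution

# An average rating of 4, 1000 answers (reach = 1000^(2/3) = 100) and half
# the commits give an impact of 2 * 100 * 0.5 = 100.
assert round(demo_impact_score(4, 1000, 1, 2)) == 100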
def get_question_skill_links_based_on_difficulty_equidistributed_by_skill(
        cls, total_question_count, skill_ids, difficulty_requested):
    """Fetches the list of constant number of QuestionSkillLinkModels
    linked to the skills, sorted by the absolute value of the difference
    between skill difficulty and the requested difficulty.

    Args:
        total_question_count: int. The number of questions expected.
        skill_ids: list(str). The ids of skills for which the linked
            question ids are to be retrieved.
        difficulty_requested: float. The skill difficulty of the questions
            requested to be fetched.

    Returns:
        list(QuestionSkillLinkModel). A list of QuestionSkillLinkModels
        corresponding to given skill_ids, with
        total_question_count/len(skill_ids) number of questions for
        each skill. If not evenly divisible, it will be rounded up.
        If not enough questions for a skill, just return all questions
        it links to. The order of questions will follow the order of
        given skill ids, and the order of questions for the same skill
        follows the absolute value of the difference between skill
        difficulty and the requested difficulty.
    """
    if len(skill_ids) > feconf.MAX_NUMBER_OF_SKILL_IDS:
        raise Exception('Please keep the number of skill IDs below 20.')

    question_count_per_skill = int(
        math.ceil(python_utils.divide(
            float(total_question_count), float(len(skill_ids)))))

    question_skill_link_mapping = {}
    for skill_id in skill_ids:
        query = cls.query(cls.skill_id == skill_id)

        equal_questions_query = query.filter(
            cls.skill_difficulty == difficulty_requested)

        # We fetch more questions here in order to try and ensure that the
        # eventual number of returned questions is sufficient to meet the
        # number requested, even after deduplication. Deduplication uses a
        # filter rather than removing elements mid-iteration, which would
        # skip the element after each removal.
        new_question_skill_link_models = (
            equal_questions_query.fetch(question_count_per_skill * 2))
        new_question_skill_link_models = [
            model for model in new_question_skill_link_models
            if model.question_id not in question_skill_link_mapping]

        if len(new_question_skill_link_models) < question_count_per_skill:
            # Fetch QuestionSkillLinkModels with difficulty smaller than
            # requested difficulty and sort them by decreasing difficulty.
            easier_questions_query = query.filter(
                cls.skill_difficulty < difficulty_requested)
            easier_questions_query = easier_questions_query.order(
                -cls.skill_difficulty)
            easier_question_skill_link_models = (
                easier_questions_query.fetch(question_count_per_skill))
            easier_question_skill_link_models = [
                model for model in easier_question_skill_link_models
                if model.question_id not in question_skill_link_mapping]
            new_question_skill_link_models.extend(
                easier_question_skill_link_models)

            if (len(new_question_skill_link_models) <
                    question_count_per_skill):
                # Fetch QuestionSkillLinkModels with difficulty larger
                # than requested difficulty and sort them by increasing
                # difficulty.
                harder_questions_query = query.filter(
                    cls.skill_difficulty > difficulty_requested)
                harder_questions_query = harder_questions_query.order(
                    cls.skill_difficulty)
                harder_question_skill_link_models = (
                    harder_questions_query.fetch(question_count_per_skill))
                harder_question_skill_link_models = [
                    model for model in harder_question_skill_link_models
                    if model.question_id not in question_skill_link_mapping]
                new_question_skill_link_models.extend(
                    harder_question_skill_link_models)

        # Sort QuestionSkillLinkModels by the difference between their
        # difficulty and requested difficulty.
        new_question_skill_link_models = sorted(
            new_question_skill_link_models,
            key=lambda model: abs(
                model.skill_difficulty - difficulty_requested))
        new_question_skill_link_models = (
            new_question_skill_link_models[:question_count_per_skill])

        for model in new_question_skill_link_models:
            if model.question_id not in question_skill_link_mapping:
                question_skill_link_mapping[model.question_id] = model

    return list(question_skill_link_mapping.values())
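# Hedged illustration (not Oppia code): the final per-skill selection above
# keeps the questions whose difficulty is closest to the requested one.
def demo_pick_closest(difficulties, difficulty_requested, count):
    return sorted(
        difficulties,
        key=lambda difficulty: abs(difficulty - difficulty_requested)
    )[:count]

# With 0.5 requested, 0.5 and 0.4 beat 0.9 and 0.1 for the two slots.
assert demo_pick_closest([0.9, 0.4, 0.1, 0.5], 0.5, 2) == [0.5, 0.4]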
def test_stats_for_multiple_explorations_with_multiple_owners(self):
    exploration_1 = self.save_new_default_exploration(
        self.EXP_ID_1, self.owner_id_1, title=self.EXP_TITLE_1)
    exploration_2 = self.save_new_default_exploration(
        self.EXP_ID_2, self.owner_id_1, title=self.EXP_TITLE_2)

    rights_manager.assign_role_for_exploration(
        self.owner_1, self.EXP_ID_1, self.owner_id_2,
        rights_domain.ROLE_OWNER)
    rights_manager.assign_role_for_exploration(
        self.owner_1, self.EXP_ID_2, self.owner_id_2,
        rights_domain.ROLE_OWNER)

    self.login(self.OWNER_EMAIL_2)
    response = self.get_json(feconf.CREATOR_DASHBOARD_DATA_URL)
    self.assertEqual(len(response['explorations_list']), 2)

    exp_version = self.EXP_DEFAULT_VERSION
    exp_id_1 = self.EXP_ID_1
    state_1 = exploration_1.init_state_name
    exp_id_2 = self.EXP_ID_2
    state_2 = exploration_2.init_state_name

    self._record_start(exp_id_1, exp_version, state_1)
    self._record_start(exp_id_1, exp_version, state_1)
    self._record_start(exp_id_2, exp_version, state_2)
    self._record_start(exp_id_2, exp_version, state_2)
    self._record_start(exp_id_2, exp_version, state_2)

    self._rate_exploration(exp_id_1, [5, 3])
    self._rate_exploration(exp_id_2, [5, 5])

    self._run_user_stats_aggregator_job()

    expected_results = {
        'total_plays': 5,
        'num_ratings': 4,
        'average_ratings': python_utils.divide(18, 4.0)
    }

    user_model_2 = user_models.UserStatsModel.get(self.owner_id_2)
    self.assertEqual(
        user_model_2.total_plays, expected_results['total_plays'])
    self.assertEqual(
        user_model_2.impact_score, self.USER_IMPACT_SCORE_DEFAULT)
    self.assertEqual(
        user_model_2.num_ratings, expected_results['num_ratings'])
    self.assertEqual(
        user_model_2.average_ratings, expected_results['average_ratings'])
    self.logout()

    self.login(self.OWNER_EMAIL_1)
    response = self.get_json(feconf.CREATOR_DASHBOARD_DATA_URL)
    self.assertEqual(len(response['explorations_list']), 2)

    user_model_1 = user_models.UserStatsModel.get(self.owner_id_1)
    self.assertEqual(
        user_model_1.total_plays, expected_results['total_plays'])
    self.assertEqual(
        user_model_1.impact_score, self.USER_IMPACT_SCORE_DEFAULT)
    self.assertEqual(
        user_model_1.num_ratings, expected_results['num_ratings'])
    self.assertEqual(
        user_model_1.average_ratings, expected_results['average_ratings'])
    self.logout()