Example #1
    def reduce(key, stringified_values):
        """Implements the reduce function.

        This function creates or updates the UserStatsModel instance for the
        given user. It updates the impact score, total plays of all
        explorations, number of ratings across all explorations and average
        rating.

        Args:
            key: str. The unique id of the user.
            stringified_values: list(str). A list of information regarding all
                the explorations that this user contributes to or owns. Each
                entry is a stringified dict having the following keys:
                    exploration_impact_score: float. The impact score of all the
                        explorations contributed to by the user.
                    total_plays_for_owned_exp: int. Total plays of all
                        explorations owned by the user.
                    average_rating_for_owned_exp: float. Average of average
                        ratings of all explorations owned by the user.
                    num_ratings_for_owned_exp: int. Total number of ratings of
                        all explorations owned by the user.
        """
        values = [ast.literal_eval(v) for v in stringified_values]
        exponent = python_utils.divide(2.0, 3)

        # Find the final score and round to a whole number.
        user_impact_score = int(
            python_utils.ROUND(
                sum(value['exploration_impact_score'] for value in values
                    if value.get('exploration_impact_score'))**exponent))

        # Sum up the total plays for all explorations.
        total_plays = sum(value['total_plays_for_owned_exp']
                          for value in values
                          if value.get('total_plays_for_owned_exp'))

        # Sum of ratings across all explorations.
        sum_of_ratings = 0
        # Number of ratings across all explorations.
        num_ratings = 0

        for value in values:
            if value.get('num_ratings_for_owned_exp'):
                num_ratings += value['num_ratings_for_owned_exp']
                sum_of_ratings += (value['average_rating_for_owned_exp'] *
                                   value['num_ratings_for_owned_exp'])

        mr_model = user_models.UserStatsModel.get_or_create(key)
        mr_model.impact_score = user_impact_score
        mr_model.total_plays = total_plays
        mr_model.num_ratings = num_ratings
        if sum_of_ratings != 0:
            average_ratings = python_utils.divide(sum_of_ratings,
                                                  float(num_ratings))
            mr_model.average_ratings = average_ratings
        mr_model.update_timestamps()
        mr_model.put()
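
The aggregation above can be checked in isolation. The following is a minimal sketch with hypothetical per-exploration dicts, using round() and plain division in place of python_utils.ROUND and python_utils.divide:

# Standalone sketch of the reduce() arithmetic above. The input dicts and
# numbers are hypothetical.
values = [
    {'exploration_impact_score': 8.0, 'total_plays_for_owned_exp': 5,
     'average_rating_for_owned_exp': 4.0, 'num_ratings_for_owned_exp': 2},
    {'exploration_impact_score': 27.0, 'total_plays_for_owned_exp': 3,
     'average_rating_for_owned_exp': 3.0, 'num_ratings_for_owned_exp': 1},
]

exponent = 2.0 / 3

# Impact score: sum the per-exploration scores, raise the sum to the 2/3
# power, and round to a whole number: (8 + 27) ** (2/3) ~= 10.7 -> 11.
user_impact_score = int(round(
    sum(v['exploration_impact_score'] for v in values) ** exponent))

# Total plays: 5 + 3 = 8.
total_plays = sum(v['total_plays_for_owned_exp'] for v in values)

# Overall average rating: the ratings-weighted mean of the per-exploration
# averages, (4.0 * 2 + 3.0 * 1) / 3 = 11 / 3.
num_ratings = sum(v['num_ratings_for_owned_exp'] for v in values)
sum_of_ratings = sum(
    v['average_rating_for_owned_exp'] * v['num_ratings_for_owned_exp']
    for v in values)
average_ratings = sum_of_ratings / float(num_ratings)

assert user_impact_score == 11
assert (total_plays, num_ratings) == (8, 3)
assert abs(average_ratings - 11.0 / 3) < 1e-9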
Example #2
def _refresh_average_ratings_transactional(user_id, new_rating, old_rating):
    """Refreshes the average rating for a user.

    Args:
        user_id: str. The id of the user.
        new_rating: int. The new rating of the exploration.
        old_rating: int|None. The old rating of the exploration before
            refreshing, or None if the exploration hasn't been rated by the user
            yet.
    """
    user_stats_model = user_models.UserStatsModel.get(user_id, strict=False)
    if user_stats_model is None:
        user_models.UserStatsModel(id=user_id,
                                   average_ratings=new_rating,
                                   num_ratings=1).put()
        return

    num_ratings = user_stats_model.num_ratings
    average_ratings = user_stats_model.average_ratings
    if average_ratings is None:
        average_ratings = new_rating
        num_ratings += 1
    else:
        sum_of_ratings = (average_ratings * num_ratings) + new_rating
        if old_rating is None:
            num_ratings += 1
        else:
            sum_of_ratings -= old_rating
        average_ratings = python_utils.divide(sum_of_ratings,
                                              float(num_ratings))
    user_stats_model.average_ratings = average_ratings
    user_stats_model.num_ratings = num_ratings
    user_stats_model.update_timestamps()
    user_stats_model.put()
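
A couple of worked cases may help here; the numbers below are hypothetical and plain division stands in for python_utils.divide:

# Case 1: a brand-new rating (old_rating is None) increases the count.
average_ratings, num_ratings = 4.0, 3
new_rating = 2
sum_of_ratings = average_ratings * num_ratings + new_rating  # 12 + 2 = 14
num_ratings += 1
assert sum_of_ratings / float(num_ratings) == 3.5

# Case 2: a re-rating replaces the old value, so the count stays the same.
average_ratings, num_ratings = 4.0, 3
new_rating, old_rating = 2, 5
sum_of_ratings = average_ratings * num_ratings + new_rating - old_rating  # 9
assert sum_of_ratings / float(num_ratings) == 3.0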
Example #3
    def test_failing_jobs(self):
        observed_log_messages = []

        def _mock_logging_function(msg, *args):
            """Mocks logging.error()."""
            observed_log_messages.append(msg % args)

        logging_swap = self.swap(logging, 'error', _mock_logging_function)

        # Mocks GoogleCloudStorageInputReader() to fail a job.
        _mock_input_reader = lambda _, __: python_utils.divide(1, 0)

        input_reader_swap = self.swap(
            input_readers, 'GoogleCloudStorageInputReader', _mock_input_reader)
        assert_raises_context_manager = self.assertRaisesRegexp(
            Exception,
            r'Invalid status code change for job '
            r'MockJobManagerOne-\w+-\w+: from new to failed')

        job_id = MockJobManagerOne.create_new()
        store_map_reduce_results = jobs.StoreMapReduceResults()

        with input_reader_swap, assert_raises_context_manager, logging_swap:
            store_map_reduce_results.run(
                job_id, 'core.jobs_test.MockJobManagerOne', 'output')

        expected_log_message = 'Job %s failed at' % job_id

        # The first log message is ignored as it is the traceback.
        self.assertEqual(len(observed_log_messages), 2)
        self.assertTrue(
            observed_log_messages[1].startswith(expected_log_message))
Example #4
    def test_get_time_in_millisecs(self):
        dt = datetime.datetime(2020, 6, 15)
        msecs = utils.get_time_in_millisecs(dt)
        self.assertEqual(
            dt,
            datetime.datetime.fromtimestamp(
                python_utils.divide(msecs, 1000.0)))
Example #5
        def _refresh_average_ratings(user_id, rating, old_rating):
            """Refreshes the average ratings in the given realtime layer.

            Args:
                user_id: str. The id of the user.
                rating: int. The new rating of the exploration.
                old_rating: int|None. The old rating of the exploration before
                    refreshing, or None if the exploration hasn't been rated
                    by the user yet.
            """
            realtime_class = cls._get_realtime_datastore_class()
            realtime_model_id = realtime_class.get_realtime_id(
                active_realtime_layer, user_id)

            model = realtime_class.get(realtime_model_id, strict=False)
            if model is None:
                realtime_class(id=realtime_model_id,
                               average_ratings=rating,
                               num_ratings=1,
                               realtime_layer=active_realtime_layer).put()
            else:
                num_ratings = model.num_ratings
                average_ratings = model.average_ratings
                num_ratings += 1
                if average_ratings is not None:
                    sum_of_ratings = (average_ratings * (num_ratings - 1) +
                                      rating)
                    if old_rating is not None:
                        sum_of_ratings -= old_rating
                        num_ratings -= 1
                    model.average_ratings = python_utils.divide(
                        sum_of_ratings, float(num_ratings))
                else:
                    model.average_ratings = rating
                model.num_ratings = num_ratings
                model.put()
Example #6
    def get_question_skill_links_equidistributed_by_skill(
            cls, total_question_count, skill_ids):
        """Fetches a list containing a constant number of
        QuestionSkillLinkModels linked to the given skills.

        Args:
            total_question_count: int. The number of questions expected.
            skill_ids: list(str). The ids of skills for which the linked
                question ids are to be retrieved.

        Returns:
            list(QuestionSkillLinkModel). A list of QuestionSkillLinkModels
                corresponding to the given skill_ids, with
                total_question_count/len(skill_ids) questions for each skill,
                rounded up when not evenly divisible. If there are not enough
                questions for a skill, all the questions it links to are
                returned. The questions follow the order of the given skill
                ids, but the order of questions for the same skill is random.
        """
        question_count_per_skill = int(
            math.ceil(
                python_utils.divide(float(total_question_count),
                                    float(len(skill_ids)))))
        question_skill_link_models = []
        for skill_id in skill_ids:
            question_skill_link_models.extend(
                cls.query(
                    cls.skill_id == skill_id).fetch(question_count_per_skill))
        return question_skill_link_models
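
The per-skill quota is a simple ceiling division; a quick sketch with hypothetical counts, using plain division in place of python_utils.divide:

import math

# Hypothetical request: 10 questions spread across 3 skills.
total_question_count = 10
skill_ids = ['skill_a', 'skill_b', 'skill_c']
question_count_per_skill = int(
    math.ceil(float(total_question_count) / float(len(skill_ids))))
assert question_count_per_skill == 4  # 10 / 3, rounded up.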
Example #7
    def test_get_time_in_millisecs_with_complicated_time(self):
        dt = datetime.datetime(2020, 6, 15, 5, 18, 23, microsecond=123456)
        msecs = utils.get_time_in_millisecs(dt)
        self.assertEqual(
            dt,
            datetime.datetime.fromtimestamp(
                python_utils.divide(msecs, 1000.0)))
Example #8
    def test_failing_jobs(self):
        # Mocks GoogleCloudStorageInputReader() to fail a job.
        _mock_input_reader = lambda _, __: python_utils.divide(1, 0)

        input_reader_swap = self.swap(input_readers,
                                      'GoogleCloudStorageInputReader',
                                      _mock_input_reader)

        job_id = MockJobManagerOne.create_new()
        store_map_reduce_results = jobs.StoreMapReduceResults()

        with python_utils.ExitStack() as stack:
            captured_logs = stack.enter_context(
                self.capture_logging(min_level=logging.ERROR))
            stack.enter_context(input_reader_swap)
            stack.enter_context(
                self.assertRaisesRegexp(
                    Exception, r'Invalid status code change for job '
                    r'MockJobManagerOne-\w+-\w+: from new to failed'))

            store_map_reduce_results.run(job_id,
                                         'core.jobs_test.MockJobManagerOne',
                                         'output')

        # The first log message is ignored as it is the traceback.
        self.assertEqual(len(captured_logs), 1)
        self.assertTrue(captured_logs[0].startswith('Job %s failed at' %
                                                    job_id))
Example #9
def compress_image(image_content, scaling_factor):
    """Compresses the image by resizing the image with the scaling factor.

    Note that if the image's dimensions, after the scaling factor is applied,
    exceed 4000 then the scaling factor will be recomputed and applied such that
    the larger dimension of the image does not exceed 4000 after resizing. This
    is due to an implementation limitation. See https://goo.gl/TJCbmE for
    context.

    Args:
        image_content: str. Content of the file to be compressed.
        scaling_factor: float. The number by which the dimensions of the image
            will be scaled. This is expected to be greater than zero.

    Returns:
        str. Returns the content of the compressed image.
    """
    if not constants.DEV_MODE:
        height, width = get_image_dimensions(image_content)
        new_width = int(width * scaling_factor)
        new_height = int(height * scaling_factor)
        if (new_width > MAX_RESIZE_DIMENSION_PX
                or new_height > MAX_RESIZE_DIMENSION_PX):
            # Recompute the scaling factor such that the larger dimension does
            # not exceed 4000 when scaled.
            new_scaling_factor = (python_utils.divide(
                MAX_RESIZE_DIMENSION_PX, float(max(width, height))))
            new_width = int(width * new_scaling_factor)
            new_height = int(height * new_scaling_factor)
        return images.resize(image_data=image_content,
                             width=min(new_width, MAX_RESIZE_DIMENSION_PX),
                             height=min(new_height, MAX_RESIZE_DIMENSION_PX))
    else:
        return image_content
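
A small arithmetic sketch of the scaling-factor recomputation, using hypothetical image dimensions and plain division in place of python_utils.divide; the limit is assumed to be 4000, as stated in the docstring:

MAX_RESIZE_DIMENSION_PX = 4000  # Assumed to match the module constant.

# Hypothetical image: 3000 x 5000, requested scaling factor 1.5.
height, width = 3000, 5000
scaling_factor = 1.5
new_width = int(width * scaling_factor)
new_height = int(height * scaling_factor)
assert (new_width, new_height) == (7500, 4500)  # Exceeds the limit.

# Recompute so the larger original dimension maps to at most 4000 px.
new_scaling_factor = MAX_RESIZE_DIMENSION_PX / float(max(width, height))
new_width = int(width * new_scaling_factor)
new_height = int(height * new_scaling_factor)
assert (new_width, new_height) == (4000, 2400)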
Example #10
    def test_multiple_plays_and_ratings_for_multiple_explorations(self):
        exploration_1 = self.save_new_default_exploration(
            self.EXP_ID_1, self.owner_id_1, title=self.EXP_TITLE_1)
        exploration_2 = self.save_new_default_exploration(
            self.EXP_ID_2, self.owner_id_1, title=self.EXP_TITLE_2)

        self.login(self.OWNER_EMAIL_1)
        response = self.get_json(feconf.CREATOR_DASHBOARD_DATA_URL)
        self.assertEqual(len(response['explorations_list']), 2)

        exp_version = self.EXP_DEFAULT_VERSION

        exp_id_1 = self.EXP_ID_1
        state_1 = exploration_1.init_state_name
        exp_id_2 = self.EXP_ID_2
        state_2 = exploration_2.init_state_name

        self._record_start(exp_id_1, exp_version, state_1)
        self._record_start(exp_id_2, exp_version, state_2)
        self._record_start(exp_id_2, exp_version, state_2)

        self._rate_exploration(exp_id_1, [4])
        self._rate_exploration(exp_id_2, [3, 3])

        self._run_user_stats_aggregator_job()

        user_model = user_models.UserStatsModel.get(self.owner_id_1)
        self.assertEqual(user_model.total_plays, 3)
        self.assertEqual(
            user_model.impact_score, self.USER_IMPACT_SCORE_DEFAULT)
        self.assertEqual(user_model.num_ratings, 3)
        self.assertEqual(
            user_model.average_ratings, python_utils.divide(10, 3.0))
        self.logout()
Example #11
    def test_get_time_in_millisecs(self) -> None:
        dt = datetime.datetime(2020, 6, 15)
        msecs = utils.get_time_in_millisecs(dt)
        self.assertEqual(
            dt,
            datetime.datetime.fromtimestamp(
                python_utils.divide(msecs, 1000.0)))  # type: ignore[no-untyped-call]
Example #12
    def get(self):
        """Handles GET requests."""
        comma_separated_topic_ids = (
            self.request.get('comma_separated_topic_ids'))
        topic_ids = comma_separated_topic_ids.split(',')
        topics = topic_fetchers.get_topics_by_ids(topic_ids)
        all_skill_ids = []
        subtopic_mastery_dict = {}

        for ind, topic in enumerate(topics):
            if not topic:
                raise self.InvalidInputException('Invalid topic ID %s' %
                                                 topic_ids[ind])
            all_skill_ids.extend(topic.get_all_skill_ids())

        all_skill_ids = list(set(all_skill_ids))
        all_skills_mastery_dict = skill_services.get_multi_user_skill_mastery(
            self.user_id, all_skill_ids)
        for topic in topics:
            subtopic_mastery_dict[topic.id] = {}
            for subtopic in topic.subtopics:
                skill_mastery_dict = {
                    skill_id: mastery
                    for skill_id, mastery in all_skills_mastery_dict.items()
                    if mastery is not None and skill_id in subtopic.skill_ids
                }
                if skill_mastery_dict:
                    # Subtopic mastery is average of skill masteries.
                    subtopic_mastery_dict[topic.id][subtopic.id] = (
                        python_utils.divide(sum(skill_mastery_dict.values()),
                                            len(skill_mastery_dict)))

        self.values.update({'subtopic_mastery_dict': subtopic_mastery_dict})
        self.render_json(self.values)
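
Subtopic mastery is the plain average of the masteries of the subtopic's skills, skipping skills with no mastery value. A sketch with hypothetical mastery values, using plain division in place of python_utils.divide:

# Hypothetical skill masteries; None means the user has no mastery value yet.
all_skills_mastery_dict = {'skill_1': 0.4, 'skill_2': 0.8, 'skill_3': None}
subtopic_skill_ids = ['skill_1', 'skill_2', 'skill_3']

skill_mastery_dict = {
    skill_id: mastery
    for skill_id, mastery in all_skills_mastery_dict.items()
    if mastery is not None and skill_id in subtopic_skill_ids
}
# Subtopic mastery is the average of the known masteries: (0.4 + 0.8) / 2.
subtopic_mastery = sum(skill_mastery_dict.values()) / len(skill_mastery_dict)
assert abs(subtopic_mastery - 0.6) < 1e-9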
Example #13
    def test_both_realtime_layer_and_batch_data(self):
        user_stats_aggregator_swap = self.swap(user_jobs_continuous,
                                               'UserStatsAggregator',
                                               MockUserStatsAggregator)
        exploration_1 = self._create_exploration(self.EXP_ID_1, self.user_a_id)
        exploration_2 = self._create_exploration(self.EXP_ID_2, self.user_a_id)

        exp_id_1 = self.EXP_ID_1
        exp_id_2 = self.EXP_ID_2
        exp_version = self.EXP_DEFAULT_VERSION
        state_1 = exploration_1.init_state_name
        state_2 = exploration_2.init_state_name

        self._rate_exploration(exp_id_1, 2, 4)
        self._rate_exploration(exp_id_2, 4, 3)

        # Run the computation and check data from batch job.
        self._run_computation()
        user_stats_model = user_models.UserStatsModel.get(self.user_a_id)
        # The total plays is the sum of the number of starts of both the
        # exploration_1 and exploration_2 as defined in the
        # mock_get_statistics() method above.
        self.assertEqual(user_stats_model.total_plays, 14)
        self.assertEqual(user_stats_model.num_ratings, 6)
        self.assertEqual(user_stats_model.average_ratings,
                         python_utils.divide(20, 6.0))

        # Stop the batch job. Fire up a few events and check data from realtime
        # job.
        with user_stats_aggregator_swap:
            MockUserStatsAggregator.stop_computation(self.user_a_id)

        self._record_start(exp_id_1, exp_version, state_1)
        self._record_start(exp_id_2, exp_version, state_2)
        self._record_exploration_rating(exp_id_1, [2, 5])
        self._record_exploration_rating(exp_id_2, [4, 1])

        user_stats = (
            user_jobs_continuous.UserStatsAggregator.get_dashboard_stats(
                self.user_a_id))
        # After recording two start events, the total plays is now increased by
        # two.
        self.assertEqual(user_stats['total_plays'], 16)
        self.assertEqual(user_stats['num_ratings'], 10)
        self.assertEqual(user_stats['average_ratings'],
                         python_utils.divide(32, 10.0))
Example #14
    def test_swap_to_always_raise_with_error(self):
        obj = mock.Mock()
        obj.func = lambda: python_utils.divide(1, 0)

        self.assertRaisesRegexp(
            ZeroDivisionError, 'integer division or modulo by zero', obj.func)

        with self.swap_to_always_raise(obj, 'func', error=ValueError('abc')):
            self.assertRaisesRegexp(ValueError, 'abc', obj.func)
Example #15
def get_time_in_millisecs(datetime_obj):
    # type: (datetime.datetime) -> float
    """Returns time in milliseconds since the Epoch.

    Args:
        datetime_obj: datetime. An object of type datetime.datetime.

    Returns:
        float. The time in milliseconds since the Epoch.
    """
    msecs = time.mktime(datetime_obj.timetuple()) * 1000.0
    return msecs + python_utils.divide(datetime_obj.microsecond, 1000.0)  # type: ignore[no-any-return, no-untyped-call]
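
A standalone round-trip sketch, mirroring the tests above, with plain division standing in for python_utils.divide:

import datetime
import time

# Whole seconds come from mktime(); microseconds add the fractional part.
dt = datetime.datetime(2020, 6, 15)
msecs = time.mktime(dt.timetuple()) * 1000.0 + dt.microsecond / 1000.0
assert datetime.datetime.fromtimestamp(msecs / 1000.0) == dt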
Example #16
    def test_realtime_layer_batch_job_multiple_explorations_one_owner(self):
        self._create_exploration(self.EXP_ID_1, self.user_a_id)
        self._create_exploration(self.EXP_ID_2, self.user_a_id)

        self._record_exploration_rating(self.EXP_ID_1, [4, 5, 2])
        self._record_exploration_rating(self.EXP_ID_2, [5, 2])

        user_stats = (
            user_jobs_continuous.UserStatsAggregator.get_dashboard_stats(
                self.user_a_id))
        self.assertEqual(user_stats['total_plays'], 0)
        self.assertEqual(user_stats['num_ratings'], 5)
        self.assertEqual(user_stats['average_ratings'],
                         python_utils.divide(18, 5.0))
Example #17
def get_human_readable_time_string(time_msec: float) -> str:
    """Given a time in milliseconds since the epoch, get a human-readable
    time string for the admin dashboard.

    Args:
        time_msec: float. Time in milliseconds since the Epoch.

    Returns:
        str. A string representing the time.
    """
    # Ignoring arg-type because we are preventing direct usage of 'str' for
    # Python3 compatibility.
    return time.strftime(
        '%B %d %H:%M:%S', time.gmtime(python_utils.divide(time_msec, 1000.0)))  # type: ignore[arg-type, no-untyped-call]
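
A usage sketch with a hypothetical timestamp, using plain division in place of python_utils.divide:

import time

# Hypothetical timestamp: 2020-06-15 00:00:00 UTC, in msecs since the Epoch.
time_msec = 1592179200000.0
readable = time.strftime('%B %d %H:%M:%S', time.gmtime(time_msec / 1000.0))
print(readable)  # 'June 15 00:00:00' (month name depends on locale).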
Example #18
    def get_question_skill_links_equidistributed_by_skill(
            cls, total_question_count, skill_ids):
        """Fetches a list containing a constant number of
        QuestionSkillLinkModels linked to the given skills.

        Args:
            total_question_count: int. The number of questions expected.
            skill_ids: list(str). The ids of skills for which the linked
                question ids are to be retrieved.

        Returns:
            list(QuestionSkillLinkModel). A list of QuestionSkillLinkModels
                corresponding to the given skill_ids, with
                total_question_count/len(skill_ids) questions for each skill,
                rounded up when not evenly divisible. If there are not enough
                questions for a skill, all the questions it links to are
                returned. The questions follow the order of the given skill
                ids, but the order of questions for the same skill is random.
        """
        if len(skill_ids) > feconf.MAX_NUMBER_OF_SKILL_IDS:
            raise Exception('Please keep the number of skill IDs below 20.')

        question_count_per_skill = int(
            math.ceil(
                python_utils.divide(float(total_question_count),
                                    float(len(skill_ids)))))
        question_skill_link_models = []
        existing_question_ids = []

        for skill_id in skill_ids:
            query = cls.query(cls.skill_id == skill_id)
            # We fetch more questions here in order to try and ensure that the
            # eventual number of returned questions is sufficient to meet the
            # number requested, even after deduplication.
            new_question_skill_link_models = query.fetch(
                question_count_per_skill * 2)

            # Deduplicate if the same question is linked to multiple skills.
            new_question_skill_link_models = [
                model for model in new_question_skill_link_models
                if model.question_id not in existing_question_ids]

            question_skill_link_models.extend(
                new_question_skill_link_models[:question_count_per_skill])
            existing_question_ids.extend([
                model.question_id for model in new_question_skill_link_models
            ])

        return question_skill_link_models
Example #19
    def _validate_exploration_model_last_updated(
            cls, item, field_name_to_external_model_references):
        """Validate that item.exploration_model_last_updated matches the
        time when a last commit was made by a human contributor.

        Args:
            item: datastore_services.Model. ExpSummaryModel to validate.
            field_name_to_external_model_references:
                dict(str, (list(base_model_validators.ExternalModelReference))).
                A dict keyed by field name. The field name represents
                a unique identifier provided by the storage
                model to which the external model is associated. Each value
                contains a list of ExternalModelReference objects corresponding
                to the field_name. For examples, all the external Exploration
                Models corresponding to a storage model can be associated
                with the field name 'exp_ids'. This dict is used for
                validation of External Model properties linked to the
                storage model.
        """
        exploration_model_references = (
            field_name_to_external_model_references['exploration_ids'])

        for exploration_model_reference in exploration_model_references:
            exploration_model = exploration_model_reference.model_instance
            if exploration_model is None or exploration_model.deleted:
                model_class = exploration_model_reference.model_class
                model_id = exploration_model_reference.model_id
                cls._add_error(
                    'exploration_ids %s' % (
                        base_model_validators.ERROR_CATEGORY_FIELD_CHECK),
                    'Entity id %s: based on field exploration_ids having'
                    ' value %s, expected model %s with id %s but it doesn\'t'
                    ' exist' % (
                        item.id, model_id, model_class.__name__, model_id))
                continue
            last_human_update_ms = exp_services.get_last_updated_by_human_ms(
                exploration_model.id)
            last_human_update_time = datetime.datetime.fromtimestamp(
                python_utils.divide(last_human_update_ms, 1000.0))
            if item.exploration_model_last_updated != last_human_update_time:
                cls._add_error(
                    'exploration model %s' % (
                        base_model_validators.ERROR_CATEGORY_LAST_UPDATED_CHECK
                    ),
                    'Entity id %s: The exploration_model_last_updated '
                    'field: %s does not match the last time a commit was '
                    'made by a human contributor: %s' % (
                        item.id, item.exploration_model_last_updated,
                        last_human_update_time))
Example #20
    def get_dashboard_stats(cls, user_id):
        """Returns the dashboard stats associated with the given user_id.

        Args:
            user_id: str. The id of the user.

        Returns:
            dict. Has the keys:
                total_plays: int. Number of times the user's explorations were
                    played.
                num_ratings: int. Number of times the explorations have been
                    rated.
                average_ratings: float. Average of average ratings across all
                    explorations.
        """
        total_plays = 0
        num_ratings = 0
        average_ratings = None

        sum_of_ratings = 0

        mr_model = user_models.UserStatsModel.get(user_id, strict=False)
        if mr_model is not None:
            total_plays += mr_model.total_plays
            num_ratings += mr_model.num_ratings
            if mr_model.average_ratings is not None:
                sum_of_ratings += (mr_model.average_ratings *
                                   mr_model.num_ratings)

        realtime_model = cls._get_realtime_datastore_class().get(
            cls.get_active_realtime_layer_id(user_id), strict=False)

        if realtime_model is not None:
            total_plays += realtime_model.total_plays
            num_ratings += realtime_model.num_ratings
            if realtime_model.average_ratings is not None:
                sum_of_ratings += (realtime_model.average_ratings *
                                   realtime_model.num_ratings)

        if num_ratings > 0:
            average_ratings = python_utils.divide(sum_of_ratings,
                                                  float(num_ratings))

        return {
            'total_plays': total_plays,
            'num_ratings': num_ratings,
            'average_ratings': average_ratings
        }
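
The batch and realtime layers are merged by converting each layer's average back into a sum (average * count), adding sums and counts, and dividing again. A worked sketch with hypothetical layer values, using plain division in place of python_utils.divide:

# Hypothetical layers: the batch layer has 6 ratings averaging 20/6, the
# realtime layer has 4 ratings averaging 3.0.
batch_num_ratings, batch_average = 6, 20 / 6.0
realtime_num_ratings, realtime_average = 4, 3.0

num_ratings = batch_num_ratings + realtime_num_ratings
sum_of_ratings = (batch_average * batch_num_ratings +
                  realtime_average * realtime_num_ratings)  # 20 + 12 = 32
average_ratings = sum_of_ratings / float(num_ratings)
assert num_ratings == 10
assert abs(average_ratings - 3.2) < 1e-9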
Example #21
    def test_realtime_layer_batch_job_single_exploration_multiple_owners(self):
        exploration = self._create_exploration(
            self.EXP_ID_1, self.user_a_id)

        rights_manager.assign_role_for_exploration(
            self.user_a, self.EXP_ID_1, self.user_b_id,
            rights_domain.ROLE_OWNER)

        exp_version = self.EXP_DEFAULT_VERSION
        exp_id = self.EXP_ID_1
        state = exploration.init_state_name

        self._record_start(exp_id, exp_version, state)
        self._record_start(exp_id, exp_version, state)
        self._record_exploration_rating(exp_id, [3, 4, 5])
        self._record_exploration_rating(exp_id, [1, 5, 4])

        expected_results = {
            'total_plays': 2,
            'num_ratings': 6,
            'average_ratings': python_utils.divide(22, 6.0)
        }

        user_stats_1 = (
            user_jobs_continuous.UserStatsAggregator.get_dashboard_stats(
                self.user_a_id))
        self.assertEqual(
            user_stats_1['total_plays'], expected_results['total_plays'])
        self.assertEqual(
            user_stats_1['num_ratings'], expected_results['num_ratings'])
        self.assertEqual(
            user_stats_1['average_ratings'],
            expected_results['average_ratings'])

        user_stats_2 = (
            user_jobs_continuous.UserStatsAggregator.get_dashboard_stats(
                self.user_b_id))
        self.assertEqual(
            user_stats_2['total_plays'], expected_results['total_plays'])
        self.assertEqual(
            user_stats_2['num_ratings'], expected_results['num_ratings'])
        self.assertEqual(
            user_stats_2['average_ratings'],
            expected_results['average_ratings'])
Example #22
def record_user_has_seen_notifications(user_id, last_seen_msecs):
    """Updates the last_checked time for this user (which represents the time
    the user last saw the notifications in the dashboard page or the
    notifications dropdown).

    Args:
        user_id: str. The user ID of the subscriber.
        last_seen_msecs: float. The time (in msecs since the Epoch) when the
            user last saw the notifications in the dashboard page or the
            notifications dropdown.
    """
    subscriptions_model = user_models.UserSubscriptionsModel.get(user_id,
                                                                 strict=False)
    if not subscriptions_model:
        subscriptions_model = user_models.UserSubscriptionsModel(id=user_id)

    subscriptions_model.last_checked = datetime.datetime.utcfromtimestamp(
        python_utils.divide(last_seen_msecs, 1000.0))
    subscriptions_model.put()
Example #23
    def validate_deleted(cls, item):
        """Validate that the models marked as deleted are hard-deleted after
        eight weeks.

        Args:
            item: datastore_services.Model. Entity to validate.
        """
        cls.errors.clear()
        date_now = datetime.datetime.utcnow()
        date_before_which_models_should_be_deleted = (
            date_now - feconf.PERIOD_TO_HARD_DELETE_MODELS_MARKED_AS_DELETED)
        period_to_hard_delete_models_in_days = (
            feconf.PERIOD_TO_HARD_DELETE_MODELS_MARKED_AS_DELETED.days)
        if item.last_updated < date_before_which_models_should_be_deleted:
            cls._add_error(
                'entity %s' % ERROR_CATEGORY_STALE_CHECK,
                'Entity id %s: model marked as deleted is older than %s weeks'
                %
                (item.id,
                 python_utils.divide(period_to_hard_delete_models_in_days, 7)))
Example #24
    def get_question_skill_links_based_on_difficulty_equidistributed_by_skill(
            cls, total_question_count, skill_ids, difficulty_requested):
        """Fetches a list containing a constant number of random
        QuestionSkillLinkModels linked to the skills, sorted by the absolute
        value of the difference between skill difficulty and the requested
        difficulty.

        Args:
            total_question_count: int. The number of questions expected.
            skill_ids: list(str). The ids of skills for which the linked
                question ids are to be retrieved.
            difficulty_requested: float. The skill difficulty of the questions
                requested to be fetched.

        Returns:
            list(QuestionSkillLinkModel). A list of random
            QuestionSkillLinkModels corresponding to the given skill_ids, with
            total_question_count/len(skill_ids) questions for each skill,
            rounded up when not evenly divisible. If there are not enough
            questions for a skill, all the questions it links to are returned.
        """
        if len(skill_ids) > feconf.MAX_NUMBER_OF_SKILL_IDS:
            raise Exception('Please keep the number of skill IDs below 20.')

        if not skill_ids:
            return []

        question_count_per_skill = int(
            math.ceil(python_utils.divide(
                float(total_question_count), float(len(skill_ids)))))

        question_skill_link_mapping = {}

        # To fetch the questions randomly, a random offset is used. This is a
        # temporary solution, since this method scales linearly. Alternative
        # approaches that were considered:
        # 1) Using a random id in the question id filter.
        # 2) Adding an additional column that can be filtered upon.
        # These are not viable because Google Datastore limits each query to
        # at most one inequality filter, so we can't filter on both
        # question_id and difficulty. Please see
        # https://github.com/oppia/oppia/pull/9061#issuecomment-629765809
        # for more details.

        def get_offset(query):
            """Helper function to get the offset."""
            question_count = query.count()
            if question_count > 2 * question_count_per_skill:
                return utils.get_random_int(
                    question_count - (question_count_per_skill * 2))
            return 0

        for skill_id in skill_ids:
            query = cls.query(cls.skill_id == skill_id)

            equal_questions_query = query.filter(
                cls.skill_difficulty == difficulty_requested)

            # We fetch more questions here in order to try and ensure that the
            # eventual number of returned questions is sufficient to meet the
            # number requested, even after deduplication.
            new_question_skill_link_models = equal_questions_query.fetch(
                limit=question_count_per_skill * 2,
                offset=get_offset(equal_questions_query))
            # Deduplicate if the same question is linked to multiple skills.
            new_question_skill_link_models = [
                model for model in new_question_skill_link_models
                if model.question_id not in question_skill_link_mapping]

            if len(new_question_skill_link_models) >= question_count_per_skill:
                new_question_skill_link_models = random.sample(
                    new_question_skill_link_models, question_count_per_skill)
            else:
                # Fetch QuestionSkillLinkModels with difficulty smaller than
                # requested difficulty.
                easier_questions_query = query.filter(
                    cls.skill_difficulty < difficulty_requested)
                easier_question_skill_link_models = (
                    easier_questions_query.fetch(
                        limit=question_count_per_skill * 2,
                        offset=get_offset(easier_questions_query)))
                easier_question_skill_link_models = [
                    model for model in easier_question_skill_link_models
                    if model.question_id not in question_skill_link_mapping]
                question_extra_count = (
                    len(new_question_skill_link_models) +
                    len(easier_question_skill_link_models) -
                    question_count_per_skill)
                if question_extra_count >= 0:
                    easier_question_skill_link_models = random.sample(
                        easier_question_skill_link_models,
                        question_count_per_skill -
                        len(new_question_skill_link_models)
                    )
                    new_question_skill_link_models.extend(
                        easier_question_skill_link_models)
                else:
                    # Fetch QuestionSkillLinkModels with difficulty larger than
                    # requested difficulty.
                    new_question_skill_link_models.extend(
                        easier_question_skill_link_models)
                    harder_questions_query = query.filter(
                        cls.skill_difficulty > difficulty_requested)
                    harder_question_skill_link_models = (
                        harder_questions_query.fetch(
                            limit=question_count_per_skill * 2,
                            offset=get_offset(harder_questions_query)))
                    harder_question_skill_link_models = [
                        model for model in harder_question_skill_link_models
                        if model.question_id not in question_skill_link_mapping]
                    question_extra_count = (
                        len(new_question_skill_link_models) +
                        len(harder_question_skill_link_models) -
                        question_count_per_skill)
                    if question_extra_count >= 0:
                        harder_question_skill_link_models = (
                            random.sample(
                                harder_question_skill_link_models,
                                question_count_per_skill -
                                len(new_question_skill_link_models)
                            ))
                    new_question_skill_link_models.extend(
                        harder_question_skill_link_models)

            new_question_skill_link_models = (
                new_question_skill_link_models[:question_count_per_skill])

            for model in new_question_skill_link_models:
                if model.question_id not in question_skill_link_mapping:
                    question_skill_link_mapping[model.question_id] = model

        return list(question_skill_link_mapping.values())
Example #25
    def map(item):
        """Implements the map function (generator).

        Computes exploration data for every contributor and owner of the
        exploration.

        Args:
            item: ExpSummaryModel. An instance of ExpSummaryModel.

        Yields:
            tuple(owner_id, exploration_data), where:
                owner_id: str. The unique id of the user.
                exploration_data: dict. Has the keys:
                    exploration_impact_score: float. The impact score of all the
                        explorations contributed to by the user.
                    total_plays_for_owned_exp: int. Total plays of all
                        explorations owned by the user.
                    average_rating_for_owned_exp: float. Average of average
                        ratings of all explorations owned by the user.
                    num_ratings_for_owned_exp: int. Total number of ratings of
                        all explorations owned by the user.
        """
        if item.deleted:
            return

        exponent = python_utils.divide(2.0, 3)

        # This is set to False only when the exploration impact score cannot
        # be validly calculated.
        calculate_exploration_impact_score = True

        # Get average rating and value per user.
        total_rating = 0
        for ratings_value in item.ratings:
            total_rating += item.ratings[ratings_value] * int(ratings_value)
        sum_of_ratings = sum(item.ratings.values())

        average_rating = (python_utils.divide(total_rating, sum_of_ratings)
                          if sum_of_ratings else None)

        if average_rating is not None:
            value_per_user = average_rating - 2
            if value_per_user <= 0:
                calculate_exploration_impact_score = False
        else:
            calculate_exploration_impact_score = False

        exploration_stats = stats_services.get_exploration_stats(
            item.id, item.version)
        # For each state, find the number of first entries to the state.
        # This is considered to be approximately equal to the number of
        # users who answered the state because very few users enter a state
        # and leave without answering anything at all.
        answer_count = exploration_stats.get_sum_of_first_hit_counts()
        num_starts = exploration_stats.num_starts

        # Turn answer count into reach.
        reach = answer_count**exponent

        exploration_summary = exp_fetchers.get_exploration_summary_by_id(
            item.id)
        contributors = exploration_summary.contributors_summary
        total_commits = sum(contributors.values())
        if total_commits == 0:
            calculate_exploration_impact_score = False

        mapped_owner_ids = []
        for contrib_id in contributors:
            exploration_data = {}

            # Set the value of exploration impact score only if it needs to be
            # calculated.
            if calculate_exploration_impact_score:
                # Find fractional contribution for each contributor.
                contribution = (python_utils.divide(contributors[contrib_id],
                                                    float(total_commits)))

                # Find score for this specific exploration.
                exploration_data.update({
                    'exploration_impact_score':
                    (value_per_user * reach * contribution)
                })

            # If the user is an owner for the exploration, then update dict with
            # 'average ratings' and 'total plays' as well.
            if contrib_id in exploration_summary.owner_ids:
                mapped_owner_ids.append(contrib_id)
                # Get number of starts (total plays) for the exploration.
                exploration_data.update(
                    {'total_plays_for_owned_exp': num_starts})
                # Update data with average rating only if it is not None.
                if average_rating is not None:
                    exploration_data.update({
                        'average_rating_for_owned_exp':
                        average_rating,
                        'num_ratings_for_owned_exp':
                        sum_of_ratings
                    })
            yield (contrib_id, exploration_data)

        for owner_id in exploration_summary.owner_ids:
            if owner_id not in mapped_owner_ids:
                mapped_owner_ids.append(owner_id)
                # Get number of starts (total plays) for the exploration.
                exploration_data = {'total_plays_for_owned_exp': num_starts}
                # Update data with average rating only if it is not None.
                if average_rating is not None:
                    exploration_data.update({
                        'average_rating_for_owned_exp':
                        average_rating,
                        'num_ratings_for_owned_exp':
                        sum_of_ratings
                    })
                yield (owner_id, exploration_data)
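
A standalone sketch of the per-exploration arithmetic described above, with a hypothetical ratings histogram, answer count and contributor commits; plain division stands in for python_utils.divide:

# Hypothetical exploration: ratings histogram, answer count and commits.
ratings = {'1': 0, '2': 0, '3': 1, '4': 2, '5': 1}  # 4 ratings in total.
answer_count = 125  # Sum of first-hit counts across states.
contributors = {'user_a': 3, 'user_b': 1}  # Commits per contributor.

sum_of_ratings = sum(ratings.values())                      # 4
total_rating = sum(int(k) * v for k, v in ratings.items())  # 16
average_rating = total_rating / float(sum_of_ratings)       # 4.0

value_per_user = average_rating - 2                         # 2.0
reach = answer_count ** (2.0 / 3)                           # ~25.0
total_commits = sum(contributors.values())                  # 4

# Fractional contribution of user_a: 3 / 4 = 0.75, so their
# exploration_impact_score for this exploration is 2.0 * 25.0 * 0.75 = 37.5.
contribution = contributors['user_a'] / float(total_commits)
exploration_impact_score = value_per_user * reach * contribution
assert abs(exploration_impact_score - 37.5) < 1e-9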
Example #26
    def get_question_skill_links_based_on_difficulty_equidistributed_by_skill(
            cls, total_question_count, skill_ids, difficulty_requested):
        """Fetches a list containing a constant number of
        QuestionSkillLinkModels linked to the skills, sorted by the absolute
        value of the difference between skill difficulty and the requested
        difficulty.

        Args:
            total_question_count: int. The number of questions expected.
            skill_ids: list(str). The ids of skills for which the linked
                question ids are to be retrieved.
            difficulty_requested: float. The skill difficulty of the questions
                requested to be fetched.

        Returns:
            list(QuestionSkillLinkModel). A list of QuestionSkillLinkModels
                corresponding to the given skill_ids, with
                total_question_count/len(skill_ids) questions for each skill,
                rounded up when not evenly divisible. If there are not enough
                questions for a skill, all the questions it links to are
                returned. The questions follow the order of the given skill
                ids, and the order of questions for the same skill follows the
                absolute value of the difference between skill difficulty and
                the requested difficulty.
        """
        if len(skill_ids) > feconf.MAX_NUMBER_OF_SKILL_IDS:
            raise Exception('Please keep the number of skill IDs below 20.')

        question_count_per_skill = int(
            math.ceil(python_utils.divide(
                float(total_question_count), float(len(skill_ids)))))

        question_skill_link_mapping = {}

        for skill_id in skill_ids:
            query = cls.query(cls.skill_id == skill_id)

            equal_questions_query = query.filter(
                cls.skill_difficulty == difficulty_requested)
            # We fetch more questions here in order to try and ensure that the
            # eventual number of returned questions is sufficient to meet the
            # number requested, even after deduplication.
            new_question_skill_link_models = (
                equal_questions_query.fetch(question_count_per_skill * 2))
            # Deduplicate if the same question is linked to multiple skills.
            new_question_skill_link_models = [
                model for model in new_question_skill_link_models
                if model.question_id not in question_skill_link_mapping]

            if len(new_question_skill_link_models) < question_count_per_skill:
                # Fetch QuestionSkillLinkModels with difficulty smaller than
                # requested difficulty and sort them by decreasing difficulty.
                easier_questions_query = query.filter(
                    cls.skill_difficulty < difficulty_requested)
                easier_questions_query = easier_questions_query.order(
                    -cls.skill_difficulty)
                easier_question_skill_link_models = (
                    easier_questions_query.fetch(question_count_per_skill))
                easier_question_skill_link_models = [
                    model for model in easier_question_skill_link_models
                    if model.question_id not in question_skill_link_mapping]
                new_question_skill_link_models.extend(
                    easier_question_skill_link_models)

                if (len(new_question_skill_link_models) <
                        question_count_per_skill):
                    # Fetch QuestionSkillLinkModels with difficulty larger than
                    # requested difficulty and sort them by increasing
                    # difficulty.
                    harder_questions_query = query.filter(
                        cls.skill_difficulty > difficulty_requested)
                    harder_questions_query = harder_questions_query.order(
                        cls.skill_difficulty)
                    harder_question_skill_link_models = (
                        harder_questions_query.fetch(question_count_per_skill))
                    harder_question_skill_link_models = [
                        model for model in harder_question_skill_link_models
                        if model.question_id not in question_skill_link_mapping]
                    new_question_skill_link_models.extend(
                        harder_question_skill_link_models)

                # Sort QuestionSkillLinkModels by the difference between their
                # difficulty and requested difficulty.
                new_question_skill_link_models = sorted(
                    new_question_skill_link_models,
                    key=lambda model: abs(
                        model.skill_difficulty - difficulty_requested)
                )
            new_question_skill_link_models = (
                new_question_skill_link_models[:question_count_per_skill])

            for model in new_question_skill_link_models:
                if model.question_id not in question_skill_link_mapping:
                    question_skill_link_mapping[model.question_id] = model

        return list(question_skill_link_mapping.values())
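
The final ordering step can be illustrated in isolation; a sketch with hypothetical difficulty values showing the sort by distance from the requested difficulty:

# Hypothetical skill difficulties of the fetched candidate questions.
candidate_difficulties = [0.35, 0.9, 0.5, 0.6, 0.1]
difficulty_requested = 0.6

# Questions closest to the requested difficulty come first.
ordered = sorted(
    candidate_difficulties,
    key=lambda difficulty: abs(difficulty - difficulty_requested))
assert ordered == [0.6, 0.5, 0.35, 0.9, 0.1]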
Example #27
    def test_stats_for_multiple_explorations_with_multiple_owners(self):
        exploration_1 = self.save_new_default_exploration(
            self.EXP_ID_1, self.owner_id_1, title=self.EXP_TITLE_1)
        exploration_2 = self.save_new_default_exploration(
            self.EXP_ID_2, self.owner_id_1, title=self.EXP_TITLE_2)

        rights_manager.assign_role_for_exploration(
            self.owner_1, self.EXP_ID_1, self.owner_id_2,
            rights_domain.ROLE_OWNER)
        rights_manager.assign_role_for_exploration(
            self.owner_1, self.EXP_ID_2, self.owner_id_2,
            rights_domain.ROLE_OWNER)

        self.login(self.OWNER_EMAIL_2)
        response = self.get_json(feconf.CREATOR_DASHBOARD_DATA_URL)
        self.assertEqual(len(response['explorations_list']), 2)

        exp_version = self.EXP_DEFAULT_VERSION

        exp_id_1 = self.EXP_ID_1
        state_1 = exploration_1.init_state_name
        exp_id_2 = self.EXP_ID_2
        state_2 = exploration_2.init_state_name

        self._record_start(exp_id_1, exp_version, state_1)
        self._record_start(exp_id_1, exp_version, state_1)
        self._record_start(exp_id_2, exp_version, state_2)
        self._record_start(exp_id_2, exp_version, state_2)
        self._record_start(exp_id_2, exp_version, state_2)

        self._rate_exploration(exp_id_1, [5, 3])
        self._rate_exploration(exp_id_2, [5, 5])

        self._run_user_stats_aggregator_job()

        expected_results = {
            'total_plays': 5,
            'num_ratings': 4,
            'average_ratings': python_utils.divide(18, 4.0)
        }

        user_model_2 = user_models.UserStatsModel.get(self.owner_id_2)
        self.assertEqual(
            user_model_2.total_plays, expected_results['total_plays'])
        self.assertEqual(
            user_model_2.impact_score, self.USER_IMPACT_SCORE_DEFAULT)
        self.assertEqual(
            user_model_2.num_ratings, expected_results['num_ratings'])
        self.assertEqual(
            user_model_2.average_ratings, expected_results['average_ratings'])
        self.logout()

        self.login(self.OWNER_EMAIL_1)
        response = self.get_json(feconf.CREATOR_DASHBOARD_DATA_URL)
        self.assertEqual(len(response['explorations_list']), 2)

        user_model_1 = user_models.UserStatsModel.get(self.owner_id_1)
        self.assertEqual(
            user_model_1.total_plays, expected_results['total_plays'])
        self.assertEqual(
            user_model_1.impact_score, self.USER_IMPACT_SCORE_DEFAULT)
        self.assertEqual(
            user_model_1.num_ratings, expected_results['num_ratings'])
        self.assertEqual(
            user_model_1.average_ratings, expected_results['average_ratings'])
        self.logout()