def run(self):
    """Aggregates progress statistics over every StudentPropertyEntity."""
    aggregator = self.ProgressAggregator(self._course)
    # Stream all student property rows through the aggregator's visitor.
    models_utils.QueryMapper(
        StudentPropertyEntity.all(), batch_size=500, report_every=1000
    ).run(aggregator.visit)
    return aggregator.progress_data
# Beispiel #2 ("example" separator and vote count left over from the scrape)
    def run(self):
        """Computes peer review statistics."""
        stats = ReviewStatsAggregator()

        # Visit every review summary; stats accumulates per-unit counts of
        # how many reviews each reviewer completed.
        utils.QueryMapper(peer.ReviewSummary.all(),
                          batch_size=500,
                          report_every=1000).run(stats.visit)

        # Densify each unit's sparse {completed_count: occurrences} dict into
        # an array indexed by completed_count, padding gaps with zero.
        completed_arrays_by_unit = {}
        for unit_id, counts in stats.counts_by_completed_reviews.items():
            highest = max(counts.keys())
            completed_arrays_by_unit[unit_id] = [
                counts.get(index, 0) for index in range(highest + 1)]

        return {'counts_by_completed_reviews': completed_arrays_by_unit}
    def run(self):
        """Computes student statistics."""
        enrollment = EnrollmentAggregator()
        scores = ScoresAggregator()
        names = NamesAggregator()

        def visit(student):
            # Feed each student through every aggregator in a single pass.
            for aggregator in (enrollment, scores, names):
                aggregator.visit(student)

        utils.QueryMapper(Student.all(),
                          batch_size=500,
                          report_every=1000).run(visit)

        return {
            'enrollment': {
                'enrolled': enrollment.enrolled,
                'unenrolled': enrollment.unenrolled
            },
            'scores': scores.name_to_tuple,
            'id': names.st_id
        }
 def run(self):
     """Computes submitted question answers statistics."""
     aggregator = self.MultipleChoiceQuestionAggregator(self._course)
     # Walk every recorded event; the aggregator extracts question answers.
     models_utils.QueryMapper(
         EventEntity.all(), batch_size=500, report_every=1000
     ).run(aggregator.visit)
     return (aggregator.id_to_questions_dict,
             aggregator.id_to_assessments_dict)
    def test_run_processes_one_entity(self):
        """Tests that we can process < batch_size results."""
        Model().put()

        mapper = utils.QueryMapper(Model.all())
        num_processed = mapper.run(process, 1, string='foo')

        model = Model.all().get()
        self.assertEqual(1, num_processed)
        self.assertEqual(1, model.number)
        self.assertEqual('foo', model.string)
    def test_run_process_more_than_1000_entities(self):
        """Tests we can process more entities than the old limit of 1k."""
        counter = counters.PerfCounter(
            'test-run-process-more-than-1000-entities-counter',
            'counter for testing increment by QueryMapper')
        db.put([Model() for _ in xrange(1001)])

        # Also pass custom args to QueryMapper ctor.
        mapper = utils.QueryMapper(
            Model.all(), batch_size=50, counter=counter, report_every=0)
        num_processed = mapper.run(process, 1, string='foo')

        last_written = Model.all().order('-create_date').get()
        self.assertEqual(1001, counter.value)
        self.assertEqual(1001, num_processed)
        self.assertEqual(1, last_written.number)
        self.assertEqual('foo', last_written.string)
    def query(cls, to, intent):
        """Gets the Status of notifications queued previously via send_async().

        Serially performs one datastore query per user in the to list.

        Args:
          to: list of string. The recipients of the notification.
          intent: string. Short string identifier of the intent of the
              notification (for example, 'invitation' or 'reminder').

        Returns:
            Dict of to string -> [Status, sorted by descending enqueue date].
        """
        results = {}
        # _accumulate_statuses mutates results in place for each recipient.
        for address in to:
            utils.QueryMapper(
                cls._get_query_query(address, intent)
            ).run(_accumulate_statuses, results)
        return results
# Beispiel #8 ("example" separator and vote count left over from the scrape)
    def expire_old_reviews_for_unit(cls, review_window_mins, unit_id):
        """Finds and expires all old review steps for a single unit.

        Args:
            review_window_mins: int. Number of minutes before we expire reviews
                assigned by domain.ASSIGNER_KIND_AUTO.
            unit_id: string. Id of the unit to restrict the query to.

        Returns:
            2-tuple of list of db.Key of peer.ReviewStep. 0th element is keys
            that were written successfully; 1st element is keys that we failed
            to update.
        """
        mapper = utils.QueryMapper(
            cls.get_expiry_query(review_window_mins, unit_id),
            counter=COUNTER_EXPIRY_QUERY_KEYS_RETURNED,
            report_every=100)
        succeeded_keys = []
        failed_keys = []

        def expire_one(step_key, succeeded_keys, failed_keys):
            try:
                succeeded_keys.append(cls.expire_review(step_key))
            except:  # All errors are the same. pylint: disable=bare-except
                # Skip. Either the entity was updated between the query and
                # the update, meaning we don't need to expire it; or we ran
                # into a transient datastore error, meaning we'll expire it
                # next time.
                COUNTER_EXPIRE_OLD_REVIEWS_FOR_UNIT_SKIP.inc()
                failed_keys.append(step_key)

        COUNTER_EXPIRE_OLD_REVIEWS_FOR_UNIT_START.inc()
        mapper.run(expire_one, succeeded_keys, failed_keys)
        COUNTER_EXPIRE_OLD_REVIEWS_FOR_UNIT_EXPIRE.inc(
            increment=len(succeeded_keys))
        COUNTER_EXPIRE_OLD_REVIEWS_FOR_UNIT_SUCCESS.inc()
        return succeeded_keys, failed_keys
# Beispiel #9 ("example" separator and vote count left over from the scrape)
    def get_activity_scores(cls, student_user_ids, course, force_refresh=True):
        """Retrieve activity data for students using EventEntity.

           For each student, launch a Query of EventEntities to retrieve
           student scores. The Query is run through a QueryMapper (up to
           batch_size results, reporting back every report_every results),
           which calls back into parse_activity_scores for each event. Once
           the queries complete, build_missing_scores() constructs the
           student answer dicts that were not covered by any event.

           Event properties include a user_id (a number), a source (e.g.,
           tag-assessment), a recorded-on date (timestamp) and data (a
           dictionary). Here's a typical data dict:

           {"loc": {"city": "mililani", "language": "en-US,en;q=0.8", "locale": "en_US",
           "country": "US", "region": "hi", "long": -158.01528099999999, "lat": 21.451331,
           "page_locale": "en_US"}, "instanceid": "yOkVTqWogdaF", "quid": "5733935958982656",
           "score": 1, "location": "https://mobilecsp-201608.appspot.com/mobilecsp/unit?unit=1&lesson=45",
           "answer": [0, 1, 2, 4], "type": "McQuestion", "user_agent":
            "Mozilla/5.0 ..."}

           Note that it includes the unit_id and lesson_id as part of the Url.

        Args:
            student_user_ids: iterable of string. User ids to fetch scores for.
            course: the course whose app_context parameterizes the parser.
            force_refresh: bool. When True, re-query the datastore for every
                student and overwrite the memcache entries. When False, serve
                memcached results where present and query only for students
                with no cache entry.

        Returns:
            Dict with keys 'date' (datetime the data was computed or cached),
            'scores' and 'attempts' (per-student dicts from the parser).
        """
        cached_date = datetime.datetime.now()
        activity_parser = ActivityScoreParser()

        def _fetch_scores_for(user_id):
            # One datastore query per student; this is expensive. Results are
            # streamed straight into the parser via its visit callback.
            mapper = models_utils.QueryMapper(
                EventEntity.all().filter('user_id in', [user_id])
                .filter('recorded_on  >= ', cls.CUTOFF_DATE),
                batch_size=1000, report_every=1000)
            mapper.run(activity_parser.parse_activity_scores)

        def _cache_student(user_id):
            # Cache both scores and attempts so a later cache hit can restore
            # the complete result (previously the non-refresh path cached
            # scores only).
            student = Student.get_by_user_id(user_id)
            MemcacheManager.set(
                cls._memcache_key_for_student(student.email), {
                    'date': cached_date,
                    'scores': activity_parser.activity_scores.get(
                        student.email, {}),
                    'attempts': activity_parser.num_attempts_dict.get(
                        student.email, {}),
                })

        if force_refresh:
            activity_parser.params = (
                activity_parser.build_additional_mapper_params(
                    course.app_context))

            # Launch a Query for each student's activity data.
            for user_id in student_user_ids:
                logging.debug('***RAM*** launching a query for student ' +
                              str(user_id))
                _fetch_scores_for(user_id)

            # Create the student_answer_dict, which is stored at
            # activity_scores[student][unit][lesson][sequence] where sequence
            # is the question's sequential position within the lesson.
            activity_parser.build_missing_scores()

            for user_id in student_user_ids:
                _cache_student(user_id)
        else:
            uncached_students = []
            for student_id in student_user_ids:
                if student_id == '':
                    continue
                student = Student.get_by_user_id(student_id)
                cached = MemcacheManager.get(
                    cls._memcache_key_for_student(student.email))
                if cached:
                    cached_date = cached['date']
                    activity_parser.activity_scores[
                        student_id] = cached['scores']
                    # Bug fix: this previously copied cached['scores'] into
                    # num_attempts_dict. Older cache entries may predate the
                    # 'attempts' key, hence .get().
                    activity_parser.num_attempts_dict[
                        student_id] = cached.get('attempts', {})
                else:
                    uncached_students.append(student_id)

            if uncached_students:
                # NOTE(review): the original compared with now() < cached_date,
                # i.e. only a cached date in the future is replaced; preserved
                # as-is, but this looks like it may have meant '>'.
                if cached_date is None or (
                        datetime.datetime.now() < cached_date):
                    cached_date = datetime.datetime.now()

                activity_parser.params = (
                    activity_parser.build_additional_mapper_params(
                        course.app_context))

                for user_id in uncached_students:
                    _fetch_scores_for(user_id)

                activity_parser.build_missing_scores()

                for user_id in uncached_students:
                    _cache_student(user_id)

        score_data = {
            'date': cached_date,
            'scores': activity_parser.activity_scores,
            'attempts': activity_parser.num_attempts_dict,
        }
        if GLOBAL_DEBUG:
            logging.debug('***RAM*** get_activity_scores returning scores: ' +
                          str(score_data['scores']))
        return score_data
# Beispiel #10 ("example" separator and vote count left over from the scrape)
 def _process_records(self, namespace, now, stats):
     """Runs process_notification over in-process notifications in namespace."""
     with common_utils.Namespace(namespace):
         # Treating as module-protected. pylint: disable-msg=protected-access
         query = notifications.Manager._get_in_process_notifications_query()
         model_utils.QueryMapper(query).run(process_notification, now, stats)
 def test_run_processes_empty_result_set(self):
     """Mapping over an empty query processes zero entities."""
     num_processed = utils.QueryMapper(Model.all()).run(
         process, 1, string='foo')
     self.assertEqual(0, num_processed)
    def test_raising_stop_mapping_stops_execution(self):
        """A callback that signals a stop halts the mapper partway through."""
        db.put([Model(number=value) for value in xrange(11)])

        mapper = utils.QueryMapper(Model.all().order('number'))
        self.assertEqual(5, mapper.run(stop_mapping_at_5))