def mapper(self, line):
        value = self.get_event_and_date_string(line)
        if value is None:
            return
        event, _date_string = value

        event_type = event.get('event_type')
        if event_type is None:
            log.error("encountered event with no event_type: %s", event)
            return

        if event_type not in (DEACTIVATED, ACTIVATED):
            return

        timestamp = eventlog.get_event_time_string(event)
        if timestamp is None:
            log.error("encountered event with bad timestamp: %s", event)
            return

        event_data = eventlog.get_event_data(event)
        if event_data is None:
            return

        course_id = event_data.get('course_id')
        if course_id is None or not opaque_key_util.is_valid_course_id(course_id):
            log.error("encountered explicit enrollment event with invalid course_id: %s", event)
            return

        user_id = event_data.get('user_id')
        if user_id is None:
            log.error("encountered explicit enrollment event with no user_id: %s", event)
            return

        yield (course_id, user_id), (timestamp, event_type)
    def _obfuscate_event(self, event):
        """Obfuscate event by removing/stubbing user information."""

        # Create a string to use when logging errors, to provide some context.
        debug_str = self._get_log_string_for_event(event)

        # Remap the user information stored in the event, and collect for later searching.
        event_data = eventlog.get_event_data(event)
        user_info = self._remap_user_info_in_event(event, event_data)

        # Clean or remove values from context.
        if 'context' in event:
            # These aren't present in current events, but are generated historically by some implicit events.
            event['context'].pop('host', None)
            event['context'].pop('ip', None)
            # Not sure how to clean this, so removing.
            event['context'].pop('path', None)
            # Clean out some of the more obvious values in (mobile) client:
            if 'client' in event['context']:
                event['context']['client'].pop('device', None)
                event['context']['client'].pop('ip', None)

        # Do remaining cleanup on the entire event payload (assuming user-based remapping has been done).
        if event_data:
            # Remove possibly sensitive payload fields (or fields we don't know how to clean).
            for key in [
                    'certificate_id', 'certificate_url', 'source_url', 'fileName', 'GET', 'POST',
                    'requesting_student_id', 'report_url', 'url', 'url_name']:
                event_data.pop(key, None)

            for key in ['answer', 'saved_response']:
                if key in event_data and 'file_upload_key' in event_data[key]:
                    event_data[key].pop('file_upload_key')

            # Clean up remaining event payload recursively.
            updated_event_data = self.obfuscator.obfuscate_structure(event_data, u"event", user_info)
            if updated_event_data is not None:
                if self.obfuscator.is_logging_enabled():
                    log.info(u"Obfuscated payload: %s", debug_str)
                event_data = updated_event_data

            # Re-encode payload as a json string if it originally was one.
            # (This test works because we throw away string values that didn't parse as JSON.)
            if isinstance(event.get('event'), basestring):
                event['event'] = cjson.encode(event_data)
            else:
                event['event'] = event_data

        ip_address = event.get('ip')
        # Skip over IPv6-format ip_address values for now.
        if ip_address and ':' not in ip_address:
            country_code = self.get_country_code(ip_address)
            event.update({'augmented': {'country_code': country_code}})

        # Delete base properties other than username.
        for key in ['host', 'ip', 'page', 'referer']:
            event.pop(key, None)

        return event
 def test_get_truncated_post_string_event_data(self):
     post = '{"POST": "not JSON because of truncation %s'
     post512 = post % ('a' * (512 + 2 - len(post)))
     self.assertEquals(len(post512), 512)
     item = {"event": post512}
     self.assertIsNone(eventlog.get_event_data(item))
     self.assertEquals(self.mock_log.debug.call_count, 1)
     self.assertIn('truncated event value', self.mock_log.debug.call_args[0][0])
 def get_raw_event(self, event_line):
     event = eventlog.parse_json_event(event_line)
     event_data = eventlog.get_event_data(event)
     if event_data is not None:
         event['event'] = event_data
     dump = json.dumps(event, sort_keys=True)
     encoded_dump = backslash_encode_value(dump)
     return encoded_dump
    def mapper(self, line):
        value = self.get_event_and_date_string(line)
        if value is None:
            return
        event, _date_string = value

        event_type = event.get('event_type')
        if event_type is None:
            log.error("encountered event with no event_type: %s", event)
            return

        if event_type not in (DEACTIVATED, ACTIVATED, MODE_CHANGED, VALIDATED):
            return

        timestamp = eventlog.get_event_time_string(event)
        if timestamp is None:
            log.error("encountered event with bad timestamp: %s", event)
            return

        event_data = eventlog.get_event_data(event)
        if event_data is None:
            return

        course_id = event_data.get('course_id')
        if course_id is None or not opaque_key_util.is_valid_course_id(
                course_id):
            log.error(
                "encountered explicit enrollment event with invalid course_id: %s",
                event)
            return

        user_id = event_data.get('user_id')
        if user_id is None:
            log.error(
                "encountered explicit enrollment event with no user_id: %s",
                event)
            return

        mode = event_data.get('mode')
        if mode is None:
            log.error("encountered explicit enrollment event with no mode: %s",
                      event)
            return

        # Pull in extra properties provided only by synthetic enrollment validation events.
        validation_info = None
        if 'dump_start' in event_data:
            validation_info = {
                'is_active': event_data.get('is_active'),
                'created': event_data.get('created'),
                'dump_start': event_data.get('dump_start'),
                'dump_end': event_data.get('dump_end'),
            }

        # Make sure key values that are strings are properly encoded.
        # Note, however, that user_id is an int.
        key = (unicode(course_id).encode('utf-8'), user_id)
        yield key, (timestamp, event_type, mode, validation_info)
    def mapper(self, line):
        # We only want to consider lines that include the type of event with which we are concerned.
        if LINK_CLICKED not in line:
            return

        value = self.get_event_and_date_string(line)

        if value is None:
            return
        event, date_string = value

        event_type = event.get('event_type')
        if not event_type:
            log.error("encountered event with no event_type: %s", event)
            return

        if event_type != LINK_CLICKED:
            return

        event_data = eventlog.get_event_data(event)
        if event_data is None:
            log.error(
                "encountered explicit link_clicked event with no event data: %s",
                event)
            return

        course_id = eventlog.get_course_id(event)
        if course_id is None:
            log.error(
                "encountered explicit link_clicked event with invalid course_id: %s",
                event)
            return

        target_url = event_data.get('target_url')
        if not target_url:
            log.error(
                "encountered explicit link_clicked event with no target_url: %s",
                event)
            return

        current_url = event_data.get('current_url')
        if not current_url:
            log.error(
                "encountered explicit link_clicked event with no current_url: %s",
                event)
            return

        # A link is considered "internal" when it does not navigate away from the current host.
        # Some internal links exclude the host name entirely- they start with / so we account for that.
        current_loc = urlparse(current_url).netloc
        target_loc = urlparse(target_url).netloc

        is_external = 0
        if current_loc != target_loc and target_loc != "":
            is_external = 1

        yield (course_id, date_string), (is_external)
 def test_get_truncated_post_string_event_data(self):
     post = '{"POST": "not JSON because of truncation %s'
     post512 = post % ('a' * (512 + 2 - len(post)))
     self.assertEquals(len(post512), 512)
     item = {"event": post512}
     self.assertIsNone(eventlog.get_event_data(item))
     self.assertEquals(self.mock_log.debug.call_count, 1)
     self.assertIn('truncated event value',
                   self.mock_log.debug.call_args[0][0])
    def mapper(self, line):
        # We only want to consider lines that include the type of event with which we are concerned.
        if LINK_CLICKED not in line:
            return

        value = self.get_event_and_date_string(line)

        if value is None:
            return
        event, date_string = value

        event_type = event.get('event_type')
        if not event_type:
            log.error("encountered event with no event_type: %s", event)
            return

        if event_type != LINK_CLICKED:
            return

        event_data = eventlog.get_event_data(event)
        if event_data is None:
            log.error("encountered explicit link_clicked event with no event data: %s", event)
            return

        context = event.get('context')
        if not context:
            log.error("encountered explicit link_clicked event with no context: %s", event)
            return

        course_id = context.get('course_id')
        if course_id is None or not opaque_key_util.is_valid_course_id(course_id):
            log.error("encountered explicit link_clicked event with invalid course_id: %s", event)
            return

        target_url = event_data.get('target_url')
        if not target_url:
            log.error("encountered explicit link_clicked event with no target_url: %s", event)
            return

        current_url = event_data.get('current_url')
        if not current_url:
            log.error("encountered explicit link_clicked event with no current_url: %s", event)
            return

        # A link is considered "internal" when it does not navigate away from the current host.
        # Some internal links exclude the host name entirely- they start with / so we account for that.
        current_loc = urlparse(current_url).netloc
        target_loc = urlparse(target_url).netloc

        is_external = 0
        if current_loc != target_loc and target_loc != "":
            is_external = 1

        yield (course_id, date_string), (is_external)
Exemple #9
0
    def _obfuscate_event(self, event):
        """Obfuscate event by removing/stubbing user information."""

        # Create a string to use when logging errors, to provide some context.
        debug_str = self._get_log_string_for_event(event)

        # Remap the user information stored in the event, and collect for later searching.
        event_data = eventlog.get_event_data(event)
        user_info = self._remap_user_info_in_event(event, event_data)

        # Clean or remove values from context.
        if 'context' in event:
            # These aren't present in current events, but are generated historically by some implicit events.
            event['context'].pop('host', None)
            event['context'].pop('ip', None)
            # Not sure how to clean this, so removing.
            event['context'].pop('path', None)
            # Clean out some of the more obvious values in (mobile) client:
            if 'client' in event['context']:
                event['context']['client'].pop('device', None)
                event['context']['client'].pop('ip', None)

        # Do remaining cleanup on the entire event payload (assuming user-based remapping has been done).
        if event_data:
            # Remove possibly sensitive payload fields (or fields we don't know how to clean).
            for key in [
                    'certificate_id', 'certificate_url', 'source_url', 'fileName', 'GET', 'POST',
                    'requesting_student_id', 'report_url', 'url', 'url_name']:
                event_data.pop(key, None)

            for key in ['answer', 'saved_response']:
                if key in event_data and 'file_upload_key' in event_data[key]:
                    event_data[key].pop('file_upload_key')

            # Clean up remaining event payload recursively.
            updated_event_data = self.obfuscator.obfuscate_structure(event_data, u"event", user_info)
            if updated_event_data is not None:
                if self.obfuscator.is_logging_enabled():
                    log.info(u"Obfuscated payload: %s", debug_str)
                event_data = updated_event_data

            # Re-encode payload as a json string if it originally was one.
            # (This test works because we throw away string values that didn't parse as JSON.)
            if isinstance(event.get('event'), basestring):
                event['event'] = cjson.encode(event_data)
            else:
                event['event'] = event_data

        # Delete base properties other than username.
        for key in ['host', 'ip', 'page', 'referer']:
            event.pop(key, None)

        return event
    def mapper(self, line):
        value = self.get_event_and_date_string(line)
        if value is None:
            return
        event, _date_string = value

        event_type = event.get('event_type')
        if event_type is None:
            log.error("encountered event with no event_type: %s", event)
            return

        if event_type not in (DEACTIVATED, ACTIVATED, MODE_CHANGED, VALIDATED):
            return

        timestamp = eventlog.get_event_time_string(event)
        if timestamp is None:
            log.error("encountered event with bad timestamp: %s", event)
            return

        event_data = eventlog.get_event_data(event)
        if event_data is None:
            return

        course_id = event_data.get('course_id')
        if course_id is None or not opaque_key_util.is_valid_course_id(course_id):
            log.error("encountered explicit enrollment event with invalid course_id: %s", event)
            return

        user_id = event_data.get('user_id')
        if user_id is None:
            log.error("encountered explicit enrollment event with no user_id: %s", event)
            return

        mode = event_data.get('mode')
        if mode is None:
            log.error("encountered explicit enrollment event with no mode: %s", event)
            return

        # Pull in extra properties provided only by synthetic enrollment validation events.
        validation_info = None
        if 'dump_start' in event_data:
            validation_info = {
                'is_active': event_data.get('is_active'),
                'created': event_data.get('created'),
                'dump_start': event_data.get('dump_start'),
                'dump_end': event_data.get('dump_end'),
            }

        # Make sure key values that are strings are properly encoded.
        # Note, however, that user_id is an int.
        key = (unicode(course_id).encode('utf-8'), user_id)
        yield key, (timestamp, event_type, mode, validation_info)
    def mapper(self, line):
        value = self.get_event_and_date_string(line)
        if value is None:
            return
        event, date_string = value

        event_type = event.get('event_type')
        if event_type is None:
            log.error("encountered event with no event_type: %s", event)
            return

        if event_type not in (DEACTIVATED, ACTIVATED, MODE_CHANGED):
            return

        timestamp = eventlog.get_event_time_string(event)
        if timestamp is None:
            log.error("encountered event with bad timestamp: %s", event)
            return

        event_data = eventlog.get_event_data(event)
        if event_data is None:
            return

        course_id = opaque_key_util.normalize_course_id(
            event_data.get('course_id'))
        if course_id is None or not opaque_key_util.is_valid_course_id(
                course_id):
            log.error(
                "encountered explicit enrollment event with invalid course_id: %s",
                event)
            return

        user_id = event_data.get('user_id')
        if user_id is None:
            log.error(
                "encountered explicit enrollment event with no user_id: %s",
                event)
            return

        mode = event_data.get('mode')
        if mode is None:
            log.error("encountered explicit enrollment event with no mode: %s",
                      event)
            return

        yield date_string, (course_id, user_id, timestamp, event_type, mode)
    def deidentify_event(self, event):
        """Deidentify event by removing/stubbing user information."""

        if 'context' in event and 'user_id' in event['context'] and event['context']['user_id']:  # found '' as user_id
            event['context']['user_id'] = self.remap_id(event['context']['user_id'])

        event.pop('host', None)  # delete host
        event.pop('ip', None)  # delete ip

        if event['username']:
            try:
                event['username'] = self.generate_deid_username_from_user_id(self.username_map[event['username']])
            except KeyError:
                log.error("Unable to find username: %s in the username_map", event['username'])
                return None

        event_data = eventlog.get_event_data(event)

        if event_data:
            if 'user_id' in event_data and event_data['user_id']:
                event_data['user_id'] = self.remap_id(event_data['user_id'])

            if 'username' in event_data:
                # How to DRY ?
                try:
                    event_data['username'] = self.generate_deid_username_from_user_id(self.username_map[event_data['username']])
                except KeyError:
                    log.error("Unable to find username: %s in the username_map", event_data['username'])
                    return None

            for key in ['certificate_id', 'certificate_url', 'source_url', 'fileName', 'GET', 'POST']:
                event_data.pop(key, None)

            for key in ['answer', 'saved_response']:
                if key in event_data and 'file_upload_key' in event_data[key]:
                    event_data[key].pop('file_upload_key')

            # TODO: clean ?

            if isinstance(event.get('event'), basestring):  # re-encode as a json string if originally
                event['event'] = cjson.encode(event_data)
            else:
                event['event'] = event_data

        return event
    def mapper(self, line):
        value = self.get_event_and_date_string(line)
        if value is None:
            return
        event, date_string = value

        username = event.get('username', '').strip()
        if not username:
            return

        event_type = event.get('event_type')
        if event_type is None:
            return

        course_id = eventlog.get_course_id(event)
        if not course_id:
            return

        event_data = eventlog.get_event_data(event)
        if event_data is None:
            return

        event_source = event.get('event_source')

        entity_id, entity_type, user_actions = self.get_user_actions_from_event(event_data, event_source, event_type)

        if not entity_id or not entity_type:
            return

        for action in user_actions:
            record = ModuleEngagementRecord(
                course_id=course_id,
                username=username,
                date=DateField().deserialize_from_string(date_string),
                entity_type=entity_type,
                entity_id=entity_id,
                event=action,
                count=0
            )
            # The count is populated by the reducer, so exclude it from the key.
            record_without_count = record.to_string_tuple()[:-1]
            yield (record_without_count, 1)
    def mapper(self, line):
        """
        Args:
            line: text line from a tracking event log.

        Yields:  (course_id, org_id, problem_id), (timestamp, saved_tags, is_correct)

        """
        value = self.get_event_and_date_string(line)
        if value is None:
            return
        event, _ = value

        if event.get('event_type') != 'problem_check' or event.get(
                'event_source') != 'server':
            return

        timestamp = eventlog.get_event_time_string(event)
        if timestamp is None:
            return

        course_id = eventlog.get_course_id(event)
        if not course_id:
            return

        org_id = opaque_key_util.get_org_id_for_course(course_id)

        event_data = eventlog.get_event_data(event)
        if event_data is None:
            return

        problem_id = event_data.get('problem_id')
        if not problem_id:
            return

        is_correct = event_data.get('success') == 'correct'

        saved_tags = event.get('context').get('asides', {}).get(
            'tagging_aside', {}).get('saved_tags', {})

        yield (course_id, org_id, problem_id), (timestamp, saved_tags,
                                                is_correct)
    def mapper(self, line):
        """
        Args:
            line: text line from a tracking event log.

        Yields:  (course_id, org_id, problem_id), (timestamp, saved_tags, is_correct)

        """
        value = self.get_event_and_date_string(line)
        if value is None:
            return
        event, _ = value

        if event.get('event_type') != 'problem_check' or event.get('event_source') != 'server':
            return

        timestamp = eventlog.get_event_time_string(event)
        if timestamp is None:
            return

        course_id = eventlog.get_course_id(event)
        if not course_id:
            return

        org_id = opaque_key_util.get_org_id_for_course(course_id)

        event_data = eventlog.get_event_data(event)
        if event_data is None:
            return

        problem_id = event_data.get('problem_id')
        if not problem_id:
            return

        is_correct = event_data.get('success') == 'correct'

        saved_tags = event.get('context').get('asides', {}).get('tagging_aside', {}).get('saved_tags', {})

        yield (course_id, org_id, problem_id), (timestamp, saved_tags, is_correct)
Exemple #16
0
    def mapper(self, line):
        event, date_string = self.get_event_and_date_string(line) or (None, None)
        if event is None:
            return

        if not self.is_valid_input_file():
            return

        org_id = self.get_org_id(event)
        if org_id not in self.org_id_whitelist:
            log.debug('Unrecognized organization: org_id=%s', org_id or '')
            return

        # Do not export events that have been explicitly flagged as not being for export.
        # Any event without an '_export' key will be sent as part of the export by default,
        # and likewise any event without a falsey value. The preferred value to not export is 'false'.
        event_data = eventlog.get_event_data(event)
        if event_data and str(event_data.get('_export', 'true')).lower() in ('n', 'f', '0', 'false', 'no'):
            return

        # Check to see if the org_id is one that should be grouped with other org_ids.
        org_ids = self.primary_org_ids_for_org_id[org_id]

        for key_org_id in org_ids:
            key = (date_string, key_org_id)

            # Include only requested courses
            requested_courses = self.courses_for_org_id.get(key_org_id)
            if requested_courses and eventlog.get_course_id(event, from_url=True) not in requested_courses:
                continue

            # Enforce a standard encoding for the parts of the key. Without this a part of the key
            # might appear differently in the key string when it is coerced to a string by luigi. For example,
            # if the same org_id appears in two different records, one as a str() type and the other a
            # unicode() then without this change they would appear as u'FooX' and 'FooX' in the final key
            # string. Although python doesn't care about this difference, hadoop does, and will bucket the
            # values separately. Which is not what we want.
            yield tuple([value.encode('utf8') for value in key]), line.strip()
 def test_event_data_with_list_type(self):
     item = {"event": ["a list", "of strings"]}
     self.assertIsNone(eventlog.get_event_data(item))
     self.assertEquals(self.mock_log.debug.call_count, 1)
     self.assertIn('value of type list', self.mock_log.debug.call_args[0][0])
    def mapper(self, line):
        # Add a filter here to permit quicker rejection of unrelated events.
        if VIDEO_EVENT_MINIMUM_STRING not in line:
            # self.incr_counter(self.counter_category_name, 'Discard Missing Video String', 1)
            return

        value = self.get_event_and_date_string(line)
        if value is None:
            return
        event, _date_string = value
        # self.incr_counter(self.counter_category_name, 'Inputs with Dates', 1)

        event_type = event.get('event_type')
        if event_type is None:
            log.error("encountered event with no event_type: %s", event)
            self.incr_counter(self.counter_category_name,
                              'Discard Missing Event Type', 1)
            return

        if event_type not in VIDEO_EVENT_TYPES:
            # self.incr_counter(self.counter_category_name, 'Discard Non-Video Event Type', 1)
            return

        # self.incr_counter(self.counter_category_name, 'Input Video Events', 1)

        # This has already been checked when getting the event, so just fetch the value.
        timestamp = eventlog.get_event_time_string(event)

        # Strip username to remove trailing newlines that mess up Luigi.
        username = event.get('username', '').strip()
        if not username:
            log.error("Video event without username: %s", event)
            ## self.incr_counter(self.counter_category_name, 'Discard Video Missing Something', 1)
            ## self.incr_counter(self.counter_category_name, 'Discard Video Missing username', 1)
            return

        course_id = eventlog.get_course_id(event)
        if course_id is None:
            log.warn('Video event without valid course_id: {0}'.format(line))
            ## self.incr_counter(self.counter_category_name, 'Discard Video Missing Something', 1)
            ## self.incr_counter(self.counter_category_name, 'Discard Video Missing course_id', 1)
            return

        event_data = eventlog.get_event_data(event)
        if event_data is None:
            # This should already have been logged.
            ## self.incr_counter(self.counter_category_name, 'Discard Video Missing Something', 1)
            ## self.incr_counter(self.counter_category_name, 'Discard Video Missing Event Data', 1)
            return

        encoded_module_id = event_data.get(
            'id', '').strip()  # we have seen id values with leading newline
        if not encoded_module_id:
            log.warn(
                'Video event without valid encoded_module_id (id): {0}'.format(
                    line))
            ## self.incr_counter(self.counter_category_name, 'Discard Video Missing Something', 1)
            ## self.incr_counter(self.counter_category_name, 'Discard Video Missing encoded_module_id', 1)
            return

        video_duration = event_data.get('duration', VIDEO_UNKNOWN_DURATION)
        if not video_duration:
            # events may have a 'duration' value of null, so use the same default for those as well.
            video_duration = VIDEO_UNKNOWN_DURATION

        # self.incr_counter(self.counter_category_name, 'Video Events Before Time Check', 1)

        current_time = None
        old_time = None
        youtube_id = None
        if event_type == VIDEO_PLAYED:
            code = event_data.get('code')
            if code not in VIDEO_CODES:
                youtube_id = code
            current_time = self._check_time_offset(
                event_data.get('currentTime'), line)
            if current_time is None:
                ## self.incr_counter(self.counter_category_name, 'Discard Video Missing Something', 1)
                ## self.incr_counter(self.counter_category_name, 'Discard Video Missing Time', 1)
                ## self.incr_counter(self.counter_category_name, 'Discard Video Missing Time From Play', 1)
                return
            ### self.incr_counter(self.counter_category_name, 'Subset Play', 1)
        elif event_type == VIDEO_PAUSED:
            # Pause events may have a missing currentTime value if video is paused at the beginning,
            # so provide a default of zero.
            current_time = self._check_time_offset(
                event_data.get('currentTime', 0), line)
            if current_time is None:
                ## self.incr_counter(self.counter_category_name, 'Discard Video Missing Something', 1)
                ## self.incr_counter(self.counter_category_name, 'Discard Video Missing Time', 1)
                ## self.incr_counter(self.counter_category_name, 'Discard Video Missing Time From Pause', 1)
                return
            ### self.incr_counter(self.counter_category_name, 'Subset Pause', 1)
        elif event_type == VIDEO_SEEK:
            current_time = self._check_time_offset(event_data.get('new_time'),
                                                   line)
            old_time = self._check_time_offset(event_data.get('old_time'),
                                               line)
            if current_time is None or old_time is None:
                ## self.incr_counter(self.counter_category_name, 'Discard Video Missing Something', 1)
                ## self.incr_counter(self.counter_category_name, 'Discard Video Missing Time', 1)
                ## self.incr_counter(self.counter_category_name, 'Discard Video Missing Time From Seek', 1)
                return
            ### self.incr_counter(self.counter_category_name, 'Subset Seek', 1)
        elif event_type == VIDEO_STOPPED:
            current_time = self._check_time_offset(
                event_data.get('currentTime'), line)
            if current_time is None:
                ## self.incr_counter(self.counter_category_name, 'Discard Video Missing Something', 1)
                ## self.incr_counter(self.counter_category_name, 'Discard Video Missing Time', 1)
                ## self.incr_counter(self.counter_category_name, 'Discard Video Missing Time From Stop', 1)
                return
            ### self.incr_counter(self.counter_category_name, 'Subset Stop', 1)

        if youtube_id is not None:
            youtube_id = youtube_id.encode('utf8')

        # self.incr_counter(self.counter_category_name, 'Output Video Events from Mapper', 1)
        yield ((username.encode('utf8'), course_id.encode('utf8'),
                encoded_module_id.encode('utf8')),
               (timestamp, event_type, current_time, old_time, youtube_id,
                video_duration))
Exemple #19
0
    def mapper(self, line):
        value = self.get_event_and_date_string(line)
        if value is None:
            return
        event, date_string = value

        username = event.get('username', '').strip()
        if not username:
            return

        event_type = event.get('event_type')
        if event_type is None:
            return

        course_id = eventlog.get_course_id(event)
        if not course_id:
            return

        event_data = eventlog.get_event_data(event)
        if event_data is None:
            return

        event_source = event.get('event_source')

        entity_id = ''
        info = {}
        forum_post_voted = None
        if event_type == 'problem_check':
            if event_source != 'server':
                return

            problem_id = event_data.get('problem_id')
            if not problem_id:
                return

            entity_id = problem_id
            if event_data.get('success', 'incorrect').lower() == 'correct':
                info['correct'] = True
        elif event_type == 'play_video':
            encoded_module_id = event_data.get('id')
            if not encoded_module_id:
                return

            entity_id = encoded_module_id
        elif event_type[:9] == '/courses/' and re.match(
                self.SUBSECTION_ACCESSED_PATTERN, event_type):
            timestamp = eventlog.get_event_time_string(event)
            if timestamp is None:
                return
            info['path'] = event_type
            info['timestamp'] = timestamp
            event_type = SUBSECTION_VIEWED_MARKER
        elif event_type.startswith('edx.forum'):
            forum_post_voted = re.match(
                r'edx\.forum\.(?P<post_type>\w+)\.voted', event_type)
            if forum_post_voted:
                info['vote_value'] = event_data.get('vote_value')
                if info['vote_value'] not in ['up', 'down']:
                    return
                info['undo_vote'] = event_data.get('undo_vote', False)

        date_grouping_key = date_string

        if self.interval_type == 'weekly':
            last_complete_date = self.interval.date_b - datetime.timedelta(
                days=1)  # pylint: disable=no-member
            last_weekday = last_complete_date.isoweekday()

            split_date = date_string.split('-')
            event_date = datetime.date(int(split_date[0]), int(split_date[1]),
                                       int(split_date[2]))
            event_weekday = event_date.isoweekday()

            days_until_end = last_weekday - event_weekday
            if days_until_end < 0:
                days_until_end += 7

            end_of_week_date = event_date + datetime.timedelta(
                days=days_until_end)
            date_grouping_key = end_of_week_date.isoformat()

        elif self.interval_type == 'all':
            # If gathering all data for a given user, use the last complete day of the interval
            # for joining with enrollment.
            last_complete_date = self.interval.date_b - datetime.timedelta(
                days=1)  # pylint: disable=no-member
            date_grouping_key = last_complete_date.isoformat()

        yield ((date_grouping_key, course_id, username),
               (entity_id, event_type, json.dumps(info), date_string))

        if forum_post_voted:
            # We emit two events for each "voted" event - one for the voting user and one for the
            # user receiving the vote.
            username = event_data.get('target_username')
            if not username:
                return
            event_type = 'edx.forum.{}.vote_received'.format(
                forum_post_voted.group('post_type'))
            yield ((date_grouping_key, course_id, username),
                   (entity_id, event_type, json.dumps(info), date_string))
Exemple #20
0
    def mapper(self, line):
        # Add a filter here to permit quicker rejection of unrelated events.
        if VIDEO_EVENT_MINIMUM_STRING not in line:
            # self.incr_counter(self.counter_category_name, 'Discard Missing Video String', 1)
            return

        value = self.get_event_and_date_string(line)
        if value is None:
            return
        event, _date_string = value
        # self.incr_counter(self.counter_category_name, 'Inputs with Dates', 1)

        event_type = event.get('event_type')
        if event_type is None:
            log.error("encountered event with no event_type: %s", event)
            self.incr_counter(self.counter_category_name, 'Discard Missing Event Type', 1)
            return

        if event_type not in VIDEO_EVENT_TYPES:
            # self.incr_counter(self.counter_category_name, 'Discard Non-Video Event Type', 1)
            return

        # self.incr_counter(self.counter_category_name, 'Input Video Events', 1)

        # This has already been checked when getting the event, so just fetch the value.
        timestamp = eventlog.get_event_time_string(event)

        user_id = event.get('context', {}).get('user_id')
        if not user_id:
            log.error("Video event without user_id in context: %s", event)
            return
        # Convert user_id to int if str
        if not isinstance(user_id, int):
            user_id = int(user_id)

        course_id = eventlog.get_course_id(event)
        if course_id is None:
            log.warn('Video event without valid course_id: {0}'.format(line))
            # Slow: self.incr_counter(self.counter_category_name, 'Discard Video Missing Something', 1)
            # Slow: self.incr_counter(self.counter_category_name, 'Discard Video Missing course_id', 1)
            return

        event_data = eventlog.get_event_data(event)
        if event_data is None:
            # This should already have been logged.
            # Slow: self.incr_counter(self.counter_category_name, 'Discard Video Missing Something', 1)
            # Slow: self.incr_counter(self.counter_category_name, 'Discard Video Missing Event Data', 1)
            return

        encoded_module_id = event_data.get('id', '').strip()  # we have seen id values with leading newline
        if not encoded_module_id:
            log.warn('Video event without valid encoded_module_id (id): {0}'.format(line))
            # Slow: self.incr_counter(self.counter_category_name, 'Discard Video Missing Something', 1)
            # Slow: self.incr_counter(self.counter_category_name, 'Discard Video Missing encoded_module_id', 1)
            return

        video_duration = event_data.get('duration', VIDEO_UNKNOWN_DURATION)
        if not video_duration:
            # events may have a 'duration' value of null, so use the same default for those as well.
            video_duration = VIDEO_UNKNOWN_DURATION

        # self.incr_counter(self.counter_category_name, 'Video Events Before Time Check', 1)

        current_time = None
        old_time = None
        youtube_id = None
        if event_type == VIDEO_PLAYED:
            code = event_data.get('code')
            if code not in VIDEO_CODES:
                youtube_id = code
            current_time = self._check_time_offset(event_data.get('currentTime'), line)
            if current_time is None:
                # Slow: self.incr_counter(self.counter_category_name, 'Discard Video Missing Something', 1)
                # Slow: self.incr_counter(self.counter_category_name, 'Discard Video Missing Time', 1)
                # Slow: self.incr_counter(self.counter_category_name, 'Discard Video Missing Time From Play', 1)
                return
            # Slow: self.incr_counter(self.counter_category_name, 'Subset Play', 1)
        elif event_type == VIDEO_PAUSED:
            # Pause events may have a missing currentTime value if video is paused at the beginning,
            # so provide a default of zero.
            current_time = self._check_time_offset(event_data.get('currentTime', 0), line)
            if current_time is None:
                # Slow: self.incr_counter(self.counter_category_name, 'Discard Video Missing Something', 1)
                # Slow: self.incr_counter(self.counter_category_name, 'Discard Video Missing Time', 1)
                # Slow: self.incr_counter(self.counter_category_name, 'Discard Video Missing Time From Pause', 1)
                return
            # Slow: self.incr_counter(self.counter_category_name, 'Subset Pause', 1)
        elif event_type == VIDEO_SEEK:
            current_time = self._check_time_offset(event_data.get('new_time'), line)
            old_time = self._check_time_offset(event_data.get('old_time'), line)
            if current_time is None or old_time is None:
                # Slow: self.incr_counter(self.counter_category_name, 'Discard Video Missing Something', 1)
                # Slow: self.incr_counter(self.counter_category_name, 'Discard Video Missing Time', 1)
                # Slow: self.incr_counter(self.counter_category_name, 'Discard Video Missing Time From Seek', 1)
                return
            # Slow: self.incr_counter(self.counter_category_name, 'Subset Seek', 1)
        elif event_type == VIDEO_STOPPED:
            current_time = self._check_time_offset(event_data.get('currentTime'), line)
            if current_time is None:
                # Slow: self.incr_counter(self.counter_category_name, 'Discard Video Missing Something', 1)
                # Slow: self.incr_counter(self.counter_category_name, 'Discard Video Missing Time', 1)
                # Slow: self.incr_counter(self.counter_category_name, 'Discard Video Missing Time From Stop', 1)
                return
            # Slow: self.incr_counter(self.counter_category_name, 'Subset Stop', 1)

        if youtube_id is not None:
            youtube_id = youtube_id.encode('utf8')

        # self.incr_counter(self.counter_category_name, 'Output Video Events from Mapper', 1)
        yield (
            (user_id, course_id.encode('utf8'), encoded_module_id.encode('utf8')),
            (timestamp, event_type, current_time, old_time, youtube_id, video_duration)
        )
 def test_get_bad_string_event_data(self):
     item = {"event": "a string but not JSON"}
     self.assertIsNone(eventlog.get_event_data(item))
     self.assertEquals(self.mock_log.error.call_count, 1)
     self.assertIn('unparsable event value',
                   self.mock_log.error.call_args[0][0])
 def test_get_dict_event_data(self):
     item = {"event": {"a dict": "that has strings"}}
     self.assertEquals(eventlog.get_event_data(item),
                       {"a dict": "that has strings"})
     self.assertEquals(self.mock_log.error.call_count, 0)
     self.assertEquals(self.mock_log.debug.call_count, 0)
 def test_missing_event_data(self):
     item = {"something else": "not an event"}
     self.assertIsNone(eventlog.get_event_data(item))
     self.assertEquals(self.mock_log.error.call_count, 1)
     self.assertIn('missing event value', self.mock_log.error.call_args[0][0])
 def test_get_key_value_string_event_data(self):
     item = {"event": "a=3,b=4"}
     self.assertIsNone(eventlog.get_event_data(item))
     self.assertEquals(self.mock_log.debug.call_count, 1)
     self.assertIn('key-value pairs', self.mock_log.debug.call_args[0][0])
    def mapper(self, line):
        value = self.get_event_and_date_string(line)
        if value is None:
            return
        event, date_string = value

        username = event.get('username', '').strip()
        if not username:
            return

        event_type = event.get('event_type')
        if event_type is None:
            return

        course_id = eventlog.get_course_id(event)
        if not course_id:
            return

        event_data = eventlog.get_event_data(event)
        if event_data is None:
            return

        event_source = event.get('event_source')

        entity_id = ''
        info = {}
        forum_post_voted = None
        if event_type == 'problem_check':
            if event_source != 'server':
                return

            problem_id = event_data.get('problem_id')
            if not problem_id:
                return

            entity_id = problem_id
            if event_data.get('success', 'incorrect').lower() == 'correct':
                info['correct'] = True
        elif event_type == 'play_video':
            encoded_module_id = event_data.get('id')
            if not encoded_module_id:
                return

            entity_id = encoded_module_id
        elif event_type[:9] == '/courses/' and re.match(self.SUBSECTION_ACCESSED_PATTERN, event_type):
            timestamp = eventlog.get_event_time_string(event)
            if timestamp is None:
                return
            info['path'] = event_type
            info['timestamp'] = timestamp
            event_type = SUBSECTION_VIEWED_MARKER
        elif event_type.startswith('edx.forum'):
            forum_post_voted = re.match(r'edx\.forum\.(?P<post_type>\w+)\.voted', event_type)
            if forum_post_voted:
                info['vote_value'] = event_data.get('vote_value')
                if info['vote_value'] not in ['up', 'down']:
                    return
                info['undo_vote'] = event_data.get('undo_vote', False)

        date_grouping_key = date_string

        if self.interval_type == 'weekly':
            last_complete_date = self.interval.date_b - datetime.timedelta(days=1)  # pylint: disable=no-member
            last_weekday = last_complete_date.isoweekday()

            split_date = date_string.split('-')
            event_date = datetime.date(int(split_date[0]), int(split_date[1]), int(split_date[2]))
            event_weekday = event_date.isoweekday()

            days_until_end = last_weekday - event_weekday
            if days_until_end < 0:
                days_until_end += 7

            end_of_week_date = event_date + datetime.timedelta(days=days_until_end)
            date_grouping_key = end_of_week_date.isoformat()

        elif self.interval_type == 'all':
            # If gathering all data for a given user, use the last complete day of the interval
            # for joining with enrollment.
            last_complete_date = self.interval.date_b - datetime.timedelta(days=1)  # pylint: disable=no-member
            date_grouping_key = last_complete_date.isoformat()

        yield ((date_grouping_key, course_id, username), (entity_id, event_type, json.dumps(info), date_string))

        if forum_post_voted:
            # We emit two events for each "voted" event - one for the voting user and one for the
            # user receiving the vote.
            username = event_data.get('target_username')
            if not username:
                return
            event_type = 'edx.forum.{}.vote_received'.format(forum_post_voted.group('post_type'))
            yield ((date_grouping_key, course_id, username), (entity_id, event_type, json.dumps(info), date_string))
Exemple #26
0
 def test_missing_event_data(self):
     item = {"something else": "not an event"}
     self.assertIsNone(eventlog.get_event_data(item))
Exemple #27
0
 def test_get_dict_event_data(self):
     item = {"event": {"a dict": "that has strings"}}
     self.assertEquals(eventlog.get_event_data(item), {"a dict": "that has strings"})
Exemple #28
0
 def test_event_data_with_unknown_type(self):
     item = {"event": ["a list", "of strings"]}
     self.assertIsNone(eventlog.get_event_data(item))
Exemple #29
0
 def test_get_json_string_event_data(self):
     item = {"event": '{ "a string": "that is JSON"}'}
     self.assertEquals(eventlog.get_event_data(item), {"a string": "that is JSON"})
Exemple #30
0
 def test_get_empty_string_event_data(self):
     item = {"event": ''}
     self.assertEquals(eventlog.get_event_data(item), {})
    def mapper(self, line):
        value = self.get_event_and_date_string(line)
        if value is None:
            return
        event, date_string = value

        username = event.get('username', '').strip()
        if not username:
            return

        event_type = event.get('event_type')
        if event_type is None:
            return

        course_id = eventlog.get_course_id(event)
        if not course_id:
            return

        event_data = eventlog.get_event_data(event)
        if event_data is None:
            return

        event_source = event.get('event_source')

        entity_id = ''
        info = {}
        if event_type == 'problem_check':
            if event_source != 'server':
                return

            problem_id = event_data.get('problem_id')
            if not problem_id:
                return

            entity_id = problem_id
            if event_data.get('success', 'incorrect').lower() == 'correct':
                info['correct'] = True
        elif event_type == 'play_video':
            encoded_module_id = event_data.get('id')
            if not encoded_module_id:
                return

            entity_id = encoded_module_id
        elif event_type[:9] == '/courses/' and re.match(self.SUBSECTION_ACCESSED_PATTERN, event_type):
            timestamp = eventlog.get_event_time_string(event)
            if timestamp is None:
                return
            info['path'] = event_type
            info['timestamp'] = timestamp
            event_type = SUBSECTION_VIEWED_MARKER

        date_grouping_key = date_string

        if self.interval_type == 'weekly':
            last_complete_date = self.interval.date_b - datetime.timedelta(days=1)  # pylint: disable=no-member
            last_weekday = last_complete_date.isoweekday()

            split_date = date_string.split('-')
            event_date = datetime.date(int(split_date[0]), int(split_date[1]), int(split_date[2]))
            event_weekday = event_date.isoweekday()

            days_until_end = last_weekday - event_weekday
            if days_until_end < 0:
                days_until_end += 7

            end_of_week_date = event_date + datetime.timedelta(days=days_until_end)
            date_grouping_key = end_of_week_date.isoformat()

        elif self.interval_type == 'all':
            # If gathering all data for a given user, use the last complete day of the interval
            # for joining with enrollment.
            last_complete_date = self.interval.date_b - datetime.timedelta(days=1)  # pylint: disable=no-member
            date_grouping_key = last_complete_date.isoformat()

        yield ((date_grouping_key, course_id, username), (entity_id, event_type, json.dumps(info), date_string))
 def test_event_data_with_unknown_type(self):
     item = {"event": 1}
     self.assertIsNone(eventlog.get_event_data(item))
     self.assertEquals(self.mock_log.error.call_count, 1)
     self.assertIn('unrecognized type', self.mock_log.error.call_args[0][0])
 def test_get_json_string_event_data(self):
     item = {"event": '{ "a string": "that is JSON"}'}
     self.assertEquals(eventlog.get_event_data(item), {"a string": "that is JSON"})
     self.assertEquals(self.mock_log.error.call_count, 0)
     self.assertEquals(self.mock_log.debug.call_count, 0)
 def test_get_json_string_event_data(self):
     item = {"event": '{ "a string": "that is JSON"}'}
     self.assertEquals(eventlog.get_event_data(item),
                       {"a string": "that is JSON"})
     self.assertEquals(self.mock_log.error.call_count, 0)
     self.assertEquals(self.mock_log.debug.call_count, 0)
Exemple #35
0
    def mapper(self, line):
        # Add a filter here to permit quicker rejection of unrelated events.
        if VIDEO_EVENT_MINIMUM_STRING not in line:
            return

        value = self.get_event_and_date_string(line)
        if value is None:
            return
        event, _date_string = value

        event_type = event.get('event_type')
        if event_type is None:
            log.error("encountered event with no event_type: %s", event)
            return

        if event_type not in VIDEO_EVENT_TYPES:
            return

        # This has already been checked when getting the event, so just fetch the value.
        timestamp = eventlog.get_event_time_string(event)

        # Strip username to remove trailing newlines that mess up Luigi.
        username = event.get('username', '').strip()
        if not username:
            log.error("Video event without username: %s", event)
            return

        course_id = eventlog.get_course_id(event)
        if course_id is None:
            log.warn('Video event without valid course_id: {0}'.format(line))
            return

        event_data = eventlog.get_event_data(event)
        if event_data is None:
            # This should already have been logged.
            return

        encoded_module_id = event_data.get('id')
        if encoded_module_id is None:
            log.warn('Video event without valid encoded_module_id (id): {0}'.format(line))
            return

        current_time = None
        old_time = None
        youtube_id = None
        if event_type == VIDEO_PLAYED:
            code = event_data.get('code')
            if code not in ('html5', 'mobile'):
                youtube_id = code
            current_time = self._check_time_offset(event_data.get('currentTime'), line)
            if current_time is None:
                return
        elif event_type == VIDEO_PAUSED:
            # Pause events may have a missing currentTime value if video is paused at the beginning,
            # so provide a default of zero.
            current_time = self._check_time_offset(event_data.get('currentTime', 0), line)
            if current_time is None:
                return
        elif event_type == VIDEO_SEEK:
            current_time = self._check_time_offset(event_data.get('new_time'), line)
            old_time = self._check_time_offset(event_data.get('old_time'), line)
            if current_time is None or old_time is None:
                return
        elif event_type == VIDEO_STOPPED:
            current_time = self._check_time_offset(event_data.get('currentTime'), line)
            if current_time is None:
                return

        if youtube_id is not None:
            youtube_id = youtube_id.encode('utf8')

        yield (
            (username.encode('utf8'), course_id.encode('utf8'), encoded_module_id.encode('utf8')),
            (timestamp, event_type, current_time, old_time, youtube_id)
        )
 def test_event_data_with_list_type(self):
     item = {"event": ["a list", "of strings"]}
     self.assertIsNone(eventlog.get_event_data(item))
     self.assertEquals(self.mock_log.debug.call_count, 1)
     self.assertIn('value of type list',
                   self.mock_log.debug.call_args[0][0])
 def test_get_empty_string_event_data(self):
     item = {"event": ''}
     self.assertEquals(eventlog.get_event_data(item), {})
     self.assertEquals(self.mock_log.error.call_count, 0)
     self.assertEquals(self.mock_log.debug.call_count, 0)
 def test_event_data_with_unknown_type(self):
     item = {"event": 1}
     self.assertIsNone(eventlog.get_event_data(item))
     self.assertEquals(self.mock_log.error.call_count, 1)
     self.assertIn('unrecognized type', self.mock_log.error.call_args[0][0])
Exemple #39
0
    def mapper(self, line):
        value = self.get_event_and_date_string(line)
        if value is None:
            return
        event, date_string = value

        username = event.get('username', '').strip()
        if not username:
            return

        event_type = event.get('event_type')
        if event_type is None:
            return

        course_id = eventlog.get_course_id(event)
        if not course_id:
            return

        event_data = eventlog.get_event_data(event)
        if event_data is None:
            return

        event_source = event.get('event_source')

        entity_id = ''
        info = {}
        if event_type == 'problem_check':
            if event_source != 'server':
                return

            problem_id = event_data.get('problem_id')
            if not problem_id:
                return

            entity_id = problem_id
            if event_data.get('success', 'incorrect').lower() == 'correct':
                info['correct'] = True
        elif event_type == 'play_video':
            encoded_module_id = event_data.get('id')
            if not encoded_module_id:
                return

            entity_id = encoded_module_id
        elif event_type[:9] == '/courses/' and re.match(
                self.SUBSECTION_ACCESSED_PATTERN, event_type):
            timestamp = eventlog.get_event_time_string(event)
            if timestamp is None:
                return
            info['path'] = event_type
            info['timestamp'] = timestamp
            event_type = SUBSECTION_VIEWED_MARKER

        date_grouping_key = date_string

        if self.interval_type == 'weekly':
            last_complete_date = self.interval.date_b - datetime.timedelta(
                days=1)  # pylint: disable=no-member
            last_weekday = last_complete_date.isoweekday()

            split_date = date_string.split('-')
            event_date = datetime.date(int(split_date[0]), int(split_date[1]),
                                       int(split_date[2]))
            event_weekday = event_date.isoweekday()

            days_until_end = last_weekday - event_weekday
            if days_until_end < 0:
                days_until_end += 7

            end_of_week_date = event_date + datetime.timedelta(
                days=days_until_end)
            date_grouping_key = end_of_week_date.isoformat()

        elif self.interval_type == 'all':
            # If gathering all data for a given user, use the last complete day of the interval
            # for joining with enrollment.
            last_complete_date = self.interval.date_b - datetime.timedelta(
                days=1)  # pylint: disable=no-member
            date_grouping_key = last_complete_date.isoformat()

        yield ((date_grouping_key, course_id, username),
               (entity_id, event_type, json.dumps(info), date_string))
def get_explicit_enrollment_output(line):
    """
    Generates output values for explicit enrollment events.

    Args:

      line: text line from a tracking event log.

    Returns:

      (course_id, user_id), (timestamp, action_value)

        where action_value = 1 (enrolled) or -1 (unenrolled)
        and timestamp is in ISO format, with resolution to the millisecond.

      or None if there is no valid enrollment event on the line.

    Example:
            (edX/DemoX/Demo_Course, dummy_userid), (2013-09-10T00:01:05.123456, 1)

    """
    # Before parsing, check that the line contains something that
    # suggests it's an enrollment event.
    if 'edx.course.enrollment' not in line:
        return None

    # try to parse the line into a dict:
    event = eventlog.parse_json_event(line)
    if event is None:
        # The line didn't parse.  For this specific purpose,
        # we can assume that all enrollment-related lines would parse,
        # and these non-parsing lines would get skipped anyway.
        return None

    # get event type, and check that it exists:
    event_type = event.get('event_type')
    if event_type is None:
        log.error("encountered event with no event_type: %s", event)
        return None

    # convert the type to a value:
    if event_type == 'edx.course.enrollment.activated':
        action_value = ENROLLED
    elif event_type == 'edx.course.enrollment.deactivated':
        action_value = UNENROLLED
    else:
        # not an enrollment event...
        return None

    # get the timestamp:
    datetime = eventlog.get_event_time(event)
    if datetime is None:
        log.error("encountered event with bad datetime: %s", event)
        return None
    timestamp = eventlog.datetime_to_timestamp(datetime)

    # Use the `user_id` from the event `data` field, since the
    # `user_id` in the `context` field is the user who made the
    # request but not necessarily the one who got enrolled.  (The
    # `course_id` should be the same in `context` as in `data`.)

    # Get the event data:
    event_data = eventlog.get_event_data(event)
    if event_data is None:
        # Assume it's already logged (and with more specifics).
        return None

    # Get the course_id from the data, and validate.
    course_id = event_data['course_id']
    if not opaque_key_util.is_valid_course_id(course_id):
        log.error("encountered explicit enrollment event with bogus course_id: %s", event)
        return None

    # Get the user_id from the data:
    user_id = event_data.get('user_id')
    if user_id is None:
        log.error("encountered explicit enrollment event with no user_id: %s", event)
        return None

    # For now, ignore the enrollment 'mode' (e.g. 'honor').

    return (course_id, user_id), (timestamp, action_value)
Exemple #41
0
def get_explicit_enrollment_output(line):
    """
    Generates output values for explicit enrollment events.

    Args:

      line: text line from a tracking event log.

    Returns:

      (course_id, user_id), (timestamp, action_value)

        where action_value = 1 (enrolled) or -1 (unenrolled)
        and timestamp is in ISO format, with resolution to the millisecond.

      or None if there is no valid enrollment event on the line.

    Example:
            (edX/DemoX/Demo_Course, dummy_userid), (2013-09-10T00:01:05.123456, 1)

    """
    # Before parsing, check that the line contains something that
    # suggests it's an enrollment event.
    if 'edx.course.enrollment' not in line:
        return None

    # try to parse the line into a dict:
    event = eventlog.parse_json_event(line)
    if event is None:
        # The line didn't parse.  For this specific purpose,
        # we can assume that all enrollment-related lines would parse,
        # and these non-parsing lines would get skipped anyway.
        return None

    # get event type, and check that it exists:
    event_type = event.get('event_type')
    if event_type is None:
        log.error("encountered event with no event_type: %s", event)
        return None

    # convert the type to a value:
    if event_type == 'edx.course.enrollment.activated':
        action_value = ENROLLED
    elif event_type == 'edx.course.enrollment.deactivated':
        action_value = UNENROLLED
    else:
        # not an enrollment event...
        return None

    # get the timestamp:
    datetime = eventlog.get_event_time(event)
    if datetime is None:
        log.error("encountered event with bad datetime: %s", event)
        return None
    timestamp = eventlog.datetime_to_timestamp(datetime)

    # Use the `user_id` from the event `data` field, since the
    # `user_id` in the `context` field is the user who made the
    # request but not necessarily the one who got enrolled.  (The
    # `course_id` should be the same in `context` as in `data`.)

    # Get the event data:
    event_data = eventlog.get_event_data(event)
    if event_data is None:
        # Assume it's already logged (and with more specifics).
        return None

    # Get the course_id from the data, and validate.
    course_id = event_data['course_id']
    if not opaque_key_util.is_valid_course_id(course_id):
        log.error(
            "encountered explicit enrollment event with bogus course_id: %s",
            event)
        return None

    # Get the user_id from the data:
    user_id = event_data.get('user_id')
    if user_id is None:
        log.error("encountered explicit enrollment event with no user_id: %s",
                  event)
        return None

    # For now, ignore the enrollment 'mode' (e.g. 'honor').

    return (course_id, user_id), (timestamp, action_value)
 def test_get_dict_event_data(self):
     item = {"event": {"a dict": "that has strings"}}
     self.assertEquals(eventlog.get_event_data(item), {"a dict": "that has strings"})
     self.assertEquals(self.mock_log.error.call_count, 0)
     self.assertEquals(self.mock_log.debug.call_count, 0)
 def test_missing_event_data(self):
     item = {"something else": "not an event"}
     self.assertIsNone(eventlog.get_event_data(item))
     self.assertEquals(self.mock_log.error.call_count, 1)
     self.assertIn('missing event value',
                   self.mock_log.error.call_args[0][0])
 def test_get_bad_string_event_data(self):
     item = {"event": "a string but not JSON"}
     self.assertIsNone(eventlog.get_event_data(item))
     self.assertEquals(self.mock_log.error.call_count, 1)
     self.assertIn('unparsable event value', self.mock_log.error.call_args[0][0])
 def test_get_empty_string_event_data(self):
     item = {"event": ''}
     self.assertEquals(eventlog.get_event_data(item), {})
     self.assertEquals(self.mock_log.error.call_count, 0)
     self.assertEquals(self.mock_log.debug.call_count, 0)
 def test_get_key_value_string_event_data(self):
     item = {"event": "a=3,b=4"}
     self.assertIsNone(eventlog.get_event_data(item))
     self.assertEquals(self.mock_log.debug.call_count, 1)
     self.assertIn('key-value pairs', self.mock_log.debug.call_args[0][0])
Exemple #47
0
 def test_get_bad_string_event_data(self):
     item = {"event": "a string but not JSON"}
     self.assertIsNone(eventlog.get_event_data(item))