def mapper(self, line):
        """
        Emit a (username, (timestamp, ip_address)) pair for one event-log line.

        Nothing is emitted when the line yields no event, when the event
        has no username, when no event time can be obtained, or when the
        event carries no ip address (the last case is logged as a warning).
        """
        # Events come back prefiltered by interval; None means "skip this line".
        parsed = self.get_event_and_date_string(line)
        if parsed is None:
            return
        # The date string is not needed here, only the event itself.
        event, _ = parsed

        raw_username = event.get('username')
        if not raw_username:
            return
        username = raw_username.strip()

        # A full timestamp is used rather than the date string, so that the
        # latest ip address can be picked out among events on the same day.
        # TODO: simplify the round-trip conversion, so that we don't have to
        # wait for the slow parse.  The parse does provide error checking
        # (bad dates are skipped), but that may be rare enough by now, or a
        # cheaper check could be used, e.g. rejecting any timestamp that is
        # "greater" than now.
        event_time = eventlog.get_event_time(event)
        if event_time is None:
            return
        timestamp = eventlog.datetime_to_timestamp(event_time)

        ip_address = event.get('ip')
        if ip_address:
            yield username, (timestamp, ip_address)
        else:
            log.warning("No ip_address found for user '%s' on '%s'.", username, timestamp)
Beispiel #2
0
    def mapper(self, line):
        """
        Emit a (username, (timestamp, ip_address)) pair for one event-log line.

        The line is parsed as a JSON event.  Nothing is emitted when the
        line does not parse, the event has no username, no event time can
        be obtained, the event time is at or after `self.end_datetime`, or
        the event carries no ip address (logged as a warning).
        """
        event = eventlog.parse_json_event(line)
        if event is None:
            return

        raw_username = event.get('username')
        if not raw_username:
            return

        # Surrounding whitespace is reported and then dropped, so the
        # emitted key is always the stripped form.
        username = raw_username.strip()
        if username != raw_username:
            log.error("User '%s' has extra whitespace, which is being stripped. Event: %s", raw_username, event)

        event_time = eventlog.get_event_time(event)
        # Skip events without a usable time, and those at or past the end bound.
        if event_time is None or event_time >= self.end_datetime:
            return

        timestamp = eventlog.datetime_to_timestamp(event_time)

        ip_address = event.get('ip')
        if ip_address:
            yield username, (timestamp, ip_address)
        else:
            log.warning("No ip_address found for user '%s' on '%s'.", username, timestamp)
Beispiel #3
0
    def mapper(self, line):
        """
        Map one event-log line to (username, (timestamp, ip_address)).

        Lines are skipped (nothing is yielded) when no event is returned
        for them, when the username is missing or empty, when the event
        time cannot be obtained, or when there is no ip address; the
        missing-ip case is logged as a warning.
        """
        # The helper hands back events already prefiltered by interval.
        event_and_date = self.get_event_and_date_string(line)
        if event_and_date is None:
            return
        event, _unused_date_string = event_and_date

        name = event.get('username')
        if not name:
            return
        name = name.strip()

        # Work with the timestamp, not the date string: this lets the
        # latest ip address win among events that fall on the same day.
        # TODO: simplify the round-trip conversion, so that we don't have to
        # wait for the slow parse.  The parse provides error checking that
        # lets bad dates be skipped, though such dates may now be rare (if
        # they occur at all); a faster check is also possible, e.g. reject
        # any that are "greater" than now.
        when = eventlog.get_event_time(event)
        if when is None:
            return
        timestamp = eventlog.datetime_to_timestamp(when)

        ip = event.get('ip')
        if not ip:
            log.warning("No ip_address found for user '%s' on '%s'.", name, timestamp)
            return

        result = (name, (timestamp, ip))
        yield result
 def test_good_datetime_with_no_microseconds_or_timezone(self):
     # A "time" value with neither fractional seconds nor a timezone must
     # still produce a datetime, and must convert to the expected
     # timestamp and datestamp strings.
     item = {"time": "2013-12-17T15:38:32"}
     dt_value = eventlog.get_event_time(item)
     self.assertIsNotNone(dt_value)
     # assertEqual is used instead of the deprecated assertEquals alias,
     # which no longer exists in Python 3.12.
     self.assertEqual(eventlog.datetime_to_timestamp(dt_value),
                      "2013-12-17T15:38:32")
     self.assertEqual(eventlog.datetime_to_datestamp(dt_value),
                      "2013-12-17")
def get_explicit_enrollment_output(line):
    """
    Generates output values for explicit enrollment events.

    Args:

      line: text line from a tracking event log.

    Returns:

      (course_id, user_id), (timestamp, action_value)

        where action_value is ENROLLED (for an activation event) or
        UNENROLLED (for a deactivation event), and timestamp is the
        ISO-format string produced by eventlog.datetime_to_timestamp.

      or None if there is no valid enrollment event on the line.  That
      covers lines that do not mention enrollment, lines that fail to
      parse, and events with a missing event_type, bad datetime, missing
      event data, missing or invalid course_id, or missing user_id.

    Example:
            (edX/DemoX/Demo_Course, dummy_userid), (2013-09-10T00:01:05.123456, 1)

    """
    # Before parsing, check that the line contains something that
    # suggests it's an enrollment event.  This cheap substring test
    # avoids parsing lines that cannot be of interest.
    if 'edx.course.enrollment' not in line:
        return None

    # try to parse the line into a dict:
    event = eventlog.parse_json_event(line)
    if event is None:
        # The line didn't parse.  For this specific purpose,
        # we can assume that all enrollment-related lines would parse,
        # and these non-parsing lines would get skipped anyway.
        return None

    # get event type, and check that it exists:
    event_type = event.get('event_type')
    if event_type is None:
        log.error("encountered event with no event_type: %s", event)
        return None

    # convert the type to a value:
    if event_type == 'edx.course.enrollment.activated':
        action_value = ENROLLED
    elif event_type == 'edx.course.enrollment.deactivated':
        action_value = UNENROLLED
    else:
        # not an enrollment event...
        return None

    # get the timestamp (the local is deliberately not called `datetime`,
    # which would shadow the standard-library module name):
    event_time = eventlog.get_event_time(event)
    if event_time is None:
        log.error("encountered event with bad datetime: %s", event)
        return None
    timestamp = eventlog.datetime_to_timestamp(event_time)

    # Use the `user_id` from the event `data` field, since the
    # `user_id` in the `context` field is the user who made the
    # request but not necessarily the one who got enrolled.  (The
    # `course_id` should be the same in `context` as in `data`.)

    # Get the event data:
    event_data = eventlog.get_event_data(event)
    if event_data is None:
        # Assume it's already logged (and with more specifics).
        return None

    # Get the course_id from the data, and validate.  A missing key is
    # treated like an invalid id (logged and skipped) instead of raising
    # KeyError, consistent with how a missing user_id is handled below.
    course_id = event_data.get('course_id')
    if course_id is None or not opaque_key_util.is_valid_course_id(course_id):
        log.error("encountered explicit enrollment event with bogus course_id: %s", event)
        return None

    # Get the user_id from the data:
    user_id = event_data.get('user_id')
    if user_id is None:
        log.error("encountered explicit enrollment event with no user_id: %s", event)
        return None

    # For now, ignore the enrollment 'mode' (e.g. 'honor').

    return (course_id, user_id), (timestamp, action_value)
Beispiel #6
0
def get_explicit_enrollment_output(line):
    """
    Generates output values for explicit enrollment events.

    Args:

      line: text line from a tracking event log.

    Returns:

      (course_id, user_id), (timestamp, action_value)

        where action_value is ENROLLED (for an activation event) or
        UNENROLLED (for a deactivation event), and timestamp is the
        ISO-format string produced by eventlog.datetime_to_timestamp.

      or None if there is no valid enrollment event on the line.  That
      covers lines that do not mention enrollment, lines that fail to
      parse, and events with a missing event_type, bad datetime, missing
      event data, missing or invalid course_id, or missing user_id.

    Example:
            (edX/DemoX/Demo_Course, dummy_userid), (2013-09-10T00:01:05.123456, 1)

    """
    # Before parsing, check that the line contains something that
    # suggests it's an enrollment event.  This cheap substring test
    # avoids parsing lines that cannot be of interest.
    if 'edx.course.enrollment' not in line:
        return None

    # try to parse the line into a dict:
    event = eventlog.parse_json_event(line)
    if event is None:
        # The line didn't parse.  For this specific purpose,
        # we can assume that all enrollment-related lines would parse,
        # and these non-parsing lines would get skipped anyway.
        return None

    # get event type, and check that it exists:
    event_type = event.get('event_type')
    if event_type is None:
        log.error("encountered event with no event_type: %s", event)
        return None

    # convert the type to a value:
    if event_type == 'edx.course.enrollment.activated':
        action_value = ENROLLED
    elif event_type == 'edx.course.enrollment.deactivated':
        action_value = UNENROLLED
    else:
        # not an enrollment event...
        return None

    # get the timestamp (the local is deliberately not called `datetime`,
    # which would shadow the standard-library module name):
    event_time = eventlog.get_event_time(event)
    if event_time is None:
        log.error("encountered event with bad datetime: %s", event)
        return None
    timestamp = eventlog.datetime_to_timestamp(event_time)

    # Use the `user_id` from the event `data` field, since the
    # `user_id` in the `context` field is the user who made the
    # request but not necessarily the one who got enrolled.  (The
    # `course_id` should be the same in `context` as in `data`.)

    # Get the event data:
    event_data = eventlog.get_event_data(event)
    if event_data is None:
        # Assume it's already logged (and with more specifics).
        return None

    # Get the course_id from the data, and validate.  A missing key is
    # treated like an invalid id (logged and skipped) instead of raising
    # KeyError, consistent with how a missing user_id is handled below.
    course_id = event_data.get('course_id')
    if course_id is None or not opaque_key_util.is_valid_course_id(course_id):
        log.error(
            "encountered explicit enrollment event with bogus course_id: %s",
            event)
        return None

    # Get the user_id from the data:
    user_id = event_data.get('user_id')
    if user_id is None:
        log.error("encountered explicit enrollment event with no user_id: %s",
                  event)
        return None

    # For now, ignore the enrollment 'mode' (e.g. 'honor').

    return (course_id, user_id), (timestamp, action_value)
 def test_good_datetime_with_no_microseconds_or_timezone(self):
     # A "time" value with neither fractional seconds nor a timezone must
     # still produce a datetime, and must convert to the expected
     # timestamp and datestamp strings.
     item = {"time": "2013-12-17T15:38:32"}
     dt_value = eventlog.get_event_time(item)
     self.assertIsNotNone(dt_value)
     # assertEqual is used instead of the deprecated assertEquals alias,
     # which no longer exists in Python 3.12.
     self.assertEqual(eventlog.datetime_to_timestamp(dt_value), "2013-12-17T15:38:32")
     self.assertEqual(eventlog.datetime_to_datestamp(dt_value), "2013-12-17")