def compare_event_lines(self, expected_line, reducer_output_line):
    """Assert that two encoded event lines decode to equal dicts.

    Both lines are decoded with ``eventlog.decode_json`` and compared with
    ``assertDictEqual`` rather than as raw strings, so a difference in key
    ordering within the encoded text does not cause a failure.
    """
    decode = eventlog.decode_json
    # Decode the reducer output first, then the expectation.
    actual = decode(reducer_output_line)
    wanted = decode(expected_line)
    self.assertDictEqual(wanted, actual)
Beispiel #2
0
 def compare_event_lines(self, expected_line, reducer_output_line):
     """Check that the reducer's output line matches the expected event.

     The comparison is made on the decoded dicts (via ``eventlog.decode_json``)
     instead of on the encoded strings, because the encoded form may order
     keys differently.
     """
     # Reducer output is decoded before the expected line.
     actual_event, wanted_event = [
         eventlog.decode_json(raw)
         for raw in (reducer_output_line, expected_line)
     ]
     self.assertDictEqual(wanted_event, actual_event)
Beispiel #3
0
    def obfuscate_event_entry(self, line):
        """Obfuscate the 'event' payload of a single event-log line.

        The line is parsed with ``eventlog.parse_json_event``.  The payload
        found under the ``'event'`` key is handed to
        ``self.obfuscator.obfuscate_structure`` and, if that returns a
        non-None value, written back into the event before re-serializing.

        Args:
            line: the raw event-log entry to process.

        Returns:
            * ``line`` unchanged, if it fails to parse or no course_id can be
              determined for it (both cases are logged as errors);
            * ``None``, if the payload contains ``'POST'`` and
              ``self.parameters['skip_post']`` is truthy;
            * otherwise the event re-encoded with ``json.dumps``.
        """
        event = eventlog.parse_json_event(line)
        if event is None:
            # Unexpected here...
            log.error(u"Encountered event entry which failed to parse: %r",
                      line)
            return line
        course_id = eventlog.get_course_id(event, from_url=True)
        if course_id is None:
            # Unexpected here...
            log.error(u"Encountered event entry with no course_id: %r", line)
            return line

        # eventlog.get_event_data(event) is deliberately NOT used here: we need
        # to know whether the payload was JSON-decoded, so that it can be
        # re-encoded into its original form once cleaned.
        event_json_decoded = False
        event_data = event.get('event')

        if event_data is None:
            log.error(u"Encountered event entry with no 'event' payload: %r",
                      line)
        elif event_data == '':
            # Note that this happens with some browser events.  Instead of
            # failing to parse it as a JSON string, just leave as-is.
            pass
        elif isinstance(event_data, basestring):
            # Cjson produces str, while json produces unicode.  Hmm.
            if len(event_data) == 512 and 'POST' in event_data:
                # It's a truncated JSON string.  But we're going to throw it
                # out anyway, so no worries.
                pass
            elif '{' not in event_data and '=' in event_data:
                # It's a key-value pair from a browser event.  Just process
                # as-is, rather than parsing and reassembling.
                pass
            else:
                try:
                    event_data = eventlog.decode_json(event_data)
                    event_json_decoded = True
                except Exception:
                    # Best effort: keep the undecoded string and carry on.
                    log.error(
                        u"Encountered event entry with unparseable 'event' payload: %r",
                        line)

        # TODO: update the comment!  This is where we traverse the event in search of values that should be "cleansed".
        # Much along the model of what we already do for 'state' in CWSM.  Except that we need to be more
        # flexible in determining the level of backslash encoding -- decode and re-encode as many levels as needed
        # to get to strings that can be properly interpreted.
        # NOTE(review): event_data may still be None at this point; how the
        # two helpers below treat a None payload is not visible here -- confirm.
        event_user_info = self.get_userinfo_from_event(event, event_data)

        # A missing (None) or scalar payload does not support the ``in``
        # operator; treat it as "no POST" instead of raising TypeError.
        try:
            has_post = 'POST' in event_data
        except TypeError:
            has_post = False
        if has_post and self.parameters['skip_post']:
            return None

        updated_event_data = self.obfuscator.obfuscate_structure(
            event_data, u"event", event_user_info)

        if updated_event_data is not None:
            event_source = event.get('event_source')
            event_type = event.get('event_type')
            log.info(u"Obfuscated %s event with event_type = '%s'",
                     event_source, event_type)

            if event_json_decoded:
                # The payload arrived as a JSON string, so store it back as one.
                # TODO: should really use cjson, if that were originally used for decoding the json.
                updated_event_data = json.dumps(updated_event_data)

            event['event'] = updated_event_data

        # TODO: should really use cjson, if that were originally used for decoding the json.
        return json.dumps(event)
    def obfuscate_event_entry(self, line):
        """Obfuscate the 'event' payload of a single event-log line.

        The line is parsed with ``eventlog.parse_json_event``.  The payload
        found under the ``'event'`` key is handed to
        ``self.obfuscator.obfuscate_structure`` and, if that returns a
        non-None value, written back into the event before re-serializing.

        Args:
            line: the raw event-log entry to process.

        Returns:
            * ``line`` unchanged, if it fails to parse or no course_id can be
              determined for it (both cases are logged as errors);
            * ``None``, if the payload contains ``'POST'`` and
              ``self.parameters['skip_post']`` is truthy;
            * otherwise the event re-encoded with ``json.dumps``.
        """
        event = eventlog.parse_json_event(line)
        if event is None:
            # Unexpected here...
            log.error(u"Encountered event entry which failed to parse: %r", line)
            return line
        course_id = eventlog.get_course_id(event, from_url=True)
        if course_id is None:
            # Unexpected here...
            log.error(u"Encountered event entry with no course_id: %r", line)
            return line

        # eventlog.get_event_data(event) is deliberately NOT used here: we need
        # to know whether the payload was JSON-decoded, so that it can be
        # re-encoded into its original form once cleaned.
        event_json_decoded = False
        event_data = event.get('event')

        if event_data is None:
            log.error(u"Encountered event entry with no 'event' payload: %r", line)
        elif event_data == '':
            # Note that this happens with some browser events.  Instead of
            # failing to parse it as a JSON string, just leave as-is.
            pass
        elif isinstance(event_data, basestring):
            # Cjson produces str, while json produces unicode.  Hmm.
            if len(event_data) == 512 and 'POST' in event_data:
                # It's a truncated JSON string.  But we're going to throw it out anyway, so no worries.
                pass
            elif '{' not in event_data and '=' in event_data:
                # It's a key-value pair from a browser event.  Just process as-is, rather than parsing and reassembling.
                pass
            else:
                try:
                    event_data = eventlog.decode_json(event_data)
                    event_json_decoded = True
                except Exception:
                    # Best effort: keep the undecoded string and carry on.
                    log.error(u"Encountered event entry with unparseable 'event' payload: %r", line)

        # TODO: update the comment!  This is where we traverse the event in search of values that should be "cleansed".
        # Much along the model of what we already do for 'state' in CWSM.  Except that we need to be more
        # flexible in determining the level of backslash encoding -- decode and re-encode as many levels as needed
        # to get to strings that can be properly interpreted.
        # NOTE(review): event_data may still be None at this point; how the
        # two helpers below treat a None payload is not visible here -- confirm.
        event_user_info = self.get_userinfo_from_event(event, event_data)

        # A missing (None) or scalar payload does not support the ``in``
        # operator; treat it as "no POST" instead of raising TypeError.
        try:
            has_post = 'POST' in event_data
        except TypeError:
            has_post = False
        if has_post and self.parameters['skip_post']:
            return None

        updated_event_data = self.obfuscator.obfuscate_structure(event_data, u"event", event_user_info)

        if updated_event_data is not None:
            event_source = event.get('event_source')
            event_type = event.get('event_type')
            log.info(u"Obfuscated %s event with event_type = '%s'", event_source, event_type)

            if event_json_decoded:
                # The payload arrived as a JSON string, so store it back as one.
                # TODO: should really use cjson, if that were originally used for decoding the json.
                updated_event_data = json.dumps(updated_event_data)

            event['event'] = updated_event_data

        # TODO: should really use cjson, if that were originally used for decoding the json.
        return json.dumps(event)