Example #1
0
    def obfuscate_wiki_entry(self, line, user_profile):
        """Return `line` with its content and user_message columns obfuscated.

        `line` is a UTF-8 encoded, tab-separated row; its columns are unpacked
        into an ArticleRevisionRecord.  `user_profile` may be None; otherwise
        it is queried with `.get(user_id)` and the `name` of the entry found
        is handed to the obfuscator alongside the text.

        The return value is the tab-joined row, UTF-8 encoded, with any
        trailing CR/LF characters of the input removed.
        """
        columns = line.rstrip('\r\n').decode('utf8').split('\t')
        revision = ArticleRevisionRecord(*columns)

        # Treat "no profile map supplied" like a NULL user: nothing to look up.
        author_id = revision.user_id if user_profile is not None else 'NULL'
        hints = {}
        if author_id != 'NULL':
            profile = user_profile.get(author_id)
            if profile is not None:
                hints['name'] = [
                    profile.name,
                ]
            else:
                log.error("Missing profile entry for user_id %s", author_id)

        # The literal column names are tolerated as values -- presumably so a
        # header row does not trigger the warnings (TODO confirm).
        if revision.ip_address not in ('NULL', 'ip_address'):
            log.warning("Found non-NULL IP address")
        if revision.automatic_log not in ('', 'automatic_log'):
            log.warning(u"Found non-zero-length automatic_log: %s",
                        revision.automatic_log)

        def scrub(raw_value):
            # Decode backslash escapes, obfuscate, then re-encode them.
            decoded = backslash_decode_value(raw_value)
            cleaned = self.obfuscator.obfuscate_text(decoded, hints)
            return backslash_encode_value(cleaned)

        # The record's values can't be reset, so the split columns are
        # overwritten instead (12 and 2 presumably being the positions of
        # content and user_message in the record).
        columns[12] = scrub(revision.content)
        columns[2] = scrub(revision.user_message)
        return u"\t".join(columns).encode('utf-8')
    def filter_row(self, row):
        """Scrub one wiki row in place and return it.

        Columns 2-4 (user_message, automatic_log, ip_address) are blanked,
        column 5 (the user id) is replaced by `self.remap_id(...)` unless it
        is the string 'NULL', and column 12 is backslash-decoded, obfuscated
        and re-encoded as UTF-8.  Whatever `self.user_by_id` knows about the
        user (username / name) is passed to the obfuscator.
        """
        raw_user_id = row[5]
        has_user = raw_user_id != 'NULL'
        hints = {}
        if has_user:
            numeric_id = int(raw_user_id)
            hints['user_id'] = [numeric_id, ]
            try:
                entry = self.user_by_id[numeric_id]
                for key in ('username', 'name'):
                    if key in entry:
                        hints[key] = [entry[key], ]
            except KeyError:
                log.error("Unable to find wiki user_id: %s in the user_by_id map", numeric_id)

        # Blank user_message (2), automatic_log (3) and ip_address (4).
        for index in (2, 3, 4):
            row[index] = ''

        # A 'NULL' user id is preserved as-is; anything else gets remapped.
        if has_user:
            row[5] = self.remap_id(numeric_id)

        decoded = backslash_decode_value(row[12].decode('utf8'))
        obfuscated = self.obfuscator.obfuscate_text(decoded, hints)
        row[12] = backslash_encode_value(obfuscated).encode('utf8')

        return row
Example #3
0
    def filter_row(self, row):
        """Obfuscate a wiki row in place and hand it back.

        The user id in column 5 is converted to int and used to look up the
        user in `self.user_by_id`; any username / name found there is given
        to the obfuscator together with the id.  Columns 2, 3 and 4 are
        emptied, a non-'NULL' user id is swapped for `self.remap_id(id)`, and
        the UTF-8 content in column 12 is replaced by its obfuscated form.
        """
        original_id = row[5]
        numeric_id = None
        user_info = {}
        if original_id != 'NULL':
            numeric_id = int(original_id)
            user_info['user_id'] = [numeric_id]
            try:
                entry = self.user_by_id[numeric_id]
                user_info.update(
                    (key, [entry[key]])
                    for key in ('username', 'name')
                    if key in entry
                )
            except KeyError:
                log.error("Unable to find wiki user_id: %s in the user_by_id map", numeric_id)

        # user_message, automatic_log, ip_address -- all dropped.
        row[2], row[3], row[4] = '', '', ''

        # numeric_id stays None only when the column held 'NULL', in which
        # case the 'NULL' marker is left untouched.
        if numeric_id is not None:
            row[5] = self.remap_id(numeric_id)

        wiki_text = backslash_decode_value(row[12].decode('utf8'))
        row[12] = backslash_encode_value(
            self.obfuscator.obfuscate_text(wiki_text, user_info)
        ).encode('utf8')

        return row
    def obfuscate_wiki_entry(self, line, user_profile):
        """Obfuscate the free-text columns of one tab-separated wiki line.

        The UTF-8 `line` is stripped of trailing CR/LF, split on tabs and
        unpacked into an ArticleRevisionRecord.  When `user_profile` is not
        None and the record's user_id is not 'NULL', the profile's `name` is
        supplied to the obfuscator.  Returns the re-joined line encoded as
        UTF-8.
        """
        fields = line.rstrip('\r\n').decode('utf8').split('\t')
        record = ArticleRevisionRecord(*fields)

        user_info = {}
        # Without a profile map there is nothing to look up, which is handled
        # exactly like a 'NULL' user id.
        lookup_id = 'NULL' if user_profile is None else record.user_id
        if lookup_id != 'NULL':
            profile_entry = user_profile.get(lookup_id)
            if profile_entry is None:
                log.error("Missing profile entry for user_id %s", lookup_id)
            else:
                user_info['name'] = [profile_entry.name]

        ip_address = record.ip_address
        if ip_address != 'NULL' and ip_address != 'ip_address':
            log.warning("Found non-NULL IP address")
        automatic_log = record.automatic_log
        if automatic_log != '' and automatic_log != 'automatic_log':
            log.warning(u"Found non-zero-length automatic_log: %s", automatic_log)

        # Can't reset values on the record, so the split fields are updated.
        # Content is processed before user_message, as pairs of
        # (field index, raw value).
        for index, raw_value in ((12, record.content), (2, record.user_message)):
            plain_text = backslash_decode_value(raw_value)
            obfuscated = self.obfuscator.obfuscate_text(plain_text, user_info)
            fields[index] = backslash_encode_value(obfuscated)
        return u"\t".join(fields).encode('utf-8')
def load_user_info(userinfo_path):
    """Read a custom user-info file (username, email, user-id, fullname) from the local fs.

    The first line of the file is skipped.  Every other line is split on
    tabs; each field is backslash-decoded and stripped of surrounding
    whitespace before being unpacked into a UserInfoRecord.

    Returns a dict in which every record appears twice -- under its integer
    user id and under its username -- mapping to the same entry dict.  Each
    entry maps a record attribute name to a one-element list holding its value.
    """
    users = {}
    with open(userinfo_path, 'r') as infile:
        next(infile, None)  # discard the first line, if there is one
        for raw_line in infile:
            cells = raw_line.rstrip('\r\n').decode('utf8').split('\t')
            # Embedded newlines and tabs are only interpreted after splitting;
            # stripping removes extra whitespace on usernames and fullnames.
            cleaned = [backslash_decode_value(cell).strip() for cell in cells]
            record = UserInfoRecord(*cleaned)
            entry = dict((key, [value, ]) for key, value in record.__dict__.iteritems())
            # One key is an int and the other a string, so the two should not collide.
            users[int(record.user_id)] = users[record.username] = entry
    return users
Example #6
0
def load_user_info(userinfo_path):
    """Reads a custom user-info file from the local fs that contains username, email, user-id, fullname.

    The file's first line is skipped and each following line is parsed as
    tab-separated, backslash-encoded fields of a UserInfoRecord.  The result
    maps both int(user_id) and username to one shared dict per record, whose
    values are the record's attributes wrapped in single-element lists.
    """
    result = {}
    with open(userinfo_path, 'r') as infile:
        next(infile, None)
        for line in infile:
            # Split first, then decode escapes per field so embedded tabs and
            # newlines are interpreted; strip() drops padding around values.
            record = UserInfoRecord(*[
                backslash_decode_value(raw_field).strip()
                for raw_field in line.rstrip('\r\n').decode('utf8').split('\t')
            ])
            entry = {}
            for key, value in record.__dict__.iteritems():
                entry[key] = [value]
            # Stored under an int key and a string key, which should
            # therefore not collide.
            result[int(record.user_id)] = entry
            result[record.username] = entry
    return result
 def test_decoding(self, text, expected_result):
     """Check that backslash_decode_value(text) equals expected_result."""
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(obfuscate_util.backslash_decode_value(text),
                      expected_result)
 def test_encoding_round_trip(self, text):
     """Check that encoding and then decoding `text` gives back `text`."""
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(
         text,
         obfuscate_util.backslash_decode_value(
             obfuscate_util.backslash_encode_value(text)))
 def test_decoding(self, text, expected_result):
     """Verify that decoding `text` yields `expected_result`."""
     # Uses assertEqual: the assertEquals spelling is a deprecated alias that
     # no longer exists as of Python 3.12.
     decoded = obfuscate_util.backslash_decode_value(text)
     self.assertEqual(decoded, expected_result)
 def test_encoding_round_trip(self, text):
     """Verify that backslash encoding followed by decoding is lossless for `text`."""
     # Uses assertEqual: the assertEquals spelling is a deprecated alias that
     # no longer exists as of Python 3.12.
     encoded = obfuscate_util.backslash_encode_value(text)
     self.assertEqual(text, obfuscate_util.backslash_decode_value(encoded))