def test_parse_username_unicode_handling(self):
        """
        Check that parse_username returns the expected value for a handful
        of unicode usernames.
        """
        # each case pairs the raw unicode input with the exact value
        # parse_username is expected to return for it
        cases = zip(
            [u'sès', u'مو', u'èm'],
            ['S\xc3\xa8s', '\xd9\x85\xd9\x88', '\xc3\x88m'],
        )

        for raw, wanted in cases:
            assert_equal(parse_username(raw), wanted)
 def test_validate_username(self):
     """
     Check that parse_username normalizes a problematic username into
     'Editor test-specific-0'.
     """
     # the input is meant to carry a few problems that the normalize call
     # should handle:
     #   1. a normal ascii space in front
     #   2. lowercase letters
     #   3. a nasty trailing unicode space (the reason this file has coding:utf-8)
     # NOTE(review): the literal below shows none of these characters, it looks
     # like it was masked at some point - confirm against the intended input
     raw_username = '******'

     assert_equal(parse_username(raw_username), 'Editor test-specific-0')
def validate_users(wikiusers, project, validate_as_user_ids):
    """
    Check candidate wikiusers against the MediawikiUser records of a project
    and update each wikiuser in place with the outcome.

    Wikiusers are indexed by their candidate mediawiki_username, so if several
    wikiusers share the same candidate only the last one is processed.

    A wikiuser whose candidate matches a record gets mediawiki_username and
    mediawiki_userid copied from that record, valid = True and
    reason_invalid = None.  Every remaining candidate gets valid = False and a
    reason_invalid message naming the candidate.

    Parameters
        wikiusers               : the wikiusers with a candidate mediawiki_username
        project                 : the project these wikiusers should belong to
        validate_as_user_ids    : if True, records will be checked against user_id
                                  if False, records are checked against user_name

    Raises
        Any exception hit while querying or updating is re-raised with its
        original traceback (for example a KeyError when a matched record
        can not be mapped back to one of the candidates)
    """
    session = db.get_mw_session(project)
    users_dict = {wu.mediawiki_username: wu for wu in wikiusers}

    try:
        # validate
        if validate_as_user_ids:
            # candidates that are not purely digits are left out of the query,
            # they stay in users_dict and end up marked invalid below
            keys_as_ints = [int(k) for k in users_dict if k.isdigit()]
            clause = MediawikiUser.user_id.in_(keys_as_ints)
            reason_template = 'invalid user_id: {0}'
        else:
            clause = MediawikiUser.user_name.in_(list(users_dict))
            reason_template = 'invalid user_name: {0}'

        matches = session.query(MediawikiUser).filter(clause).all()

        # update results
        for match in matches:
            if validate_as_user_ids:
                key = str(match.user_id)
            else:
                key = parse_username(match.user_name)

            # remove valid matches, so that only unmatched candidates remain;
            # raises KeyError if the record does not map back to a candidate
            wikiuser = users_dict.pop(key)
            wikiuser.mediawiki_username = match.user_name
            wikiuser.mediawiki_userid = match.user_id
            wikiuser.valid = True
            wikiuser.reason_invalid = None

        # mark the rest invalid
        for key, wikiuser in users_dict.items():
            wikiuser.reason_invalid = reason_template.format(key)
            wikiuser.valid = False
    except Exception:
        # clear out the dictionary in case of an exception, and re-raise;
        # a bare raise keeps the traceback of the original failure
        users_dict.clear()
        raise