forked from PerceptumNL/KhanLatest
/
nicknames.py
122 lines (93 loc) · 3.75 KB
/
nicknames.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from phantom_users.phantom_util import is_phantom_id
import facebook_util
from third_party import search
import layer_cache
from google.appengine.ext import db
# Now that we're supporting unicode nicknames, ensure all callers get a
# consistent type of object back by converting everything to unicode.
# This fixes issue #4297.
def to_unicode(s):
    """Return ``s`` as a unicode object.

    Values that are already unicode are handed back untouched. Anything
    else is decoded as UTF-8 using the 'ignore' error handler, so bytes
    that cannot be decoded are dropped instead of raising.
    """
    if isinstance(s, unicode):
        return s
    return unicode(s, 'utf-8', 'ignore')
def get_default_nickname_for(user_data):
    """Infer a fallback nickname for a user who has none stored locally.

    The nickname comes from Facebook for Facebook user ids, is a fixed
    placeholder for phantom user ids, and otherwise is the part of the
    e-mail address before the first '@'.

    Returns the nickname as unicode, or None when ``user_data`` is falsy or
    lacks a user id or an e-mail address.
    """
    if not user_data:
        return None

    uid = user_data.user_id
    address = user_data.email
    if not (uid and address):
        return None

    if facebook_util.is_facebook_user_id(uid):
        return to_unicode(facebook_util.get_facebook_nickname(uid))

    if is_phantom_id(uid):
        # Phantom users are prompted to log in and save their progress all
        # over the site, so they get a placeholder name (Dutch for
        # "Not logged in!").
        return to_unicode("Niet ingelogd!")

    return to_unicode(address.split('@')[0])
def combinations(iterable, r):
    """Yield every r-length subsequence of ``iterable`` as a tuple.

    Combinations are emitted in lexicographic order of element position, so
    a sorted input produces sorted combination tuples. Elements are treated
    as unique by position, not by value: if the input has no repeated
    values, no combination contains a repeated value.

    Nothing is yielded when ``r`` exceeds the number of elements; ``r == 0``
    yields a single empty tuple.

    Adapted from the recipe at
    http://docs.python.org/library/itertools.html
    """
    pool = tuple(iterable)
    n = len(pool)
    if r > n:
        return

    # The indices are mutated in place below, so they must be a real list;
    # range() only returns a list on Python 2.
    indices = list(range(r))
    yield tuple(pool[k] for k in indices)

    while True:
        # Find the rightmost index that has not yet reached its final
        # position; if every index is maxed out we are done.
        for i in reversed(range(r)):
            if indices[i] != i + n - r:
                break
        else:
            return

        # Advance that index and reset everything to its right so the
        # indices stay strictly increasing.
        indices[i] += 1
        for j in range(i + 1, r):
            indices[j] = indices[j - 1] + 1
        yield tuple(pool[k] for k in indices)
# Separator placed between the sorted tokens of a nickname index string and
# of a search query (see build_index_strings and build_search_query).
# NOTE(review): presumably U+FFFF was chosen because it should never occur in
# a real nickname -- confirm against models.NicknameIndex.
INDEX_DELIMITER = u"\uffff"
def build_index_strings(nickname):
    """Build the sanitized index strings for a nickname.

    The strings let a search match on any subset of a user's names (first,
    middle, last, ...). See models.NicknameIndex for details.

    Returns a list with one string per non-empty combination of the
    nickname's tokens, each combination joined with INDEX_DELIMITER.
    Shorter combinations come first.
    """
    # Raw tokens: punctuation becomes whitespace, everything is lowercased
    # and sorted, e.g. "Sal Amin Khan" -> ["amin", "khan", "sal"].
    normalized = search.PUNCTUATION_REGEX.sub(' ', nickname).lower()
    words = sorted(normalized.split())

    # One entry per combination of 1..len(words) tokens, to support
    # multi-term prefix scans, e.g.
    # ("amin",), ("khan",), ("sal",), ("amin", "khan"), ("amin", "sal"), ...
    index_strings = []
    for size in range(1, len(words) + 1):
        for combo in combinations(words, size):
            index_strings.append(INDEX_DELIMITER.join(combo))
    return index_strings
def build_search_query(raw_query):
    """Normalize a raw search string into the form used by the index.

    The query is tokenized the same way build_index_strings tokenizes a
    nickname: punctuation is replaced with spaces, the text is lowercased
    and split on whitespace, and the tokens are sorted. The sorted tokens
    are then joined with INDEX_DELIMITER. See models.NicknameIndex for
    details.
    """
    cleaned = search.PUNCTUATION_REGEX.sub(' ', raw_query)
    terms = sorted(cleaned.lower().split())
    return INDEX_DELIMITER.join(terms)
@layer_cache.cache(persist_across_app_versions=True)
def _get_offensive_terms():
    """Return the ``term`` of every OffensiveTerm entity as a frozenset.

    The result is cached through layer_cache and kept across app versions.
    """
    terms = set()
    for entity in OffensiveTerm.all():
        terms.add(entity.term)
    return frozenset(terms)
class OffensiveTerm(db.Model):
    """Datastore entity holding one term that nicknames are checked against."""

    # The blacklisted term; is_valid_nickname compares lowercased nickname
    # tokens against these values.
    term = db.StringProperty()
def is_valid_nickname(name):
    """Return True unless ``name`` contains a blatantly offensive term.

    The check is deliberately conservative to avoid embarrassing false
    positives: the name is split on single spaces and each lowercased token
    must match a blacklisted term exactly to be rejected. The community and
    social pressure will hopefully deal with the keener offenders.
    """
    offensive = _get_offensive_terms()
    for word in name.split(' '):
        if word.lower() in offensive:
            return False
    return True