/
app.py
392 lines (323 loc) · 14.9 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
from flask import Flask, render_template, request, redirect
from numpy import dot, sqrt,load,array,argsort,sum,count_nonzero,any, log
#from pandas import read_csv
from requests import get
from time import sleep
from xmltodict import parse
from random import sample, choice
from dill import load as lode
import networkx as nx
from json import dumps
from networkx.readwrite import json_graph
from sklearn.preprocessing import StandardScaler
#from pandas import read_csv
#from bs4 import UnicodeDammit
# The Flask application object; every route and error handler below is registered on it.
app = Flask(__name__)
# A plain dict hung on the app object. The request handlers below use it to hand the
# submitted username, game list and ratings from one request to the next.
# NOTE(review): this looks like it is shared by every client of the process, so
# concurrent users could overwrite each other's data -- confirm and consider a session.
app.vars={}
class catMechTransformer():
    '''
    A transformer that takes information about a game and returns a vector with the properties one-hot encoded.

    Every input row is a sequence of seven entries: five values that are
    copied through unchanged, then an iterable of category indices (entry 5)
    and an iterable of mechanic indices (entry 6).
    '''
    def __init__(self):
        # widths of the two one-hot blocks appended to every row
        self.cats = 83
        self.mechs = 51

    def fit(self, X, y=None):
        '''
        Nothing is learned from the data; returns self so calls can be chained.
        y is optional so that fit(X) works as well as fit(X, y).
        '''
        return self

    def flatten(self, l):
        '''
        Turns one row into a flat list: the first five entries followed by
        self.cats category flags and self.mechs mechanic flags.
        Raises IndexError when an index falls outside its block.
        '''
        cats = [0] * self.cats
        for i in l[5]:
            cats[i] = 1
        mechs = [0] * self.mechs
        for i in l[6]:
            mechs[i] = 1
        # list() lets tuples (or other sequences) be used as rows too
        return list(l[:5]) + cats + mechs

    def transform(self, X):
        '''
        Flattens every row of X and returns the results as a list of lists.
        '''
        return [self.flatten(i) for i in X]
def category_and_mechanic_table(games):
    '''
    Makes a table of game properties, mostly redundant with what catMechTransformer does.

    games is a sequence of rows; columns 3, 4, 7, 8 and 9 are copied into the
    output (column 4 capped at 10, column 7 divided by 60) and columns 10 and
    11 hold the row's categories and mechanics.

    Returns (categories, mechanics, outData): the sorted distinct categories,
    the sorted distinct mechanics and one output row per game made of the five
    copied values followed by a 0/1 flag per category and per mechanic.
    Sorting keeps the column order identical from one run to the next, which
    plain set iteration does not guarantee for strings.
    '''
    categories = sorted({category for game in games for category in game[10]})
    mechanics = sorted({mechanic for game in games for mechanic in game[11]})
    outData = []
    for game in games:
        # sets make the membership tests below constant time
        gameCats, gameMechs = set(game[10]), set(game[11])
        gameData = list(game[3:4]) + [game[4] if game[4] < 10 else 10] + [game[7]/60] + list(game[8:10])
        gameData += [1 if category in gameCats else 0 for category in categories]
        gameData += [1 if mechanic in gameMechs else 0 for mechanic in mechanics]
        outData.append(gameData)
    return categories, mechanics, outData
def tanimotoSimilarity(user1, user2):
    '''
    Returns the dot product of the two vectors, or 0 when the entries of
    either vector sum to zero.

    Despite the name, the Tanimoto formula itself is not applied (it is only
    present in the original as a commented-out line); for unit-length inputs
    the result is their cosine similarity.
    '''
    similarity = dot(user1, user2)
    eitherEmpty = sum(user1) == 0 or sum(user2) == 0
    return 0 if eitherEmpty else similarity
def GameTree(row):
    '''
    builds a graph of the most similar games to a given hypothetical game (row)

    row is a flat feature vector for the hypothetical game. It is scaled with
    app.transformer and compared with every row of app.gameNorm; the five best
    matches hang off the root node with edge weight 3, their five nearest
    neighbours in app.gameRecs get weight 2 and the neighbours of those get
    weight 1.

    Returns (G, positions, names): the networkx graph, the app.gameData
    positions of the five games closest to the hypothetical game (best first)
    and their names.

    Raises KeyError when a selected game is missing from the name table (see
    the comment on d below).
    '''
    root = u'A Hypothetical Game'
    # position in app.gameData -> game name, only for games whose column 15
    # exceeds 500 (presumably the owner count -- TODO confirm)
    d = {i: game[1] for i, game in enumerate(app.gameData) if game[15] > 500}

    def closest(scores, weight):
        # positions of the five largest scores, best first, tagged with an edge weight
        best = argsort(scores)[::-1][:5]
        return [(int(k), weight) for k in best]

    scaled = app.transformer.transform([row])[0]
    hypeSims = [tanimotoSimilarity(scaled, i) for i in app.gameNorm]
    out = {root: closest(hypeSims, 3)}
    for k, _ in out[root]:
        out[k] = closest(app.gameRecs[k], 2)
        for j, _ in out[k]:
            # a later assignment to the same key replaces the earlier one,
            # exactly as in the original loop
            out[j] = closest(app.gameRecs[j], 1)

    # every game position that shows up as a key or as a neighbour
    positions = set()
    for key, neighbours in out.items():
        if key != root:
            positions.add(key)
        positions.update(k for k, _ in neighbours)
    nodeNamed = [d[i] for i in positions if i in d]

    G = nx.Graph()
    G.add_nodes_from(nodeNamed, group = 1)
    G.add_node(root, group = 2)
    for i, neighbours in out.items():
        source = i if i == root else d[i]
        for j, weight in neighbours:
            G.add_edge(source, d[j], weight=weight)
    return G, [k for k, _ in out[root]], [d[k] for k, _ in out[root]]
# Loading in resources. I know pickles and .npy files are unsafe, but for testing the performance gains were worth it.
# gameList is a list of games: their BoardGameGeek id number and name.
# aMatrix is a precomputed similarity matrix of user recommendations, such that we can dot our vector of game ratings into it to get a user-user similarity. This user-user similarity uses cosine similarity.
# aMatrixMasked is the previous matrix with the nonzero entries set to one; it allows us to quickly sum the similarities in user-user.
#app.gamesNames=[[i[0],i[1]] for i in read_csv("gameList.csv", quotechar='"', skipinitialspace=True).as_matrix()]
#app.recs = load('aMatrixHalfFloat.npy')
#app.recsMasked = load('aMatrixMaskedHalfFloat.npy')
# itemitem is a similarity between the columns of a using Tanimoto similarity
#app.itemRecs = load('itemitemHalfFloat.npy')
# gamegame is a similarity between the rows of a matrix of game information using Tanimoto similarity. It's slightly weighted towards game ownership and comments on the game.
app.gameRecs = load('gamegameHalfFloat.npy')
# gameData.p is a pickle; current numpy releases refuse to read pickled data
# unless allow_pickle is set.
# SECURITY: unpickling runs arbitrary code, so this file (and the dill model
# loaded at the end of this section) must only ever come from a trusted source.
app.gameData = load('gameData.p', allow_pickle=True)
# [id, name] pairs in the same order as app.gameData
app.gamesNames = [[i[0], i[1]] for i in app.gameData]
# column 13 of every game; used as the baseline score in the recommendation routes
app.gameScores = [i[13] for i in app.gameData]
app.gameFactors = load('gameFactors.npy')
app.cats, app.mechs, app.gameNorm = category_and_mechanic_table(app.gameData)
app.transformer = StandardScaler().fit(app.gameNorm)
# NOTE(review): outData is not read anywhere else in this file
outData = app.transformer.transform(app.gameNorm)
# rescale every row of the property table to unit Euclidean length so that a
# dot product between two rows is their cosine similarity
cNorm = []
for i in app.gameNorm:
    j = 1./sqrt(float(dot(i, i)))
    cNorm.append([j * x for x in i])
app.gameNorm = cNorm
del cNorm
# the with-block closes the model file once it has been read
with open('gamescoremodel', 'rb') as modelFile:
    app.model = lode(modelFile)
@app.route('/')
def main():
    '''
    Landing page.
    '''
    page = 'landing.html'
    return render_template(page)
@app.route('/about')
def about():
    '''
    Static "about" page.
    '''
    page = 'about.html'
    return render_template(page)
@app.route('/user',methods=['GET','POST'])
def user():
    '''
    Recommendation page. If you give a username that will overrule any games you might give it.

    GET renders the input form. A POST with a username stores it in app.vars
    and redirects to /main_username. A POST without one reads the parallel
    game[] / rating[] form lists, stores the selected games and their ratings
    in app.vars and redirects to /main_entered, or renders an error page when
    fewer than three distinct games or fewer than three nonzero ratings were
    given.
    '''
    if request.method == 'GET':
        return render_template('userinfo.html')
    #request was a POST; a missing username field is treated like an empty one
    username = request.form.get('username', '')
    if username != '':
        #read a username and go to that branch
        app.vars["username"] = username
        return redirect('/main_username')
    #no username: grab all the games and ratings and keep the pairs whose game was
    #actually chosen (unused slots carry a negative index) and exists in the game list
    t = request.form.getlist('game[]', type=float)
    r = request.form.getlist('rating[]', type=float)
    chosen = [(int(g), rating) for g, rating in zip(t, r) if -1 < g < len(app.gamesNames)]
    app.vars['games'] = [g for g, _ in chosen]
    app.vars['ratings'] = [rating for _, rating in chosen]
    #some errors
    if len(set(app.vars['games'])) < 3 or count_nonzero(app.vars['ratings']) < 3:
        return render_template('error.html', message='Please rate more games!')
    return redirect('/main_entered')
@app.route('/game', methods = ['GET','POST'])
def game():
    '''
    The behind the scenes logic for the game model.

    GET renders the input form. POST reads the game description from the form,
    predicts its score with app.model, works out which percentage of the known
    game scores are at or below that prediction and renders the result
    together with the graph of similar games. Any failure while doing so
    renders the "valid input" error page.
    '''
    if request.method == 'GET':
        return render_template('gameinfo.html')
    try:
        form = request.form
        numeric = [float(form[field]) for field in ('minplayers', 'maxplayers', 'avgplaytime', 'langcomplexity', 'playerage')]
        app.vars['gameParts'] = numeric + [list(map(int, form.getlist('theme'))), list(map(int, form.getlist('mechanics')))]
        score = app.model.predict([app.vars['gameParts']])[0]
        atOrBelow = len([i for i in app.gameScores if i <= score])
        #round after scaling so the percentage has no float artefacts such as 56.379999999999995
        percentile = round(100.0 * atOrBelow / len(app.gameScores), 2)
        row = catMechTransformer().transform([app.vars['gameParts']])[0]
        a, b, c = GameTree(row)
        return render_template('gameresults.html',score = str(score), percentile = str(percentile)+'%', games = c, nums = b, game_text = [app.gameData[i][-1] for i in b], game_json = dumps(json_graph.node_link_data(a)))
    except Exception:
        #Exception rather than a bare except, so interpreter exits and Ctrl-C still get through
        return render_template('error.html', message = 'Please enter valid input.')
#the exact response body that appears if a username is invalid; main_username compares the
#text returned by makeRequest against this string to detect an unknown user
noUser = u'<?xml version="1.0" encoding="utf-8" standalone="yes" ?>\n<errors>\n\t<error>\n\t\t<message>Invalid username specified</message>\n\t</error>\n</errors>'
@app.route('/main_username', methods = ['GET','POST'])
def main_username():
    '''
    Queries the BoardGameGeek API and returns a users games and ratings

    Reads the username stored in app.vars, downloads the collection XML,
    stores the games and ratings via toGameAndRating and redirects to
    /main_entered. Renders an error page for an unknown user or when too few
    games are rated, and sends the visitor back to /user when no username has
    been stored yet.
    '''
    username = app.vars.get("username")
    if not username:
        return redirect('/user')
    #grab the XML
    XML = makeRequest(username)
    if XML == noUser:
        return render_template('error.html',message='No one exists with that username!')
    #parse it. I use the xmltodict library, which makes the output look like JSON data.
    XML = parse(XML)
    try:
        tooFew = int(XML['items']['@totalitems']) < 3
    except (KeyError, TypeError, ValueError):
        #some users have data that's slightly misformatted; let those through to the checks below
        tooFew = False
    if tooFew:
        return render_template('error.html', message = 'Please rate more games')
    #a badly named function that grabs the names and ratings from the XML
    toGameAndRating(XML)
    del XML
    #catches some errors
    if (len(app.vars['games']) < 4) or (count_nonzero(app.vars['ratings']) < 5):
        return render_template('error.html', message = 'Please rate more games.')
    return redirect('/main_entered')
@app.route('/main_entered', methods = ['GET','POST'])
def main_entered():
    '''
    Does the main recommendation logic and returns the template.

    Builds a rating vector over all known games from app.vars, projects it
    through app.gameFactors, adds the outcome to the baseline game scores and
    renders the ten best suggestions picked by copacetic. Visitors arriving
    without stored games and ratings are sent back to /user.
    '''
    if 'games' not in app.vars or 'ratings' not in app.vars:
        return redirect('/user')
    rating_row = [0] * len(app.gamesNames)
    for position, rating in zip(app.vars['games'], app.vars['ratings']):
        rating_row[int(position)] = float(rating)
    rating_suggestions = dot(dot(app.gameFactors, rating_row).T, app.gameFactors)
    #dampen the positive contributions and drop the negative ones
    rating_suggestions = array([sqrt(2 * x) if x > 0 else 0 for x in rating_suggestions])
    rating_suggestions = array(app.gameScores) + rating_suggestions
    #a function that takes the output and mixes it together, removing the initial games as well.
    ratings, keys = copacetic(rating_suggestions)
    inProps, outProps = properties(keys)
    #'\\,' is a backslash followed by a comma, the way commas are escaped in the stored text
    return render_template(
        'results2.html',
        sim=[i[0] for i in ratings],
        games=[i[2].replace('\\,', ',') for i in ratings],
        nums=[int(i[1]) for i in ratings],
        inprops=inProps,
        outprops=outProps,
        game_text=[i[3].replace('\\,', ',').replace('\n\n', '</p><p>') for i in ratings])
def properties(keys):
    '''
    Pulls out the most common categories and mechanics of two groups of games.

    The first group is the games stored in app.vars['games'], the second the
    games at the app.gameData positions given in keys. Categories are read
    from column 10 of app.gameData and mechanics from column 11.

    Returns ([inCategories, inMechanics], [outCategories, outMechanics]); each
    entry lists up to three names, most frequent first, ties in alphabetical
    order.
    '''
    def capFix(w):
        #lower-case everything except 'Wargames' and names containing 'Nap', 'Ren' or 'Arab'
        keep = w == 'Wargames' or 'Nap' in w or 'Ren' in w or 'Arab' in w
        return w if keep else w.lower()

    def mostCommon(lst):
        #count the normalised names, so capitalised originals are tallied correctly
        counts = {}
        for w in map(capFix, lst):
            counts[w] = counts.get(w, 0) + 1
        return sorted(counts, key=lambda w: (-counts[w], w))[:3]

    inpropsMech, inpropsCat = [], []
    outpropsMech, outpropsCat = [], []
    for i in app.vars['games']:
        inpropsMech.extend(app.gameData[i][11])
        inpropsCat.extend(app.gameData[i][10])
    for i in keys:
        outpropsMech.extend(app.gameData[i][11])
        outpropsCat.extend(app.gameData[i][10])
    return [mostCommon(inpropsCat), mostCommon(inpropsMech)],[mostCommon(outpropsCat), mostCommon(outpropsMech)]
def copacetic(ratings):
    '''
    Turns a score per game into the final list of suggestions.

    ratings holds one score per game, in app.gamesNames order. The games the
    user already entered (app.vars['games']) are zeroed out, the scores are
    scaled so the best one is 1, and the 50 best are scanned in descending
    order keeping only the first game of every title family (the part of the
    name before ':').

    Returns (rows, keys) with at most ten entries each: rows are
    [score, id, name, text] lists and keys the matching positions in
    app.gameData. The caller's ratings are left untouched.
    '''
    #work on a copy so the caller's array is not modified
    new = array(ratings)
    if len(new) == 0:
        return [], []
    for i in app.vars['games']:
        new[i] = 0
    #normalize the entries; skipped when there is nothing positive, which avoids 0/0
    peak = max(new)
    if peak > 0:
        new = new/peak
    newkeys = argsort(new)[-50:][::-1] #50 largest simularities
    #removes some versions of the same game from the results
    seen = set()
    a, newnewkeys = [], []
    for key in newkeys:
        family = app.gamesNames[key][1].split(':')[0]
        if family in seen:
            continue
        seen.add(family)
        a.append([new[key], app.gamesNames[key][0], app.gamesNames[key][1], app.gameData[key][-1]])
        newnewkeys.append(key)
    return a[:10], newnewkeys[:10]
#calculating the different sorts of recommendations
# def ownership_recs(user, recs):
# totals = dot(user,recs)
# simssum = dot(map(lambda x: 1 if x > 0 else 0,user),recs)
# rankings = [app.gameData[i][13] + totals[i]/simssum[i] for i in range(len(totals))]
# del totals, simssum
# #print 'item rankings', rankings[:20]
# return rankings
#if the rows are normalized cosine similarity reduces to a dot product between the matrix we care about and our vector of ratings. The normalization of the vector will eventually cancel out so we don't have to worry about it
def getRecs(user):
    '''
    Scores every game for one user from the precomputed user matrices.

    The rating vector is dotted into app.recs and into app.recsMasked; each
    game's ranking is column 13 of its app.gameData row plus the ratio of the
    two products.

    NOTE(review): the lines that load app.recs and app.recsMasked are
    commented out in this file, so calling this as-is looks like it would
    raise AttributeError -- confirm before re-enabling.
    '''
    weighted = dot(user, app.recs)
    weights = dot(user, app.recsMasked)
    rankings = []
    for position in range(len(weighted)):
        baseline = app.gameData[position][13]
        rankings.append(baseline + weighted[position]/weights[position])
    return rankings
def toGameAndRating(xml):
    '''
    Grabs the games and ratings out of a parsed BoardGameGeek collection.

    xml is the xmltodict result for a collection response. An item is kept
    when its numowned statistic is above 500, its subtype is 'boardgame' and
    its object id appears in app.gamesNames. Unrated items ('N/A') get a
    rating of 0.

    Nothing is returned; the result is stored in app.vars:
      app.vars['games']   -- positions in app.gamesNames, ordered by game id
      app.vars['ratings'] -- numpy array of the matching ratings
    '''
    #game id -> first position in app.gamesNames, for constant-time lookups
    position = {}
    for idx, entry in enumerate(app.gamesNames):
        position.setdefault(int(entry[0]), idx)
    #xmltodict yields a single dict instead of a list for a one-item collection
    #and leaves the key out altogether for an empty one
    items = xml[u'items'].get(u'item', [])
    if isinstance(items, dict):
        items = [items]
    #ids and ratings are kept together as pairs so they cannot drift apart
    rated = []
    for i in items:
        w = 0 if i[u'stats'][u'@numowned'] == u'' else i[u'stats'][u'@numowned']
        if int(w) <= 500 or i[u'@subtype'] != u'boardgame':
            continue
        gameid = int(i[u'@objectid'])
        if gameid not in position:
            continue
        value = i[u'stats'][u'rating'][u'@value']
        rated.append((gameid, 0 if value == u'N/A' else float(value)))
    rated.sort(key=lambda pair: pair[0])
    app.vars['games'] = [position[gameid] for gameid, _ in rated]
    app.vars['ratings'] = array([rating for _, rating in rated])
#the exact page that comes back with a 503; makeRequest compares every response against it
#and waits before retrying when they match
errorMessage=u'<html>\n <head>\n <title>Error 503 Service Unavailable</title>\n </head>\n <body>\n <h1>503 Service Unavailable</h1>\n Our apologies for the temporary inconvenience. The requested URL generated 503 "Service Unavailable" error due to overloading or maintenance of the server.\n </body>\n</html>\n'
def makeRequest(name):
    '''
    Downloads the collection XML of a BoardGameGeek user and returns it as text.

    One request is sent up front and its answer ignored, then the same URL is
    polled until something other than the waiting message or the 503 page
    comes back. The text is returned as-is, so it can also be the "invalid
    username" document.

    NOTE(review): the loop has no upper bound on retries, so a service that
    keeps answering with the waiting message keeps this call blocked.
    '''
    #imported here because the module only imports get from requests
    from requests.exceptions import ConnectionError as RequestsConnectionError
    url = "https://www.boardgamegeek.com/xmlapi2/collection"
    #passing the query as params lets requests URL-encode the user-supplied name
    query = {'username': name, 'stats': 1}
    get(url, params=query)
    sleep(1)
    while True:
        try:
            r = get(url, params=query).text
        except RequestsConnectionError:
            sleep(0.15)
            continue
        if r == errorMessage: #we've overloaded the server
            sleep(30)
        elif len(r) == 187: #the waiting message is 187 characters
            sleep(0.15)
        else:
            return r
def returnOwnership(item):
    '''
    Returns 1 when the item's status marks it as owned, previously owned or
    preordered (the flag equals '1'), and 0 otherwise.
    '''
    #flags are looked up one at a time, stopping at the first one that is set
    for flag in (u'@own', u'@prevowned', u'@preordered'):
        if item[u'status'][flag] == '1':
            return 1
    return 0
@app.errorhandler(404)
def not_found(e):
    '''
    Renders the generic error page for unknown URLs, keeping the 404 status.
    '''
    return render_template('error.html',message='Something\'s wrong.'), 404

@app.errorhandler(500)
def error(e):
    '''
    Renders the generic error page for unhandled failures, keeping the 500 status.
    '''
    return render_template('error.html',message='Something\'s wrong.'), 500
if __name__ == '__main__':
    # started as a script: serve on a fixed local port with the debugger off
    port = 33508
    app.debug = False
    # app.run(port=33507)
    app.run(port=port)
    # app.run(host='0.0.0.0')