-
Notifications
You must be signed in to change notification settings - Fork 3
/
movielens.py
118 lines (102 loc) · 4.37 KB
/
movielens.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
from itertools import chain
from collections import Counter
import csv
import random
import logging
logger = logging.getLogger('Movielens')
import numpy as np
from scipy import sparse
from scipy.sparse.linalg import svds
from utils import uni
from utils import argmax_oracle
from utils import serialize
def overlap(l1, l2):
    """Return how many distinct items occur in both `l1` and `l2`.

    Args:
        l1: Iterable of hashable items.
        l2: Iterable of hashable items. Any iterable is accepted; it does
            not have to be a set.

    Returns:
        int: size of the intersection, counting each distinct item once.
    """
    # set.intersection() takes arbitrary iterables, whereas the `&` operator
    # raises TypeError when the right-hand operand is not a set.
    return len(set(l1).intersection(l2))
def movielens_data():
    """Yield the data rows of the MovieLens ratings and tags CSV files.

    Reads 'movielens/ratings.csv' and then 'movielens/tags.csv' (both
    relative to the current working directory), skipping the first row of
    each file, which is treated as a header.

    Yields:
        list[str]: one parsed CSV row at a time; every ratings row comes
        before the first tags row.

    Raises:
        OSError: if either file cannot be opened. This is a generator, so
            the files are opened when iteration starts, not when the
            function is called.
    """
    # The `with` block closes both handles once the rows are exhausted (or
    # the generator is closed early); previously they were never closed.
    # newline='' is what the csv module asks for so that newlines inside
    # quoted fields are handled by the reader itself.
    with open('movielens/ratings.csv', encoding='utf-8', newline='') as fpr, \
            open('movielens/tags.csv', encoding='utf-8', newline='') as fpt:
        gr = csv.reader(fpr, delimiter=',', quotechar='"')
        gt = csv.reader(fpt, delimiter=',', quotechar='"')
        # Skip the header rows; the default keeps an empty file from raising
        # StopIteration inside the generator.
        next(gr, None)
        next(gt, None)
        yield from chain(gr, gt)
class c3_movielens_rng:
    """Contextual bandit environment built from the MovieLens CSV data.

    Every keyword argument given to the constructor is copied verbatim onto
    the instance. An example configuration:

        {'n_movies': 200, 'train_portion': 0.7, 'd': 10, 'K': 2,
         'baseline': (0.02, 0.04), 'gamma': 1.00, 'disj': True}

    The methods of this class read `n_movies`, `train_portion`, `d`, `K` and
    `baseline`; `gamma` and `disj` are not referenced by the methods below.
    """

    # Upper bound on how many users `slot` draws before giving up. It can be
    # overridden per instance through the constructor keyword arguments.
    max_slot_retries = 1000

    def __init__(self, **kwarg):
        """Store the configuration, load the data and log the result.

        Args:
            **kwarg: configuration values; each one becomes an instance
                attribute of the same name.
        """
        logger.info('Initializing random settings "Contextual Movielens"')
        self.__dict__.update(kwarg)
        self.name = 'c3-movielens'
        self.oracle = argmax_oracle
        self.theta = None
        self.regret_avl = False  # regret() below always returns 0
        self.load()
        logger.info(self)

    def __str__(self):
        """Return the textual form produced by `serialize`.

        NOTE(review): the names passed along are presumably the (large)
        attributes to leave out of the output -- confirm against
        `utils.serialize`.
        """
        return serialize(self, 'arms', 'x', 'ctrh', 'A', 'U', 'S', 'V', 'VT', 'users')

    def slot(self):
        """Draw a random user and build the context of the arms open to them.

        The drawn user is stored in `self.user`. An arm is available when the
        user's row of `self.A` holds 0 in that arm's column, i.e. the pair
        was not put into the training matrix by `load`. Users with `K + 2`
        or fewer available arms are skipped and another user is drawn.

        Returns:
            dict: maps each available arm to the flattened outer product of
            `self.U[user]` and `self.V[arm]`.

        Raises:
            RuntimeError: if `max_slot_retries` consecutive draws all hit a
                user with too few available arms.
            ValueError: raised by `random.sample` when `self.users` is empty.
        """
        for _ in range(self.max_slot_retries):
            # Kept as random.sample so the draw sequence of seeded runs is
            # not perturbed by this rewrite.
            self.user = random.sample(self.users, 1)[0]
            # One dense copy of the user's row replaces a sparse lookup per
            # arm (which the old code repeated three times over).
            seen = self.A.getrow(self.user).toarray().ravel()
            available = [arm for arm in self.arms if seen[arm] == 0]
            if len(available) > self.K + 2:
                user_vec = self.U[self.user]
                return {arm: np.outer(user_vec, self.V[arm]).flatten()
                        for arm in available}
        raise RuntimeError(
            'no user with more than K + 2 available arms found after '
            '{0} draws'.format(self.max_slot_retries))

    def realize(self, action):
        """Return the outcome of each arm in `action` for the current user.

        Args:
            action: iterable of arms.

        Returns:
            list[bool]: True where the arm is in the current user's held-out
            list `self.ctrh[self.user]`.
        """
        held_out = self.ctrh[self.user]
        return [arm in held_out for arm in action]

    def regret(self, action):
        """Return 0; this environment does not compute a regret."""
        return 0

    def params(self, descend):
        """Return the tuple `(self.K, descend)`."""
        return (self.K, descend)

    def load(self):
        """Read the data set and derive the matrices, arms and users.

        Steps:
          1. Every row from `movielens_data()` is assigned to the training
             set with probability `train_portion`; the rest are stored per
             user in `self.ctrh`. User and movie ids are shifted to be
             0-based.
          2. The training pairs form the user x movie matrix `self.A`, which
             is factorised by `svds` into `U`, `S`, `VT` with `d` components.
          3. Each row of `U` and of `V = VT.T` is passed through `uni`
             (presumably a normalisation -- confirm in `utils`).
          4. `self.arms` is the set of the `L` most frequent movies, where
             `L` is `n_movies`, or the number of distinct movies when
             `n_movies` is None or was not supplied.
          5. `self.users` lists the users whose held-out movies overlap the
             arms strictly between `L * baseline[0]` and `L * baseline[1]`,
             ordered by (overlap, user id).

        Finally `self.d` is replaced by its square, matching the length of
        the outer-product vectors produced by `slot`. Calling `load` again
        would square it once more.

        Raises:
            ValueError: if the data source yields no rows.
        """
        cox = []
        coy = []
        self.ctrh = {}
        movies = Counter()
        for user, movie, _rate, _timestamp in movielens_data():
            user = int(user) - 1
            movie = int(movie) - 1
            movies[movie] += 1
            if np.random.uniform(0, 1) < self.train_portion:
                cox.append(user)
                coy.append(movie)
            else:
                self.ctrh.setdefault(user, []).append(movie)

        # Every user is in `cox` or `self.ctrh`, and every movie is a key of
        # `movies`, so these maxima cover all ids that were read.
        n_rows = max(max(cox, default=-1), max(self.ctrh, default=-1)) + 1
        n_cols = max(movies) + 1
        # NOTE: a (user, movie) pair that occurs more than once is summed
        # when the COO data is converted, so entries can exceed 1.
        self.A = sparse.coo_matrix((np.ones(len(cox)), (cox, coy)),
                                   shape=(n_rows, n_cols), dtype=np.float32)
        self.U, self.S, self.VT = svds(self.A, self.d)
        # CSR makes the per-call row extraction in slot() cheap; a COO matrix
        # would be converted afresh on every getrow().
        self.A = self.A.astype(np.int32).tocsr()

        for i in range(self.U.shape[0]):
            self.U[i] = uni(self.U[i])
        self.V = self.VT.T  # a view: the writes below also change self.VT
        for i in range(self.V.shape[0]):
            self.V[i] = uni(self.V[i])

        n_movies = getattr(self, 'n_movies', None)
        self.L = len(movies) if n_movies is None else n_movies

        # Ascending by (count, movie id); the tail holds the most frequent.
        ranked = sorted((count, movie) for movie, count in movies.items())
        self.arms = {movie for _, movie in ranked[-self.L:]}

        lower = self.L * self.baseline[0]
        upper = self.L * self.baseline[1]
        eligible = []
        for user, held_out in self.ctrh.items():
            hits = overlap(held_out, self.arms)
            if lower < hits < upper:
                eligible.append((hits, user))
        self.users = [user for _, user in sorted(eligible)]
        # Use the module logger like the rest of the class, not the root one.
        logger.info('total {0} users involved'.format(len(self.users)))
        self.d = self.d ** 2

    def ctrh_hist(self):
        """Return a histogram of held-out arm counts over all users.

        Returns:
            Counter: maps "number of a user's held-out movies that are arms"
            (repeated movies counted each time) to the number of users with
            that count.
        """
        return Counter(
            sum(1 for movie in held_out if movie in self.arms)
            for held_out in self.ctrh.values()
        )
class c3_Lmovielens_rng(c3_movielens_rng):
    """Variant of `c3_movielens_rng` whose context is the arm vector alone.

    `slot` returns `self.V[arm]` for each available arm instead of the outer
    product with the user vector, so `self.d` is reset to the configured `d`
    after loading.
    """

    def __init__(self, **kwarg):
        """Store the configuration, load the data and log the result.

        Args:
            **kwarg: configuration values; each one becomes an instance
                attribute of the same name. `d` is required.
        """
        logger.info('Initializing random settings "Contextual L-Movielens"')
        self.__dict__.update(kwarg)
        self.name = 'c3-L-movielens'
        self.oracle = argmax_oracle
        self.theta = None
        self.regret_avl = False
        self.load()
        # The inherited load() leaves self.d squared; the vectors returned by
        # slot() here are rows of V, so restore the configured value.
        self.d = kwarg['d']
        logger.info(self)

    def slot(self):
        """Draw a random user and build the context of the arms open to them.

        The drawn user is stored in `self.user`. An arm is available when the
        user's row of `self.A` holds 0 in that arm's column. Users with
        `K + 2` or fewer available arms are skipped and another user is
        drawn, as in the parent class.

        Returns:
            dict: maps each available arm to the flattened `self.V[arm]`.

        Raises:
            RuntimeError: if none of the draws (`max_slot_retries` when the
                instance has that attribute, otherwise 1000) yields a user
                with enough available arms.
            ValueError: raised by `random.sample` when `self.users` is empty.
        """
        max_tries = getattr(self, 'max_slot_retries', 1000)
        for _ in range(max_tries):
            self.user = random.sample(self.users, 1)[0]
            # One dense copy of the user's row replaces a sparse lookup per
            # arm.
            seen = self.A.getrow(self.user).toarray().ravel()
            available = [arm for arm in self.arms if seen[arm] == 0]
            # The comparison used to be `<`, which accepted only users with
            # too few arms and retried forever for everyone else.
            if len(available) > self.K + 2:
                return {arm: self.V[arm].flatten() for arm in available}
        raise RuntimeError(
            'no user with more than K + 2 available arms found after '
            '{0} draws'.format(max_tries))