def buildModel(self):
    """Fit user/item factors and item biases with social pairwise ranking.

    Preparation:
      * ``self.PositiveSet[user]`` holds the user's training items rated
        ``>= 1``.
      * ``self.FPSet[user]`` holds items found in the training data of the
        user's followees that are not in the user's positive set, mapped
        to the number of followees that contributed them.

    Training runs at most ``self.maxIter`` epochs (or until
    ``self.isConverged`` returns true).  For every positive item ``i`` three
    samples are drawn.  When the user has friend items, a friend item ``k``
    and a negative item ``j`` are drawn and the model is pushed towards
    ``i > k`` (damped by ``1 / (Suk + 1)``) and ``k > j``; otherwise a plain
    ``i > j`` update is made.

    Side effects: writes ``self.b``, ``self.PositiveSet``, ``self.FPSet``,
    ``self.loss`` and updates ``self.P`` / ``self.Q`` in place.
    """
    self.b = np.random.random(self.data.trainingSize()[1])

    print('Preparing item sets...')
    self.PositiveSet = defaultdict(dict)
    self.FPSet = defaultdict(dict)
    for user in self.data.user:
        for item in self.data.trainSet_u[user]:
            if self.data.trainSet_u[user][item] >= 1:
                self.PositiveSet[user][item] = 1
        if user in self.social.user:
            for friend in self.social.getFollowees(user):
                if friend in self.data.user:
                    for item in self.data.trainSet_u[friend]:
                        if item not in self.PositiveSet[user]:
                            counts = self.FPSet[user]
                            counts[item] = counts.get(item, 0) + 1

    # Constant social coefficient; the per-item counts in FPSet are not used
    # as weights in this variant.
    Suk = 1
    # 1.0 keeps this a true division even under Python 2 integer semantics.
    weight = 1.0 / (Suk + 1)

    print('Training...')
    iteration = 0
    while iteration < self.maxIter:
        self.loss = 0
        itemList = list(self.data.item.keys())
        for user in self.PositiveSet:
            u = self.data.user[user]
            positives = self.PositiveSet[user]
            friendItems = self.FPSet[user]
            kItems = list(friendItems.keys())
            if len(positives) >= len(itemList):
                # Every item is positive: no negative can ever be sampled,
                # so rejection sampling below would never terminate.
                continue
            # The two sets are disjoint by construction, so a strictly
            # smaller combined size guarantees that some item lies outside
            # both and the rejection loop in the social branch terminates.
            hasNegative = len(positives) + len(friendItems) < len(itemList)
            useSocial = bool(kItems) and hasNegative
            for item in positives:
                i = self.data.item[item]
                for _ in range(3):  # negative sampling for 3 times
                    if useSocial:
                        item_k = choice(kItems)
                        k = self.data.item[item_k]
                        s = sigmoid(
                            (self.P[u].dot(self.Q[i]) + self.b[i] -
                             self.P[u].dot(self.Q[k]) - self.b[k]) / (Suk + 1))
                        step = weight * self.lRate * (1 - s)
                        self.P[u] += step * (self.Q[i] - self.Q[k])
                        self.Q[i] += step * self.P[u]
                        self.Q[k] -= step * self.P[u]
                        self.b[i] += step
                        self.b[k] -= step

                        # A negative item must be neither positive nor a
                        # friend item *of this user*.  (The previous check
                        # looked item ids up among FPSet's user keys.)
                        item_j = choice(itemList)
                        while item_j in positives or item_j in friendItems:
                            item_j = choice(itemList)
                        j = self.data.item[item_j]
                        s = sigmoid(self.P[u].dot(self.Q[k]) + self.b[k] -
                                    self.P[u].dot(self.Q[j]) - self.b[j])
                        step = self.lRate * (1 - s)
                        self.P[u] += step * (self.Q[k] - self.Q[j])
                        self.Q[k] += step * self.P[u]
                        self.Q[j] -= step * self.P[u]
                        self.b[k] += step
                        self.b[j] -= step

                        self.P[u] -= self.lRate * self.regU * self.P[u]
                        self.Q[i] -= self.lRate * self.regI * self.Q[i]
                        self.Q[j] -= self.lRate * self.regI * self.Q[j]
                        self.Q[k] -= self.lRate * self.regI * self.Q[k]

                        self.loss += -log(sigmoid(
                            (self.P[u].dot(self.Q[i]) + self.b[i] -
                             self.P[u].dot(self.Q[k]) - self.b[k]) / (Suk + 1))) \
                            - log(sigmoid(
                                self.P[u].dot(self.Q[k]) + self.b[k] -
                                self.P[u].dot(self.Q[j]) - self.b[j]))
                    else:
                        # No friend items (or no item outside both sets):
                        # plain i > j update against any non-positive item.
                        item_j = choice(itemList)
                        while item_j in positives:
                            item_j = choice(itemList)
                        j = self.data.item[item_j]
                        s = sigmoid(self.P[u].dot(self.Q[i]) + self.b[i] -
                                    self.P[u].dot(self.Q[j]) - self.b[j])
                        step = self.lRate * (1 - s)
                        self.P[u] += step * (self.Q[i] - self.Q[j])
                        self.Q[i] += step * self.P[u]
                        self.Q[j] -= step * self.P[u]
                        self.b[i] += step
                        self.b[j] -= step
                        self.loss += -log(s)

        self.loss += self.regU * (self.P * self.P).sum() + \
            self.regI * (self.Q * self.Q).sum() + self.b.dot(self.b)
        iteration += 1
        if self.isConverged(iteration):
            break
def buildModel(self):
    """Build a collaborative user network and factorise ratings with it.

    Stages, in order:
      1. Keep items rated by more than one user and, per user, the items
         rated above 0.75 (``self.filteredRatings``).
      2. Link every pair of users sharing such items; a neighbour is
         repeated in ``self.CUNet[user]`` once per shared item, so uniform
         sampling from the list is weighted by overlap.
      3. Build and code a Huffman tree over the users (weight = list length).
      4. Generate ``self.walkCount`` random walks of ``self.walkLength``
         nodes per user and train vectors along each user's Huffman code.
      5. Compute pairwise cosine similarities and keep the ``self.topK``
         most similar users per user in ``self.topKSim``.
      6. Run SGD matrix factorisation on ``self.dao.trainingData`` with an
         extra term pulling each user's factors towards those of the top-k
         similar users (strength ``self.alpha``).

    Side effects: sets ``itemNet``, ``filteredRatings``, ``CUNet``, ``HTree``,
    ``walks``, ``visited``, ``Sim``, ``topKSim`` and ``loss`` on ``self`` and
    updates ``self.P`` / ``self.Q`` in place.
    """
    print('Kind Note: This method will probably take much time.')
    # build C-U-NET
    print('Building collaborative user network...')
    # filter isolated nodes and low ratings
    self.itemNet = {}
    for item in self.dao.trainSet_i:
        if len(self.dao.trainSet_i[item]) > 1:
            self.itemNet[item] = self.dao.trainSet_i[item]

    self.filteredRatings = defaultdict(list)
    for item in self.itemNet:
        for user in self.itemNet[item]:
            if self.itemNet[item][user] > 0.75:
                self.filteredRatings[user].append(item)

    self.CUNet = defaultdict(list)
    # Build each user's item set once instead of once per user pair.
    itemSets = {user: set(items)
                for user, items in self.filteredRatings.items()}
    for user1 in self.filteredRatings:
        s1 = itemSets[user1]
        for user2 in self.filteredRatings:
            if user1 != user2:
                weight = len(s1.intersection(itemSets[user2]))
                if weight > 0:
                    self.CUNet[user1] += [user2] * weight

    # build Huffman Tree First
    print('Building Huffman tree...')
    # To accelerate the method, the weight is estimated roughly
    nodes = {}
    for user in self.CUNet:
        nodes[user] = len(self.CUNet[user])
    nodes = sorted(nodes.items(), key=lambda d: d[1])
    nodes = [HTreeNode(None, None, user[1], user[0]) for user in nodes]
    nodeList = OrderedLinkList()
    for node in nodes:
        listNode = Node()
        listNode.val = node
        try:
            nodeList.insert(listNode)
        except AttributeError:
            # NOTE(review): insert failures are deliberately ignored here;
            # the cause is not visible from this method - confirm.
            pass
    self.HTree = HuffmanTree(vecLength=self.walkDim)
    self.HTree.buildTree(nodeList)
    print('Coding for all users...')
    self.HTree.coding(self.HTree.root, '', 0)

    print('Generating random deep walks...')
    self.walks = []
    self.visited = defaultdict(dict)
    for user in self.CUNet:
        for t in range(self.walkCount):
            path = [user]
            # Step from the node reached last.  Sampling from the start
            # user's neighbours at every step (as before) never leaves the
            # start user's 1-hop neighbourhood.  The network is symmetric,
            # so every neighbour is itself a key of CUNet.
            lastNode = user
            for i in range(1, self.walkLength):
                nextNode = choice(self.CUNet[lastNode])
                count = 0
                while nextNode in self.visited[user]:
                    nextNode = choice(self.CUNet[lastNode])
                    # break infinite loop
                    count += 1
                    if count == 10:
                        break
                path.append(nextNode)
                self.visited[user][nextNode] = 1
                lastNode = nextNode
            self.walks.append(path)
    shuffle(self.walks)

    # Training get top-k friends
    print('Generating user embedding...')
    iteration = 1
    while iteration <= self.epoch:
        loss = 0
        for walk in self.walks:
            # Floor division: the index must stay an int on Python 3 too.
            centerUser = walk[len(walk) // 2]
            for user in walk:
                if user != centerUser:
                    code = self.HTree.code[user]
                    centerCode = self.HTree.code[centerUser]
                    x = self.HTree.vector[centerCode]
                    for i in range(1, len(code)):
                        prefix = code[0:i]
                        w = self.HTree.vector[prefix]
                        self.HTree.vector[prefix] += self.lRate * (
                            1 - sigmoid(w.dot(x))) * x
                        self.HTree.vector[centerCode] += self.lRate * (
                            1 - sigmoid(w.dot(x))) * w
                        loss += -log(sigmoid(w.dot(x)))
        print('iteration: %s loss: %s' % (iteration, loss))
        iteration += 1
    print('User embedding generated.')

    print('Constructing similarity matrix...')
    self.Sim = SymmetricMatrix(len(self.CUNet))
    for user1 in self.CUNet:
        for user2 in self.CUNet:
            if user1 != user2:
                prefix1 = self.HTree.code[user1]
                vec1 = self.HTree.vector[prefix1]
                prefix2 = self.HTree.code[user2]
                vec2 = self.HTree.vector[prefix2]
                if self.Sim.contains(user1, user2):
                    continue
                sim = cosine(vec1, vec2)
                self.Sim.set(user1, user2, sim)
    self.topKSim = {}
    for user in self.CUNet:
        self.topKSim[user] = sorted(self.Sim[user].items(),
                                    key=lambda d: d[1],
                                    reverse=True)[:self.topK]
    print('Similarity matrix finished.')

    # matrix decomposition
    print('Decomposing...')
    iteration = 0
    while iteration < self.maxIter:
        self.loss = 0
        for entry in self.dao.trainingData:
            user, item, rating = entry
            u = self.dao.user[user]  # get user id
            i = self.dao.item[item]  # get item id
            error = rating - self.P[u].dot(self.Q[i])
            self.loss += error**2
            p = self.P[u]
            q = self.Q[i]
            # update latent vectors
            self.P[u] += self.lRate * (error * q - self.regU * p)
            self.Q[i] += self.lRate * (error * p - self.regI * q)

        for user in self.CUNet:
            u = self.dao.user[user]
            friends = self.topKSim[user]
            for friend in friends:
                uf = self.dao.user[friend[0]]
                self.P[u] -= self.lRate * (self.P[u] - self.P[uf]) * self.alpha
                self.loss += self.alpha * (
                    self.P[u] - self.P[uf]).dot(self.P[u] - self.P[uf])
        self.loss += self.regU * (self.P * self.P).sum() + self.regI * (
            self.Q * self.Q).sum()
        iteration += 1
        if self.isConverged(iteration):
            break
def buildModel(self):
    """Fit user/item factors with social pairwise ranking (count-weighted).

    ``self.PositiveSet[user]`` holds the user's training items rated
    ``>= 1``.  ``self.IPositiveSet[user]`` maps every item found in a
    followee's training data (and not positive for the user) to the number
    of followees that contributed it.

    For every positive item ``i`` one friend item ``k`` (with count ``Suk``)
    and one negative item ``j`` are sampled and the model is pushed towards
    ``i > k`` (damped by ``1 / (Suk + 1)``) and ``k > j``.  Users without
    friend items get a plain ``i > j`` update.  Training stops after
    ``self.maxIter`` epochs or when ``self.isConverged`` returns true.

    Side effects: writes ``self.b``, ``self.PositiveSet``,
    ``self.IPositiveSet``, ``self.loss`` and updates ``self.P`` / ``self.Q``
    in place.  ``self.b`` is updated but does not enter the scores computed
    in this method.
    """

    def prob(u, hi, lo, denom=1):
        # sigmoid of the (scaled) score gap, evaluated with the *current*
        # parameters; called afresh for every update, as the original
        # inline expressions were.
        return sigmoid(
            (self.P[u].dot(self.Q[hi]) - self.P[u].dot(self.Q[lo])) / denom)

    self.b = np.random.random(self.dao.trainingSize()[1])

    print('Preparing item sets...')
    self.PositiveSet = defaultdict(dict)
    self.IPositiveSet = defaultdict(dict)
    for user in self.dao.user:
        for item in self.dao.trainSet_u[user]:
            if self.dao.trainSet_u[user][item] >= 1:
                self.PositiveSet[user][item] = 1
        if user in self.sao.user:
            for friend in self.sao.getFollowees(user):
                if friend in self.dao.user:
                    for item in self.dao.trainSet_u[friend]:
                        if item not in self.PositiveSet[user]:
                            counts = self.IPositiveSet[user]
                            counts[item] = counts.get(item, 0) + 1

    print('Training...')
    iteration = 0
    while iteration < self.maxIter:
        self.loss = 0
        itemList = list(self.dao.item.keys())
        for user in self.PositiveSet:
            u = self.dao.user[user]
            positives = self.PositiveSet[user]
            friendItems = self.IPositiveSet[user]
            kItems = list(friendItems.keys())
            if len(positives) >= len(itemList):
                # No non-positive item exists; sampling could never finish.
                continue
            # The two sets are disjoint by construction, so a strictly
            # smaller combined size guarantees an item outside both.
            hasNegative = len(positives) + len(friendItems) < len(itemList)
            useSocial = bool(kItems) and hasNegative
            for item in positives:
                i = self.dao.item[item]
                if useSocial:
                    item_k = choice(kItems)
                    k = self.dao.item[item_k]
                    Suk = friendItems[item_k]
                    # 1.0 forces true division: with the integer count Suk,
                    # ``1 / (Suk + 1)`` is 0 on Python 2 and would cancel
                    # every social update below.
                    weight = 1.0 / (Suk + 1)
                    self.P[u] += weight * self.lRate * (
                        1 - prob(u, i, k, Suk + 1)) * (self.Q[i] - self.Q[k])
                    self.Q[i] += weight * self.lRate * (
                        1 - prob(u, i, k, Suk + 1)) * self.P[u]
                    self.Q[k] -= weight * self.lRate * (
                        1 - prob(u, i, k, Suk + 1)) * self.P[u]
                    self.b[i] += weight * self.lRate * (
                        1 - prob(u, i, k, Suk + 1))
                    self.b[k] -= weight * self.lRate * (
                        1 - prob(u, i, k, Suk + 1))

                    # A negative item is neither positive nor a friend item
                    # of *this user* (the old check tested the item id
                    # against IPositiveSet's user keys).
                    item_j = choice(itemList)
                    while item_j in positives or item_j in friendItems:
                        item_j = choice(itemList)
                    j = self.dao.item[item_j]
                    self.P[u] += self.lRate * (1 - prob(u, k, j)) * (
                        self.Q[k] - self.Q[j])
                    self.Q[k] += self.lRate * (1 - prob(u, k, j)) * self.P[u]
                    self.Q[j] -= self.lRate * (1 - prob(u, k, j)) * self.P[u]
                    self.b[k] += self.lRate * (1 - prob(u, k, j))
                    self.b[j] -= self.lRate * (1 - prob(u, k, j))

                    self.P[u] -= self.lRate * self.regU * self.P[u]
                    self.Q[i] -= self.lRate * self.regI * self.Q[i]
                    self.Q[j] -= self.lRate * self.regI * self.Q[j]
                    self.Q[k] -= self.lRate * self.regI * self.Q[k]

                    self.loss += -log(prob(u, i, k, Suk + 1)) \
                        - log(prob(u, k, j))
                else:
                    item_j = choice(itemList)
                    while item_j in positives:
                        item_j = choice(itemList)
                    j = self.dao.item[item_j]
                    self.P[u] += self.lRate * (1 - prob(u, i, j)) * (
                        self.Q[i] - self.Q[j])
                    self.Q[i] += self.lRate * (1 - prob(u, i, j)) * self.P[u]
                    self.Q[j] -= self.lRate * (1 - prob(u, i, j)) * self.P[u]
                    self.b[i] += self.lRate * (1 - prob(u, i, j))
                    self.b[j] -= self.lRate * (1 - prob(u, i, j))
                    self.loss += -log(prob(u, i, j))

        self.loss += self.regU * (self.P * self.P).sum() + \
            self.regI * (self.Q * self.Q).sum() + self.b.dot(self.b)
        iteration += 1
        if self.isConverged(iteration):
            break
def buildModel(self):
    """Fit user/item factors and item biases with social pairwise ranking.

    ``self.PositiveSet[user]`` holds the user's training items rated
    ``>= 1``; ``self.FPSet[user]`` maps items from followees' training data
    (not positive for the user) to the number of contributing followees.

    Per user the social coefficient is
    ``Suk = accountDAO.SI[user] / sqrt(accountDAO.SB[user])``.  For every
    positive item ``i``, ``self.C`` samples are drawn: with friend items
    available, a friend item ``k`` and a negative ``j`` give an ``i > k``
    update (damped by ``1 / (Suk + 1)``) and a ``k > j`` update; otherwise a
    plain ``i > j`` update is made.  If no valid negative is found after a
    few resamples, the remaining samples for that positive item are skipped.

    Side effects: writes ``self.b``, ``self.PositiveSet``, ``self.FPSet``,
    ``self.loss`` and updates ``self.P`` / ``self.Q`` in place.
    """
    # Biases start at zero.  (A random initialisation that was immediately
    # overwritten has been dropped.)
    self.b = np.zeros(self.data.trainingSize()[1])

    print('Preparing item sets...')
    self.PositiveSet = defaultdict(dict)
    self.FPSet = defaultdict(dict)
    for user in self.data.user:
        for item in self.data.trainSet_u[user]:
            if self.data.trainSet_u[user][item] >= 1:
                self.PositiveSet[user][item] = 1
        if user in self.social.user:
            for friend in self.social.getFollowees(user):
                if friend in self.data.user:
                    for item in self.data.trainSet_u[friend]:
                        if item not in self.PositiveSet[user]:
                            counts = self.FPSet[user]
                            counts[item] = counts.get(item, 0) + 1

    print('Training...')
    iteration = 0
    while iteration < self.maxIter:
        self.loss = 0
        itemList = list(self.data.item.keys())
        for user in tqdm(self.PositiveSet,
                         desc="training processing...",
                         total=len(self.PositiveSet),
                         postfix='epoch [{}]'.format(iteration)):
            u = self.data.user[user]
            positives = self.PositiveSet[user]
            friendItems = self.FPSet[user]
            kItems = list(friendItems.keys())
            # NOTE(review): assumes accountDAO.SB[user] is non-zero - confirm.
            Suk = self.accountDAO.SI[user] / np.sqrt(
                self.accountDAO.SB[user])
            for item in positives:
                i = self.data.item[item]
                for _ in range(self.C):  # self.C negative samples per item
                    if kItems:
                        item_k = choice(kItems)
                        k = self.data.item[item_k]
                        s = sigmoid(
                            (self.P[u].dot(self.Q[i]) + self.b[i] -
                             self.P[u].dot(self.Q[k]) - self.b[k]) / (Suk + 1))
                        step = 1 / (Suk + 1) * self.lRate * (1 - s)
                        self.P[u] += step * (self.Q[i] - self.Q[k])
                        self.Q[i] += step * self.P[u]
                        self.Q[k] -= step * self.P[u]
                        self.b[i] += step
                        self.b[k] -= step

                        # A negative must be neither positive nor a friend
                        # item of *this user*.  (The previous check looked
                        # the item id up among FPSet's user keys.)
                        item_j = choice(itemList)
                        sample_num = 0
                        continue_train = True
                        while item_j in positives or item_j in friendItems:
                            item_j = choice(itemList)
                            sample_num += 1
                            if sample_num > 3:
                                # Give up instead of looping indefinitely.
                                continue_train = False
                                break
                        if not continue_train:
                            break
                        j = self.data.item[item_j]
                        s = sigmoid(self.P[u].dot(self.Q[k]) + self.b[k] -
                                    self.P[u].dot(self.Q[j]) - self.b[j])
                        step = self.lRate * (1 - s)
                        self.P[u] += step * (self.Q[k] - self.Q[j])
                        self.Q[k] += step * self.P[u]
                        self.Q[j] -= step * self.P[u]
                        self.b[k] += step
                        self.b[j] -= step

                        self.P[u] -= self.lRate * self.regU * self.P[u]
                        self.Q[i] -= self.lRate * self.regI * self.Q[i]
                        self.Q[j] -= self.lRate * self.regI * self.Q[j]
                        self.Q[k] -= self.lRate * self.regI * self.Q[k]

                        self.loss += -log(sigmoid(
                            (self.P[u].dot(self.Q[i]) + self.b[i] -
                             self.P[u].dot(self.Q[k]) - self.b[k]) / (Suk + 1))) \
                            - log(sigmoid(
                                self.P[u].dot(self.Q[k]) + self.b[k] -
                                self.P[u].dot(self.Q[j]) - self.b[j]))
                    else:
                        item_j = choice(itemList)
                        while item_j in positives:
                            item_j = choice(itemList)
                        j = self.data.item[item_j]
                        s = sigmoid(self.P[u].dot(self.Q[i]) + self.b[i] -
                                    self.P[u].dot(self.Q[j]) - self.b[j])
                        step = self.lRate * (1 - s)
                        self.P[u] += step * (self.Q[i] - self.Q[j])
                        self.Q[i] += step * self.P[u]
                        self.Q[j] -= step * self.P[u]
                        self.b[i] += step
                        self.b[j] -= step

                        self.P[u] -= self.lRate * self.regU * self.P[u]
                        self.Q[i] -= self.lRate * self.regI * self.Q[i]
                        self.Q[j] -= self.lRate * self.regI * self.Q[j]

                        self.loss += -log(s)

        self.loss += self.regU * (self.P * self.P).sum() + \
            self.regI * (self.Q * self.Q).sum() + self.b.dot(self.b)
        iteration += 1
        if self.isConverged(iteration):
            break
def buildModel(self):
    """Build a collaborative user network and train pairwise ranking on it.

    Stages, in order:
      1. Keep items rated by more than one user and, per user, the items
         rated ``>= 1``; link users sharing such items in ``self.CUNet``
         (a neighbour is repeated once per shared item).
      2. Build and code a Huffman tree over the users.
      3. Generate random walks and train vectors along Huffman codes using
         randomly centred windows of width ``self.winSize``.
      4. Keep the ``self.topK`` most cosine-similar users per user.
      5. ``self.PositiveSet[user]``: own items rated ``>= 1``;
         ``self.IPositiveSet[user]``: items of the top-k similar users that
         are not positive for the user (duplicates kept).
      6. For every positive item ``i`` sample an implicit item ``k`` and a
         non-positive item ``j``; update towards ``i > k`` and, scaled by
         ``1 / self.s``, ``k > j``.  Users with no implicit items are
         skipped.

    Side effects: sets ``itemNet``, ``filteredRatings``, ``CUNet``, ``HTree``,
    ``walks``, ``visited``, ``Sim``, ``topKSim``, ``PositiveSet``,
    ``IPositiveSet`` and ``loss`` on ``self`` and updates ``self.P`` /
    ``self.Q`` in place.
    """

    def prob(u, hi, lo, scale=1):
        # sigmoid of the (scaled) score gap with the *current* parameters;
        # re-evaluated for every update like the original inline code.
        return sigmoid(
            scale * (self.P[u].dot(self.Q[hi]) - self.P[u].dot(self.Q[lo])))

    print('Kind Note: This method will probably take much time.')
    # build C-U-NET
    print('Building collaborative user network...')
    # filter isolated nodes
    self.itemNet = {}
    for item in self.dao.trainSet_i:
        if len(self.dao.trainSet_i[item]) > 1:
            self.itemNet[item] = self.dao.trainSet_i[item]

    self.filteredRatings = defaultdict(list)
    for item in self.itemNet:
        for user in self.itemNet[item]:
            if self.itemNet[item][user] >= 1:
                self.filteredRatings[user].append(item)

    self.CUNet = defaultdict(list)
    # Build each user's item set once instead of once per user pair.
    itemSets = {user: set(items)
                for user, items in self.filteredRatings.items()}
    for user1 in self.filteredRatings:
        s1 = itemSets[user1]
        for user2 in self.filteredRatings:
            if user1 != user2:
                weight = len(s1.intersection(itemSets[user2]))
                if weight > 0:
                    self.CUNet[user1] += [user2] * weight

    # build Huffman Tree First
    print('Building Huffman tree...')
    # To accelerate the method, the weight is estimated roughly
    nodes = {}
    for user in self.CUNet:
        nodes[user] = len(self.CUNet[user])
    nodes = sorted(nodes.items(), key=lambda d: d[1])
    nodes = [HTreeNode(None, None, user[1], user[0]) for user in nodes]
    nodeList = OrderedLinkList()
    for node in nodes:
        listNode = Node()
        listNode.val = node
        try:
            nodeList.insert(listNode)
        except AttributeError:
            # NOTE(review): insert failures are deliberately ignored here;
            # the cause is not visible from this method - confirm.
            pass
    self.HTree = HuffmanTree(vecLength=self.walkDim)
    self.HTree.buildTree(nodeList)
    print('Coding for all users...')
    self.HTree.coding(self.HTree.root, '', 0)

    print('Generating random deep walks...')
    self.walks = []
    self.visited = defaultdict(dict)
    for user in self.CUNet:
        for t in range(self.walkCount):
            path = [user]
            # Step from the node reached last; sampling from the start
            # user's neighbours at every step never leaves its 1-hop
            # neighbourhood.  The network is symmetric, so every neighbour
            # is itself a key of CUNet.
            lastNode = user
            for i in range(1, self.walkLength):
                nextNode = choice(self.CUNet[lastNode])
                count = 0
                while nextNode in self.visited[user]:
                    nextNode = choice(self.CUNet[lastNode])
                    # break infinite loop
                    count += 1
                    if count == 10:
                        break
                path.append(nextNode)
                self.visited[user][nextNode] = 1
                lastNode = nextNode
            self.walks.append(path)
    shuffle(self.walks)

    # Training get top-k friends
    print('Generating user embedding...')
    halfWindow = self.winSize // 2
    iteration = 1
    while iteration <= self.epoch:
        loss = 0
        # slide windows randomly
        for n in range(self.walkLength // self.winSize):
            for walk in self.walks:
                center = randint(0, len(walk) - 1)
                s = max(0, center - halfWindow)
                e = min(center + halfWindow, len(walk) - 1)
                centerUser = walk[center]
                for user in walk[s:e]:
                    if user != centerUser:
                        code = self.HTree.code[user]
                        centerCode = self.HTree.code[centerUser]
                        x = self.HTree.vector[centerCode]
                        for i in range(1, len(code)):
                            prefix = code[0:i]
                            w = self.HTree.vector[prefix]
                            self.HTree.vector[prefix] += self.lRate * (
                                1 - sigmoid(w.dot(x))) * x
                            self.HTree.vector[centerCode] += self.lRate * (
                                1 - sigmoid(w.dot(x))) * w
                            loss += -log(sigmoid(w.dot(x)), 2)
        print('iteration: %s loss: %s' % (iteration, loss))
        iteration += 1
    print('User embedding generated.')

    print('Constructing similarity matrix...')
    self.Sim = SymmetricMatrix(len(self.CUNet))
    for user1 in self.CUNet:
        for user2 in self.CUNet:
            if user1 != user2:
                prefix1 = self.HTree.code[user1]
                vec1 = self.HTree.vector[prefix1]
                prefix2 = self.HTree.code[user2]
                vec2 = self.HTree.vector[prefix2]
                if self.Sim.contains(user1, user2):
                    continue
                sim = cosine(vec1, vec2)
                self.Sim.set(user1, user2, sim)
    self.topKSim = {}
    for user in self.CUNet:
        self.topKSim[user] = sorted(self.Sim[user].items(),
                                    key=lambda d: d[1],
                                    reverse=True)[:self.topK]
    print('Similarity matrix finished.')

    # prepare Pu set, IPu set, and Nu set
    print('Preparing item sets...')
    self.PositiveSet = defaultdict(dict)
    self.IPositiveSet = defaultdict(list)
    for user in self.topKSim:
        for item in self.dao.trainSet_u[user]:
            if self.dao.trainSet_u[user][item] >= 1:
                self.PositiveSet[user][item] = 1
        for friend in self.topKSim[user]:
            for item in self.dao.trainSet_u[friend[0]]:
                if item not in self.PositiveSet[user]:
                    self.IPositiveSet[user].append(item)

    print('Training...')
    # 1.0 keeps the scale a true division even if self.s is an int on
    # Python 2 (where ``1 / self.s`` would be 0 for s > 1).
    invS = 1.0 / self.s
    iteration = 0
    while iteration < self.maxIter:
        self.loss = 0
        itemList = list(self.dao.item.keys())
        for user in self.PositiveSet:
            u = self.dao.user[user]
            positives = self.PositiveSet[user]
            implicitItems = self.IPositiveSet[user]
            for item in positives:
                if len(implicitItems) > 0:
                    item_k = choice(implicitItems)
                    i = self.dao.item[item]
                    k = self.dao.item[item_k]
                    self.P[u] += self.lRate * (1 - prob(u, i, k)) * (
                        self.Q[i] - self.Q[k])
                    self.Q[i] += self.lRate * (1 - prob(u, i, k)) * self.P[u]
                    self.Q[k] -= self.lRate * (1 - prob(u, i, k)) * self.P[u]

                    item_j = choice(itemList)
                    while item_j in positives:
                        item_j = choice(itemList)
                    j = self.dao.item[item_j]
                    self.P[u] += invS * self.lRate * (
                        1 - prob(u, k, j, invS)) * (self.Q[k] - self.Q[j])
                    self.Q[k] += invS * self.lRate * (
                        1 - prob(u, k, j, invS)) * self.P[u]
                    self.Q[j] -= invS * self.lRate * (
                        1 - prob(u, k, j, invS)) * self.P[u]

                    self.P[u] -= self.lRate * self.regU * self.P[u]
                    self.Q[i] -= self.lRate * self.regI * self.Q[i]
                    self.Q[j] -= self.lRate * self.regI * self.Q[j]
                    self.Q[k] -= self.lRate * self.regI * self.Q[k]

                    # The second term reports the k > j objective that was
                    # just optimised (it previously repeated the i/k gap).
                    self.loss += -log(prob(u, i, k)) - \
                        log(prob(u, k, j, invS))

        self.loss += self.regU * (self.P * self.P).sum() + self.regI * (
            self.Q * self.Q).sum()
        iteration += 1
        if self.isConverged(iteration):
            break
def buildModel(self):
    """Build a collaborative user network, embed it and train pairwise ranking.

    Stages, in order:
      1. Keep items rated by more than one user and, per user, the items
         rated ``>= 1``; link users sharing such items in ``self.CUNet``
         (a neighbour is repeated once per shared item).
      2. Generate ``self.walkCount`` random walks of ``self.walkLength``
         nodes per user and feed them to ``w2v.Word2Vec`` to obtain
         ``self.walkDim``-dimensional user vectors (stored in ``self.W``).
      3. Keep the ``self.topK`` most cosine-similar users per user.
      4. ``self.PositiveSet[user]``: own items rated ``>= 1``;
         ``self.IPositiveSet[user]``: items of the top-k similar users that
         are not positive for the user (duplicates kept).
      5. For every positive item ``i``: with implicit items available,
         sample ``k`` and a non-positive ``j`` and update towards ``i > k``
         and, scaled by ``1 / self.s``, ``k > j``; otherwise a plain
         ``i > j`` update.

    An earlier Huffman-tree / hierarchical-softmax embedding step that was
    kept here as commented-out code has been removed; Word2Vec replaces it.

    Side effects: sets ``itemNet``, ``filteredRatings``, ``CUNet``, ``walks``,
    ``visited``, ``W``, ``topKSim``, ``PositiveSet``, ``IPositiveSet`` and
    ``loss`` on ``self`` and updates ``self.P`` / ``self.Q`` in place.
    """

    def prob(u, hi, lo, scale=1):
        # sigmoid of the (scaled) score gap with the *current* parameters;
        # re-evaluated for every update like the original inline code.
        return sigmoid(
            scale * (self.P[u].dot(self.Q[hi]) - self.P[u].dot(self.Q[lo])))

    print('Kind Note: This method will probably take much time.')
    # build C-U-NET
    print('Building collaborative user network...')
    # filter isolated nodes
    self.itemNet = {}
    for item in self.dao.trainSet_i:
        if len(self.dao.trainSet_i[item]) > 1:
            self.itemNet[item] = self.dao.trainSet_i[item]

    self.filteredRatings = defaultdict(list)
    for item in self.itemNet:
        for user in self.itemNet[item]:
            if self.itemNet[item][user] >= 1:
                self.filteredRatings[user].append(item)

    self.CUNet = defaultdict(list)
    # Build each user's item set once instead of once per user pair.
    itemSets = {user: set(items)
                for user, items in self.filteredRatings.items()}
    for user1 in self.filteredRatings:
        s1 = itemSets[user1]
        for user2 in self.filteredRatings:
            if user1 != user2:
                weight = len(s1.intersection(itemSets[user2]))
                if weight > 0:
                    self.CUNet[user1] += [user2] * weight

    print('Generating random deep walks...')
    self.walks = []
    self.visited = defaultdict(dict)
    for user in self.CUNet:
        for t in range(self.walkCount):
            path = [user]
            lastNode = user
            for i in range(1, self.walkLength):
                nextNode = choice(self.CUNet[lastNode])
                count = 0
                # NOTE(review): the check reads visited[lastNode] while the
                # mark below writes visited[user]; kept as found - confirm
                # which key is intended.
                while nextNode in self.visited[lastNode]:
                    nextNode = choice(self.CUNet[lastNode])
                    # break infinite loop
                    count += 1
                    if count == 10:
                        break
                path.append(nextNode)
                self.visited[user][nextNode] = 1
                lastNode = nextNode
            self.walks.append(path)
    shuffle(self.walks)

    # Training get top-k friends
    print('Generating user embedding...')
    # NOTE(review): ``size`` / ``iter`` are the keyword names used by the
    # gensim release this file targets - confirm before upgrading gensim.
    model = w2v.Word2Vec(self.walks,
                         size=self.walkDim,
                         window=5,
                         min_count=0,
                         iter=3)
    print('User embedding generated.')

    print('Constructing similarity matrix...')
    self.W = np.random.rand(self.dao.trainingSize()[0], self.walkDim) / 10
    # Copy every embedding once up front.  The rows were previously
    # re-copied inside the O(n^2) pair loop with the same end result.
    for user in self.CUNet:
        self.W[self.dao.user[user]] = model.wv[user]
    self.topKSim = {}
    i = 0
    for user1 in self.CUNet:
        sims = []
        u1 = self.dao.user[user1]
        for user2 in self.CUNet:
            if user1 != user2:
                u2 = self.dao.user[user2]
                sims.append((user2, cosine(self.W[u1], self.W[u2])))
        self.topKSim[user1] = sorted(sims, key=lambda d: d[1],
                                     reverse=True)[:self.topK]
        i += 1
        if i % 200 == 0:
            print('progress: %s / %s' % (i, len(self.CUNet)))
    print('Similarity matrix finished.')

    # prepare Pu set, IPu set, and Nu set
    print('Preparing item sets...')
    self.PositiveSet = defaultdict(dict)
    self.IPositiveSet = defaultdict(list)
    for user in self.topKSim:
        for item in self.dao.trainSet_u[user]:
            if self.dao.trainSet_u[user][item] >= 1:
                self.PositiveSet[user][item] = 1
        for friend in self.topKSim[user]:
            for item in self.dao.trainSet_u[friend[0]]:
                if item not in self.PositiveSet[user]:
                    self.IPositiveSet[user].append(item)

    print('Training...')
    # 1.0 keeps the scale a true division even if self.s is an int on
    # Python 2 (where ``1 / self.s`` would be 0 for s > 1).
    invS = 1.0 / self.s
    iteration = 0
    while iteration < self.maxIter:
        self.loss = 0
        itemList = list(self.dao.item.keys())
        for user in self.PositiveSet:
            u = self.dao.user[user]
            positives = self.PositiveSet[user]
            implicitItems = self.IPositiveSet[user]
            for item in positives:
                i = self.dao.item[item]
                if len(implicitItems) > 0:
                    item_k = choice(implicitItems)
                    k = self.dao.item[item_k]
                    self.P[u] += self.lRate * (1 - prob(u, i, k)) * (
                        self.Q[i] - self.Q[k])
                    self.Q[i] += self.lRate * (1 - prob(u, i, k)) * self.P[u]
                    self.Q[k] -= self.lRate * (1 - prob(u, i, k)) * self.P[u]

                    item_j = choice(itemList)
                    while item_j in positives:
                        item_j = choice(itemList)
                    j = self.dao.item[item_j]
                    self.P[u] += invS * self.lRate * (
                        1 - prob(u, k, j, invS)) * (self.Q[k] - self.Q[j])
                    self.Q[k] += invS * self.lRate * (
                        1 - prob(u, k, j, invS)) * self.P[u]
                    self.Q[j] -= invS * self.lRate * (
                        1 - prob(u, k, j, invS)) * self.P[u]

                    self.P[u] -= self.lRate * self.regU * self.P[u]
                    self.Q[i] -= self.lRate * self.regI * self.Q[i]
                    self.Q[j] -= self.lRate * self.regI * self.Q[j]
                    self.Q[k] -= self.lRate * self.regI * self.Q[k]

                    self.loss += -log(prob(u, i, k)) - \
                        log(prob(u, k, j, invS))
                else:
                    item_j = choice(itemList)
                    while item_j in positives:
                        item_j = choice(itemList)
                    j = self.dao.item[item_j]
                    self.P[u] += self.lRate * (1 - prob(u, i, j)) * (
                        self.Q[i] - self.Q[j])
                    self.Q[i] += self.lRate * (1 - prob(u, i, j)) * self.P[u]
                    self.Q[j] -= self.lRate * (1 - prob(u, i, j)) * self.P[u]
                    self.loss += -log(prob(u, i, j))

        self.loss += self.regU * (self.P * self.P).sum() + self.regI * (
            self.Q * self.Q).sum()
        iteration += 1
        if self.isConverged(iteration):
            break
def buildModel(self):
    """Learn user factors from labelled users, ratings and SPPMI, then build examples.

    ``self.dao.ratings`` becomes the union of the training and test user
    rating dicts (test entries win on a shared user key).  Per item,
    ``self.sSet[item]`` collects users labelled ``'1'`` and
    ``self.nSet[item]`` users labelled ``'0'``.

    Each of the ``self.maxIter`` epochs performs three passes:
      1. for every item that has both kinds of users, pair each ``'1'``
         user with a randomly chosen ``'0'`` user and apply a pairwise
         sigmoid update on ``self.P`` / ``self.Q``;
      2. squared-error updates on ratings ``>= 5`` using
         ``self.predictRating``;
      3. squared-error updates factorising ``self.SPPMI`` into ``self.P``
         and ``self.G``.

    Finally the learned rows of ``self.P`` and the labels are collected into
    ``self.training`` / ``self.trainingLabels`` and ``self.test`` /
    ``self.testLabels``.
    """
    # Equivalent to dict(train, **test) but does not require string keys.
    merged = dict(self.dao.trainingSet_u)
    merged.update(self.dao.testSet_u)
    self.dao.ratings = merged

    print('Preparing sets...')
    # suspicious set
    self.sSet = defaultdict(dict)
    # normal set
    self.nSet = defaultdict(dict)
    for user in self.dao.user:
        for item in self.dao.ratings[user]:
            if self.labels[user] == '1':
                self.sSet[item][user] = 1
            if self.labels[user] == '0':
                self.nSet[item][user] = 1

    # Jointly decompose R(ratings) and SPPMI with shared user latent factors P
    iteration = 0
    while iteration < self.maxIter:
        self.loss = 0

        for item in self.sSet:
            i = self.dao.all_Item[item]
            # ``in`` does not create a key on the defaultdict.
            if item not in self.nSet:
                continue
            # choice() needs an indexable sequence, not a dict view.
            normalUserList = list(self.nSet[item].keys())
            for user in self.sSet[item]:
                su = self.dao.all_User[user]
                normalUser = choice(normalUserList)
                nu = self.dao.all_User[normalUser]

                s = sigmoid(self.P[su].dot(self.Q[i]) -
                            self.P[nu].dot(self.Q[i]))
                self.Q[i] += self.lRate * (1 - s) * (self.P[su] - self.P[nu])
                self.P[su] += self.lRate * (1 - s) * self.Q[i]
                self.P[nu] -= self.lRate * (1 - s) * self.Q[i]

                self.Q[i] -= self.lRate * self.regI * self.Q[i]
                self.P[su] -= self.lRate * self.regU * self.P[su]
                self.P[nu] -= self.lRate * self.regU * self.P[nu]

                self.loss += -log(s)

        for user in self.dao.ratings:
            for item in self.dao.ratings[user]:
                rating = self.dao.ratings[user][item]
                if rating < 5:
                    continue
                error = rating - self.predictRating(user, item)
                u = self.dao.all_User[user]
                i = self.dao.all_Item[item]
                p = self.P[u]
                q = self.Q[i]
                # update latent vectors
                self.P[u] += self.lRate * (error * q - self.regU * p)
                self.Q[i] += self.lRate * (error * p - self.regI * q)

        for user in self.SPPMI:
            u = self.dao.all_User[user]
            p = self.P[u]
            for context in self.SPPMI[user]:
                v = self.dao.all_User[context]
                m = self.SPPMI[user][context]
                g = self.G[v]
                diff = m - p.dot(g)
                self.loss += diff**2
                # update latent vectors
                self.P[u] += self.lRate * diff * g
                self.G[v] += self.lRate * diff * p

        self.loss += self.regU * (self.P * self.P).sum() + \
            self.regI * (self.Q * self.Q).sum() + \
            self.regR * (self.G * self.G).sum()
        iteration += 1
        print('iteration: %s' % iteration)

    # preparing examples
    self.training = []
    self.trainingLabels = []
    self.test = []
    self.testLabels = []

    for user in self.dao.trainingSet_u:
        self.training.append(self.P[self.dao.all_User[user]])
        self.trainingLabels.append(self.labels[user])
    for user in self.dao.testSet_u:
        self.test.append(self.P[self.dao.all_User[user]])
        self.testLabels.append(self.labels[user])
def buildModel(self):
    """Build a listening-based user network, embed it and train pairwise ranking.

    Stages, in order:
      1. Link users whose records share ``self.recType`` entries in
         ``self.CUNet`` (a neighbour is repeated once per shared entry).
      2. Generate ``self.walkCount`` random walks of ``self.walkLength``
         nodes per user and feed them to ``w2v.Word2Vec`` to obtain
         ``self.walkDim``-dimensional user vectors (stored in ``self.W``).
      3. Keep the ``self.topK`` most cosine-similar users per user.
      4. ``self.PositiveSet[user]``: one entry per record of the user
         (repeats kept); ``self.IPositiveSet[user]``: entries of the top-k
         similar users that the user does not have.
      5. For every positive entry ``i`` draw three samples: with implicit
         items available, sample ``k`` and an item ``j`` the user has not
         listened to and update towards ``i > k`` and, scaled by
         ``1 / self.s``, ``k > j``; otherwise a plain ``i > j`` update.

    Side effects: sets ``CUNet``, ``walks``, ``visited``, ``W``, ``topKSim``,
    ``PositiveSet``, ``IPositiveSet`` and ``loss`` on ``self`` and updates
    ``self.P`` / ``self.Q`` in place.
    """

    def prob(u, hi, lo, scale=1):
        # sigmoid of the (scaled) score gap with the *current* parameters;
        # re-evaluated for every update like the original inline code.
        return sigmoid(
            scale * (self.P[u].dot(self.Q[hi]) - self.P[u].dot(self.Q[lo])))

    print('Kind Note: This method will probably take much time.')
    # build C-U-NET
    print('Building collaborative user network...')
    userListen = defaultdict(dict)
    for user in self.data.userRecord:
        for item in self.data.userRecord[user]:
            userListen[user][item[self.recType]] = 1

    self.CUNet = defaultdict(list)
    # Build each user's key set once instead of once per user pair.
    listenSets = {user: set(listened)
                  for user, listened in userListen.items()}
    for user1 in userListen:
        s1 = listenSets[user1]
        for user2 in userListen:
            if user1 != user2:
                weight = len(s1.intersection(listenSets[user2]))
                if weight > 0:
                    self.CUNet[user1] += [user2] * weight

    print('Generating random deep walks...')
    self.walks = []
    self.visited = defaultdict(dict)
    for user in self.CUNet:
        for t in range(self.walkCount):
            path = [user]
            lastNode = user
            for i in range(1, self.walkLength):
                nextNode = choice(self.CUNet[lastNode])
                count = 0
                # NOTE(review): the check reads visited[lastNode] while the
                # mark below writes visited[user]; kept as found - confirm
                # which key is intended.
                while nextNode in self.visited[lastNode]:
                    nextNode = choice(self.CUNet[lastNode])
                    # break infinite loop
                    count += 1
                    if count == 10:
                        break
                path.append(nextNode)
                self.visited[user][nextNode] = 1
                lastNode = nextNode
            self.walks.append(path)
    shuffle(self.walks)

    # Training get top-k friends
    print('Generating user embedding...')
    # NOTE(review): ``size`` / ``iter`` are the keyword names used by the
    # gensim release this file targets - confirm before upgrading gensim.
    model = w2v.Word2Vec(self.walks,
                         size=self.walkDim,
                         window=self.winSize,
                         min_count=0,
                         iter=self.epoch)
    print('User embedding generated.')

    print('Constructing similarity matrix...')
    # Rows receive the Word2Vec vectors, so the width must be walkDim.
    # Allocating self.k columns only worked when k happened to equal
    # walkDim.
    self.W = np.random.rand(self.data.getSize('user'), self.walkDim) / 10
    self.topKSim = {}
    i = 0
    for user in self.CUNet:
        u = self.data.getId(user, 'user')
        self.W[u] = model.wv[user]
    for user1 in self.CUNet:
        sims = []
        u1 = self.data.getId(user1, 'user')
        for user2 in self.CUNet:
            if user1 != user2:
                u2 = self.data.getId(user2, 'user')
                sims.append((user2, cosine(self.W[u1], self.W[u2])))
        self.topKSim[user1] = sorted(sims, key=lambda d: d[1],
                                     reverse=True)[:self.topK]
        i += 1
        if i % 200 == 0:
            print('progress:', i, '/', len(self.CUNet))
    print('Similarity matrix finished.')

    # prepare Pu set, IPu set, and Nu set
    print('Preparing item sets...')
    self.PositiveSet = defaultdict(list)
    self.IPositiveSet = defaultdict(list)
    for user in self.data.userRecord:
        for event in self.data.userRecord[user]:
            self.PositiveSet[user].append(event[self.recType])

    for user in self.CUNet:
        for friend in self.topKSim[user]:
            self.IPositiveSet[user] += list(
                set(self.PositiveSet[friend[0]]).difference(
                    self.PositiveSet[user]))

    print('Training...')
    iteration = 0
    while iteration < self.maxIter:
        self.loss = 0
        itemList = list(self.data.name2id[self.recType].keys())
        listened = self.data.listened[self.recType]
        for user in self.PositiveSet:
            u = self.data.getId(user, 'user')
            implicitItems = self.IPositiveSet[user]
            for item in self.PositiveSet[user]:
                i = self.data.getId(item, self.recType)
                for n in range(3):
                    if len(implicitItems) > 0:
                        item_k = choice(implicitItems)
                        k = self.data.getId(item_k, self.recType)
                        self.P[u] += self.lRate * (1 - prob(u, i, k)) * (
                            self.Q[i] - self.Q[k])
                        self.Q[i] += self.lRate * (
                            1 - prob(u, i, k)) * self.P[u]
                        self.Q[k] -= self.lRate * (
                            1 - prob(u, i, k)) * self.P[u]

                        item_j = choice(itemList)
                        while user in listened[item_j]:
                            item_j = choice(itemList)
                        j = self.data.getId(item_j, self.recType)
                        self.P[u] += (1 / self.s) * self.lRate * (
                            1 - prob(u, k, j, 1 / self.s)) * (
                                self.Q[k] - self.Q[j])
                        self.Q[k] += (1 / self.s) * self.lRate * (
                            1 - prob(u, k, j, 1 / self.s)) * self.P[u]
                        self.Q[j] -= (1 / self.s) * self.lRate * (
                            1 - prob(u, k, j, 1 / self.s)) * self.P[u]

                        self.P[u] -= self.lRate * self.regU * self.P[u]
                        self.Q[i] -= self.lRate * self.regI * self.Q[i]
                        self.Q[j] -= self.lRate * self.regI * self.Q[j]
                        self.Q[k] -= self.lRate * self.regI * self.Q[k]

                        self.loss += -log(prob(u, i, k)) - \
                            log(prob(u, k, j, 1 / self.s))
                    else:
                        item_j = choice(itemList)
                        while user in listened[item_j]:
                            item_j = choice(itemList)
                        j = self.data.getId(item_j, self.recType)
                        self.P[u] += self.lRate * (1 - prob(u, i, j)) * (
                            self.Q[i] - self.Q[j])
                        self.Q[i] += self.lRate * (
                            1 - prob(u, i, j)) * self.P[u]
                        self.Q[j] -= self.lRate * (
                            1 - prob(u, i, j)) * self.P[u]
                        self.loss += -log(prob(u, i, j))

        self.loss += self.regU * (self.P * self.P).sum() + \
            self.regI * (self.Q * self.Q).sum()
        iteration += 1
        if self.isConverged(iteration):
            break
def buildModel(self):
    """Build meta-path walk embeddings, then fit P/Q with pairwise SGD.

    Steps, all operating on ``self``:

    1. Build adjacency lists between users, tracks, artists and (when the
       data has an ``'album'`` column) albums. A second "recent" set of
       user adjacency lists only covers each user's last 20 records.
    2. Generate random walks that follow the meta-paths UTU, UAU, UZU,
       UTATU, UTZTU, UTZAZTU (U=user, T=track, A=artist, Z=album) into
       ``self.walks`` (full history) and ``self.r_walks`` (recent history,
       half as many walks per user and meta-path).
    3. Train Word2Vec on the recent walks and copy the user vectors into
       ``self.R``.
    4. Run SGD over (user, listened track, sampled unlistened track)
       triples with a ``-log(sigmoid(x_ui - x_uj))`` loss, additionally
       pulling ``P[u]`` toward ``G[u]`` and ``R[u]`` (weights ``alpha`` and
       ``beta``), until ``maxIter`` or ``isConverged`` stops it.
    """
    print('Kind Note: This method will probably take much time.')
    # build U-F-NET
    print('Building weighted user-friend network...')
    hasAlbum = 'album' in self.data.columns

    # Random initial embeddings (creation order kept for RNG reproducibility).
    self.I = np.random.rand(self.data.getSize('track'), self.k) / 10  # item characteristics
    self.G = np.random.rand(self.data.getSize('user'), self.k) / 10  # global user preference
    self.R = np.random.rand(self.data.getSize('user'), self.k) / 10  # recent user preference

    self.user2track = defaultdict(list)
    self.user2artist = defaultdict(list)
    self.user2album = defaultdict(list)
    self.r_user2track = defaultdict(list)  # recent
    self.r_user2artist = defaultdict(list)
    self.r_user2album = defaultdict(list)
    self.track2user = defaultdict(list)
    self.artist2user = defaultdict(list)
    self.album2user = defaultdict(list)
    self.artist2track = defaultdict(list)
    self.artist2album = defaultdict(list)
    self.album2track = defaultdict(list)
    self.album2artist = {}
    self.track2artst = {}
    self.track2album = {}

    for user in self.data.userRecord:
        records = self.data.userRecord[user]
        for item in records:
            self.user2track[user].append(item['track'])
            self.user2artist[user].append(item['artist'])
            if hasAlbum:
                self.user2album[user].append(item['album'])
        # The "recent" view is the last 20 records of the user.
        recent = max(0, len(records) - 20)
        for item in records[recent:]:
            self.r_user2track[user].append(item['track'])
            self.r_user2artist[user].append(item['artist'])
            if hasAlbum:
                # Fixed: this used to append to self.user2album, which left
                # r_user2album empty and duplicated entries in user2album.
                self.r_user2album[user].append(item['album'])

    # Reverse edges: a user is repeated once per listen so that
    # choice() samples users proportionally to their play counts.
    for artist in self.data.listened['artist']:
        for user in self.data.listened['artist'][artist]:
            self.artist2user[artist] += [user] * self.data.listened['artist'][artist][user]
    for track in self.data.listened['track']:
        for user in self.data.listened['track'][track]:
            self.track2user[track] += [user] * self.data.listened['track'][track][user]
    if hasAlbum:
        for album in self.data.listened['album']:
            for user in self.data.listened['album'][album]:
                self.album2user[album] += [user] * self.data.listened['album'][album][user]

    # list(...) keeps these indexable for choice() on Python 3 as well.
    for artist in self.data.artist2Track:
        self.artist2track[artist] = list(self.data.artist2Track[artist])
        for key in self.data.artist2Track[artist]:
            self.track2artst[key] = artist
    if hasAlbum:
        for album in self.data.album2Track:
            self.album2track[album] = list(self.data.album2Track[album])
            for key in self.data.album2Track[album]:
                self.track2album[key] = album
        for artist in self.data.artist2Album:
            self.artist2album[artist] = list(self.data.artist2Album[artist])
            for key in self.data.artist2Album[artist]:
                self.album2artist[key] = artist

    print('Generating random meta-path random walks...')
    p1 = 'UTU'
    p2 = 'UAU'
    p3 = 'UZU'
    p4 = 'UTATU'
    p5 = 'UTZTU'
    p6 = 'UTZAZTU'
    if hasAlbum:
        mPaths = [p1, p2, p3, p4, p5, p6]
    else:
        mPaths = [p1, p2, p4]

    def metaPathWalks(user2track, user2artist, user2album, walkCount):
        """Return the walks for every user and meta-path.

        Only the three user-side adjacency maps differ between the global
        and the recent walks; all other edges come from ``self``.
        """
        walks = []
        for user in self.data.userRecord:
            for mp in mPaths:
                hops = mp[1:]
                # Floor division: number of times the meta-path is repeated.
                rounds = self.walkLength // len(hops)
                for t in range(walkCount):
                    path = [user]
                    lastNode = user
                    nextNode = user
                    lastType = 'U'
                    for r in range(rounds):
                        for tp in hops:
                            try:
                                if tp == 'T' and lastType == 'U':
                                    nextNode = choice(user2track[lastNode])
                                elif tp == 'T' and lastType == 'A':
                                    nextNode = choice(self.artist2track[lastNode])
                                elif tp == 'T' and lastType == 'Z':
                                    nextNode = choice(self.album2track[lastNode])
                                elif tp == 'A' and lastType == 'T':
                                    nextNode = self.track2artst[lastNode]
                                elif tp == 'A' and lastType == 'Z':
                                    nextNode = self.album2artist[lastNode]
                                elif tp == 'A' and lastType == 'U':
                                    nextNode = choice(user2artist[lastNode])
                                elif tp == 'Z' and lastType == 'U':
                                    nextNode = choice(user2album[lastNode])
                                elif tp == 'Z' and lastType == 'A':
                                    nextNode = choice(self.artist2album[lastNode])
                                elif tp == 'Z' and lastType == 'T':
                                    nextNode = self.track2album[lastNode]
                                elif tp == 'U':
                                    if lastType == 'T':
                                        nextNode = choice(self.track2user[lastNode])
                                    elif lastType == 'Z':
                                        nextNode = choice(self.album2user[lastNode])
                                    elif lastType == 'A':
                                        nextNode = choice(self.artist2user[lastNode])
                                path.append(nextNode)
                                lastNode = nextNode
                                lastType = tp
                            except (KeyError, IndexError):
                                # Missing edge or empty neighbour list: reset
                                # the path. As in the original, only the
                                # inner hop loop is left; later rounds still run.
                                path = []
                                break
                    if path:
                        walks.append(path)
        return walks

    # global walks
    self.walks = metaPathWalks(self.user2track, self.user2artist,
                               self.user2album, self.walkCount)
    shuffle(self.walks)
    # recent walks (half as many per user and meta-path)
    self.r_walks = metaPathWalks(self.r_user2track, self.r_user2artist,
                                 self.r_user2album, self.walkCount // 2)
    shuffle(self.r_walks)

    print('walks: %d' % len(self.walks))
    print('recent walks %d' % len(self.r_walks))

    print('Generating user embedding...')
    # NOTE(review): g_model is created without a corpus (the call training it
    # on self.walks with size=self.k is disabled in the original), so the
    # lookups below presumably always raise KeyError and G / I / Q keep their
    # current values. Confirm whether this is intentional.
    g_model = w2v.Word2Vec()
    for user in self.data.userRecord:
        uid = self.data.getId(user, 'user')
        try:
            self.G[uid] = g_model.wv[user]
        except KeyError:
            pass  # not in the vocabulary: keep the random initialisation
    for item in self.data.name2id['track']:
        iid = self.data.getId(item, 'track')
        try:
            self.I[iid] = g_model.wv[item]
        except KeyError:
            pass

    r_model = w2v.Word2Vec(self.r_walks,
                           size=self.k,
                           window=self.winSize,
                           min_count=0,
                           iter=self.epoch)
    for user in self.data.userRecord:
        uid = self.data.getId(user, 'user')
        try:
            self.R[uid] = r_model.wv[user]
        except KeyError:
            # A user without any surviving recent walk keeps the random
            # vector, matching how the G / I / Q lookups are handled.
            pass
    for item in self.data.listened[self.recType]:
        iid = self.data.getId(item, self.recType)
        try:
            self.Q[iid] = g_model.wv[item]
        except KeyError:
            pass
    print('User embedding generated.')

    # Tracks each user has listened to, for rejecting sampled negatives.
    userListened = defaultdict(set)
    for user in self.data.userRecord:
        for item in self.data.userRecord[user]:
            userListened[user].add(item['track'])

    print('training...')
    iteration = 0
    itemList = list(self.data.name2id['track'])
    while iteration < self.maxIter:
        self.loss = 0
        for user in self.data.userRecord:
            u = self.data.getId(user, 'user')
            listened = userListened[user]
            for item in self.data.userRecord[user]:
                i = self.data.getId(item['track'], 'track')
                # Rejection-sample a track the user has not listened to.
                # NOTE: never terminates if the user listened to every track.
                item_j = choice(itemList)
                while item_j in listened:
                    item_j = choice(itemList)
                j = self.data.getId(item_j, 'track')
                s = sigmoid(self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[j]))
                # Sequential updates: each line sees the previous line's result.
                self.P[u] += self.lRate * (1 - s) * (self.Q[i] - self.Q[j])
                self.Q[i] += self.lRate * (1 - s) * self.P[u]
                self.Q[j] -= self.lRate * (1 - s) * self.P[u]
                # Pull P[u] toward the global (G) and recent (R) embeddings.
                self.P[u] -= self.lRate * self.alpha * (
                    self.beta * (self.P[u] - self.G[u]) +
                    (1 - self.beta) * (self.P[u] - self.R[u]))
                self.P[u] -= self.lRate * self.regU * self.P[u]
                self.Q[i] -= self.lRate * self.regI * self.Q[i]
                self.Q[j] -= self.lRate * self.regI * self.Q[j]
                self.loss += -log(s)
        self.loss += self.regU * (self.P * self.P).sum() + self.regI * (self.Q * self.Q).sum() \
            + self.alpha * ((1 - self.beta) * ((self.P - self.R) * (self.P - self.R)).sum() +
                            self.beta * ((self.P - self.G) * (self.P - self.G)).sum())
        iteration += 1
        if self.isConverged(iteration):
            break
def buildModel(self):
    """Train P and Q by SGD on sampled (user, positive, [social,] negative) items.

    Each epoch draws ``len(self.dao.user)`` samples. For every sample a user
    with at least one training item is picked, then one of their rated items
    (positive) and one item that is neither rated by the user nor in the
    user's social item list (negative).

    * If the user has social items, one is sampled and the loss is
      ``-log(sigmoid((x_pos - x_social) / (1 + w))) - log(sigmoid(x_social - x_neg))``
      where ``w`` counts the followees whose rating of the social item is > 0.
    * Otherwise the loss is ``-log(sigmoid(x_pos - x_neg))``.

    Factors are updated one latent dimension at a time. Training stops after
    ``self.maxIter`` epochs or as soon as ``self.isConverged`` returns true.
    """
    # Materialise the key lists once: random.choice needs a sequence (a dict
    # view fails on Python 3), and rebuilding them per sample is wasted work.
    userList = list(self.dao.user)
    itemList = list(self.dao.item)

    iteration = 0
    while iteration < self.maxIter:
        self.loss = 0
        for sample in range(len(userList)):
            # Resample until the user has at least one training item.
            while True:
                userIdx = choice(userList)
                ratedItems = self.dao.trainSet_u[userIdx]
                if len(ratedItems) != 0:
                    break

            # positive item index
            posItemIdx = choice(list(ratedItems))
            posPredictRating = self.predict(userIdx, posItemIdx)

            # social items list
            socialItemsList = self.userSocialItemsSetList[userIdx]

            # negative item index: neither rated nor a social item
            while True:
                negItemIdx = choice(itemList)
                if negItemIdx not in ratedItems and negItemIdx not in socialItemsList:
                    break
            negPredictRating = self.predict(userIdx, negItemIdx)

            userId = self.dao.getUserId(userIdx)
            posItemId = self.dao.getItemId(posItemIdx)
            negItemId = self.dao.getItemId(negItemIdx)

            if len(socialItemsList) > 0:
                socialItemIdx = choice(socialItemsList)
                socialItemId = self.dao.getItemId(socialItemIdx)
                socialPredictRating = self.predict(userIdx, socialItemIdx)

                # NOTE(review): followees are looked up with userId (the value
                # returned by getUserId) although the other lookups here use
                # userIdx. Confirm which key getFollowees expects.
                trustedUsers = self.sao.getFollowees(userId)
                # Number of followees with a positive rating on the social item.
                socialWeight = 0
                for trustedUserIdx in trustedUsers:
                    socialRating = self.dao.rating(trustedUserIdx, socialItemIdx)
                    if socialRating > 0:
                        socialWeight += 1

                posSocialDiffValue = (posPredictRating - socialPredictRating) / (1 + socialWeight)
                socialNegDiffValue = socialPredictRating - negPredictRating
                error = -math.log(qmath.sigmoid(posSocialDiffValue)) \
                    - math.log(qmath.sigmoid(socialNegDiffValue))
                self.loss += error

                posSocialGradient = qmath.sigmoid(-posSocialDiffValue)
                socialNegGradient = qmath.sigmoid(-socialNegDiffValue)

                # update P, Q
                for factorIdx in range(self.k):
                    # Read all four values before writing so that every update
                    # in this dimension uses the pre-update factors.
                    userFactorValue = self.P[userId][factorIdx]
                    posItemFactorValue = self.Q[posItemId][factorIdx]
                    socialItemFactorValue = self.Q[socialItemId][factorIdx]
                    negItemFactorValue = self.Q[negItemId][factorIdx]

                    delta_puf = posSocialGradient * (
                        posItemFactorValue - socialItemFactorValue) / (
                            1 + socialWeight) + socialNegGradient * (
                                socialItemFactorValue - negItemFactorValue)
                    self.P[userId][factorIdx] += self.lRate * (
                        delta_puf - self.regU * userFactorValue)
                    self.Q[posItemId][factorIdx] += self.lRate * (
                        posSocialGradient * userFactorValue / (1 + socialWeight) -
                        self.regI * posItemFactorValue)
                    delta_qkf = posSocialGradient * (
                        -userFactorValue / (1 + socialWeight)
                    ) + socialNegGradient * userFactorValue
                    self.Q[socialItemId][factorIdx] += self.lRate * (
                        delta_qkf - self.regI * socialItemFactorValue)
                    self.Q[negItemId][factorIdx] += self.lRate * (
                        socialNegGradient * (-userFactorValue) -
                        self.regI * negItemFactorValue)
                    self.loss += self.regU * userFactorValue * userFactorValue \
                        + self.regI * posItemFactorValue * posItemFactorValue \
                        + self.regI * negItemFactorValue * negItemFactorValue \
                        + self.regI * socialItemFactorValue * socialItemFactorValue
            else:
                # if no social neighbors, the same as BPR
                posNegDiffValue = posPredictRating - negPredictRating
                self.loss += -math.log(qmath.sigmoid(posNegDiffValue))
                posNegGradient = qmath.sigmoid(-posNegDiffValue)

                # update user factors, item factors
                for factorIdx in range(self.k):
                    userFactorValue = self.P[userId][factorIdx]
                    posItemFactorValue = self.Q[posItemId][factorIdx]
                    negItemFactorValue = self.Q[negItemId][factorIdx]
                    self.P[userId][factorIdx] += self.lRate * (
                        posNegGradient * (posItemFactorValue - negItemFactorValue) -
                        self.regU * userFactorValue)
                    self.Q[posItemId][factorIdx] += self.lRate * (
                        posNegGradient * userFactorValue -
                        self.regI * posItemFactorValue)
                    self.Q[negItemId][factorIdx] += self.lRate * (
                        posNegGradient * (-userFactorValue) -
                        self.regI * negItemFactorValue)
                    self.loss += self.regU * userFactorValue * userFactorValue \
                        + self.regI * posItemFactorValue * posItemFactorValue \
                        + self.regI * negItemFactorValue * negItemFactorValue
        iteration += 1
        if self.isConverged(iteration):
            break
def buildModel(self):
    """Derive implicit friends from meta-path walks, then fit P/Q by SGD.

    Steps, all operating on ``self``:

    1. Build adjacency lists between users, tracks, artists and (when the
       data has an ``'album'`` column) albums.
    2. Generate random walks along the meta-paths UTU, UAU, UZU, UTATU,
       UTZTU, UTZAZTU (U=user, T=track, A=artist, Z=album) and train
       Word2Vec on them; user vectors are stored in ``self.W``.
    3. For every user keep the ``self.topK`` other users with the highest
       ``cosine`` score in ``self.topKSim``.
    4. Build per-user positive items (own tracks) and "implicit positive"
       items (tracks of the top-K users that the user has not listened to).
    5. Run SGD. With implicit positives available the ordering
       positive > implicit positive > sampled negative is trained;
       otherwise positive > sampled negative.
    """
    print('Kind Note: This method will probably take much time.')
    # build U-F-NET
    print('Building weighted user-friend network...')
    hasAlbum = 'album' in self.data.columns

    self.G = np.random.rand(self.data.getSize('user'), self.k) / 10
    self.W = np.random.rand(self.data.getSize('user'), self.walkDim) / 10

    self.user2track = defaultdict(list)
    self.user2artist = defaultdict(list)
    self.user2album = defaultdict(list)
    self.track2user = defaultdict(list)
    self.artist2user = defaultdict(list)
    self.album2user = defaultdict(list)
    self.artist2track = defaultdict(list)
    self.artist2album = defaultdict(list)
    self.album2track = defaultdict(list)
    self.album2artist = {}
    self.track2artst = {}
    self.track2album = {}

    for user in self.data.userRecord:
        for item in self.data.userRecord[user]:
            self.user2track[user].append(item[self.recType])
            self.user2artist[user].append(item['artist'])
            if hasAlbum:
                self.user2album[user].append(item['album'])

    # Reverse edges: a user is repeated once per listen so that
    # choice() samples users proportionally to their play counts.
    for artist in self.data.listened['artist']:
        for user in self.data.listened['artist'][artist]:
            self.artist2user[artist] += [user] * self.data.listened['artist'][artist][user]
    for track in self.data.trackListened:
        for user in self.data.trackListened[track]:
            self.track2user[track] += [user] * self.data.trackListened[track][user]
    if hasAlbum:
        for album in self.data.listened['album']:
            for user in self.data.listened['album'][album]:
                self.album2user[album] += [user] * self.data.listened['album'][album][user]

    # list(...) keeps these indexable for choice() on Python 3 as well.
    for artist in self.data.artist2Track:
        self.artist2track[artist] = list(self.data.artist2Track[artist])
        for key in self.data.artist2Track[artist]:
            self.track2artst[key] = artist
    if hasAlbum:
        for album in self.data.album2Track:
            self.album2track[album] = list(self.data.album2Track[album])
            for key in self.data.album2Track[album]:
                self.track2album[key] = album
        for artist in self.data.artist2Album:
            self.artist2album[artist] = list(self.data.artist2Album[artist])
            for key in self.data.artist2Album[artist]:
                self.album2artist[key] = artist

    print('Generating random meta-path random walks...')
    self.walks = []
    p1 = 'UTU'
    p2 = 'UAU'
    p3 = 'UZU'
    p4 = 'UTATU'
    p5 = 'UTZTU'
    p6 = 'UTZAZTU'
    if hasAlbum:
        mPaths = [p1, p2, p3, p4, p5, p6]
    else:
        mPaths = [p1, p2, p4]

    for user in self.data.userRecord:
        for mp in mPaths:
            hops = mp[1:]
            # Floor division: number of times the meta-path is repeated.
            rounds = self.walkLength // len(hops)
            for t in range(self.walkCount):
                path = [user]
                lastNode = user
                nextNode = user
                lastType = 'U'
                for r in range(rounds):
                    for tp in hops:
                        try:
                            if tp == 'T' and lastType == 'U':
                                nextNode = choice(self.user2track[lastNode])
                            elif tp == 'T' and lastType == 'A':
                                nextNode = choice(self.artist2track[lastNode])
                            elif tp == 'T' and lastType == 'Z':
                                nextNode = choice(self.album2track[lastNode])
                            elif tp == 'A' and lastType == 'T':
                                nextNode = self.track2artst[lastNode]
                            elif tp == 'A' and lastType == 'Z':
                                nextNode = self.album2artist[lastNode]
                            elif tp == 'A' and lastType == 'U':
                                nextNode = choice(self.user2artist[lastNode])
                            elif tp == 'Z' and lastType == 'U':
                                nextNode = choice(self.user2album[lastNode])
                            elif tp == 'Z' and lastType == 'A':
                                nextNode = choice(self.artist2album[lastNode])
                            elif tp == 'Z' and lastType == 'T':
                                nextNode = self.track2album[lastNode]
                            elif tp == 'U':
                                if lastType == 'T':
                                    nextNode = choice(self.track2user[lastNode])
                                elif lastType == 'Z':
                                    nextNode = choice(self.album2user[lastNode])
                                elif lastType == 'A':
                                    nextNode = choice(self.artist2user[lastNode])
                            path.append(nextNode)
                            lastNode = nextNode
                            lastType = tp
                        except (KeyError, IndexError):
                            # Missing edge or empty neighbour list: reset the
                            # path. As in the original, only the inner hop
                            # loop is left; later rounds still run.
                            path = []
                            break
                if path:
                    self.walks.append(path)
    shuffle(self.walks)
    print('walks: %d' % len(self.walks))

    # Training get top-k friends
    print('Generating user embedding...')
    self.topKSim = {}
    model = w2v.Word2Vec(self.walks,
                         size=self.walkDim,
                         window=5,
                         min_count=0,
                         iter=self.epoch)
    for user in self.data.userRecord:
        uid = self.data.getId(user, 'user')
        self.W[uid] = model.wv[user]
    print('User embedding generated.')

    print('Constructing similarity matrix...')
    # Look every user's vector up once instead of once per user pair.
    userVecs = [(user, self.W[self.data.getId(user, 'user')])
                for user in self.data.userRecord]
    for count, (user1, vec1) in enumerate(userVecs, 1):
        if count % 200 == 0:
            print('%s / %s' % (count, len(userVecs)))
        uSim = [(user2, cosine(vec1, vec2))
                for user2, vec2 in userVecs if user1 != user2]
        self.topKSim[user1] = sorted(uSim, key=lambda d: d[1],
                                     reverse=True)[:self.topK]
    # (The original carried a disabled block here that pickled
    # self.topKSim and (self.W, self.G) to 'SocialMR-lastfm-*.pkl'.)

    # prepare Pu set, IPu set, and Nu set
    self.b = np.random.random(self.data.getSize('track')) / 10
    print('Preparing item sets...')
    self.PositiveSet = defaultdict(dict)
    self.pSet = defaultdict(list)
    self.IPositiveSet = defaultdict(dict)
    self.ipSet = defaultdict(list)
    for user in self.data.userRecord:
        for item in self.data.userRecord[user]:
            self.PositiveSet[user][item['track']] = 1
            self.pSet[user].append(item['track'])
        for friend, sim in self.topKSim[user]:
            for item in self.data.userRecord[friend]:
                if item['track'] not in self.PositiveSet[user]:
                    self.IPositiveSet[user][item['track']] = 1
                    # Appended on every occurrence, so repeated tracks are
                    # sampled more often from ipSet.
                    self.ipSet[user].append(item['track'])

    Suk = 0.5
    print('Training...')
    iteration = 0
    itemList = list(self.data.name2id['track'])
    while iteration < self.maxIter:
        self.loss = 0
        for user in self.pSet:
            u = self.data.getId(user, 'user')
            kItems = self.ipSet[user]
            for item in self.pSet[user]:
                i = self.data.getId(item, 'track')
                if len(kItems) > 0:
                    # positive item i > implicit positive item k
                    item_k = choice(kItems)
                    k = self.data.getId(item_k, 'track')
                    s1 = sigmoid((self.P[u].dot(self.Q[i]) -
                                  self.P[u].dot(self.Q[k])) / (Suk + 1))
                    self.P[u] += 1 / (Suk + 1) * self.lRate * (1 - s1) * (
                        self.Q[i] - self.Q[k])
                    self.Q[i] += 1 / (Suk + 1) * self.lRate * (1 - s1) * self.P[u]
                    self.Q[k] -= 1 / (Suk + 1) * self.lRate * (1 - s1) * self.P[u]

                    # implicit positive item k > negative item j, where j is
                    # in neither the positive nor the implicit-positive set
                    item_j = choice(itemList)
                    while (item_j in self.PositiveSet[user]
                           or item_j in self.IPositiveSet[user]):
                        item_j = choice(itemList)
                    j = self.data.getId(item_j, 'track')
                    s2 = sigmoid(self.P[u].dot(self.Q[k]) - self.P[u].dot(self.Q[j]))
                    self.P[u] += self.lRate * (1 - s2) * (self.Q[k] - self.Q[j])
                    self.Q[k] += self.lRate * (1 - s2) * self.P[u]
                    self.Q[j] -= self.lRate * (1 - s2) * self.P[u]

                    self.P[u] -= self.lRate * self.regU * self.P[u]
                    self.Q[i] -= self.lRate * self.regI * self.Q[i]
                    self.Q[j] -= self.lRate * self.regI * self.Q[j]
                    self.Q[k] -= self.lRate * self.regI * self.Q[k]
                    self.loss += -log(s1) - log(s2)
                else:
                    # No implicit positives: positive item i > negative item j.
                    item_j = choice(itemList)
                    while item_j in self.PositiveSet[user]:
                        item_j = choice(itemList)
                    j = self.data.getId(item_j, 'track')
                    s = sigmoid(self.P[u].dot(self.Q[i]) - self.P[u].dot(self.Q[j]))
                    self.P[u] += self.lRate * (1 - s) * (self.Q[i] - self.Q[j])
                    self.Q[i] += self.lRate * (1 - s) * self.P[u]
                    self.Q[j] -= self.lRate * (1 - s) * self.P[u]
                    self.P[u] -= self.lRate * self.regU * self.P[u]
                    self.Q[i] -= self.lRate * self.regI * self.Q[i]
                    self.Q[j] -= self.lRate * self.regI * self.Q[j]
                    self.loss += -log(s)
        self.loss += self.regU * (self.P * self.P).sum() + self.regI * (
            self.Q * self.Q).sum()
        iteration += 1
        if self.isConverged(iteration):
            break