def predictLabels(self, usr_rep, W, bds):
    """Exhaustively search label-column combinations and return the best.

    Args:
        usr_rep: user representation vector (1-D numpy array).
        W: label weight matrix whose columns are combined via sumOverW.
        bds: list of (low, high) inclusive column-index bounds, one pair
            per label field.

    Returns:
        list of column indices (one per field) maximizing
        usr_rep^T . sumOverW(W, cols).
    """
    # Materialize as a list: Python 2's map() returned a list, but on
    # Python 3 it is a one-shot iterator, which would break sumOverW
    # here and the return value for callers.
    bestCols = [bd[0] for bd in bds]
    bestScore = usr_rep.transpose().dot(sumOverW(W, bestCols))
    # Cartesian product of each field's admissible column range; iterate
    # lazily instead of materializing the whole product up front.
    bdsRanges = [range(bd[0], bd[1] + 1) for bd in bds]
    for comb in product(*bdsRanges):
        cols = list(comb)
        score = usr_rep.transpose().dot(sumOverW(W, cols))
        # '>=' preserved: ties are won by the later combination, as in
        # the original implementation.
        if score >= bestScore:
            bestScore = score
            bestCols = cols
    return bestCols
def test_sumOverW(self):
    """Summing columns 0 and 2 of W must yield 2.0 in every row."""
    weights = np.array([[0, 1, 2, 4],
                        [0, 1, 2, 4]])
    summed = sumOverW(weights, [0, 2])
    for entry in summed:
        self.assertEqual(entry, 2.0)
def test_getGradsOfW(self):
    """Check Baseupdator.getGradsOfW against hand-computed per-column grads.

    Fixture: y = [1,0,1,0] (nonzero cols [0, 2]), user rep [1, 1]', and two
    negative samples yneg = [0,1,0,1] and [1,0,0,1] whose sigmoid terms are
    stubbed as sigmoid(10) and sigmoid(8).
    """
    W = np.array([[0, 1, 2, 4], [0, 1, 2, 4], ]).astype(float)
    y_nonzeroCols = [0, 2]  # assume y = [1,0,1,0]
    sumedW_y = sumOverW(W, y_nonzeroCols)  # should be [2,2]'
    usr_rep = np.array([[1, 1], [2, 2], ])[0].astype(float)  # should be [1,1]'
    sigmoid_y = sigmoid(- usr_rep.transpose().dot(sumedW_y))
    sigmoids_negs = [sigmoid(10), sigmoid(8)]
    # assume yneg = [0,1,0,1], [1,0,0,1] resp
    y_negsNonzeroCols = [[1, 3], [0, 3]]
    gradsOfW = np.zeros((2, 4))
    # Only ITEM_FIELDS_NUM and LAMBDA matter to getGradsOfW; the rest of
    # the Baseupdator constructor args are randomized "don't care" values.
    NEG_SAMPLE_NUM = int(random.random() * 10)  # Dont care
    ITEM_FIELDS_NUM = W.shape[0]
    MAX_TRAIN_NUM = int(random.random() * 10)  # Dont care
    LEARNING_RATE = random.random()  # Dont care
    MOMENTUM = int(random.random() * 10)  # Dont care
    LAMBDA = random.random()  # Dont care
    args = (NEG_SAMPLE_NUM, ITEM_FIELDS_NUM, MAX_TRAIN_NUM,
            LEARNING_RATE, MOMENTUM, LAMBDA)
    baseupdator = Baseupdator(*args)
    # NOTE(review): grad0/grad1/grad2 appear to be the positive-label term,
    # the negative-sample term, and the L2 term of dLoss/dW[:, col] —
    # confirm against Baseupdator.getGradsOfW.
    # col = 0
    grad0 = - sigmoid(-4) * usr_rep
    grad1 = (0 + sigmoids_negs[1]) * usr_rep
    grad2 = 2 * LAMBDA * W[:, [0]]
    gradsOfW[:, [0]] = grad0.reshape(2, 1) + grad1.reshape(2, 1) + grad2
    # col = 1 (y has a zero here, so the positive term is zeroed out)
    grad0 = - sigmoid(-4) * usr_rep * 0
    grad1 = (sigmoids_negs[0] + 0) * usr_rep
    grad2 = 2 * LAMBDA * W[:, [1]]
    gradsOfW[:, [1]] = grad0.reshape(2, 1) + grad1.reshape(2, 1) + grad2
    # col = 2 (no negative sample touches this column)
    grad0 = - sigmoid(-4) * usr_rep
    grad1 = (0 + 0) * usr_rep
    grad2 = 2 * LAMBDA * W[:, [2]]
    gradsOfW[:, [2]] = grad0.reshape(2, 1) + grad1.reshape(2, 1) + grad2
    # col = 3 (both negative samples touch this column)
    grad0 = - sigmoid(-4) * usr_rep * 0
    grad1 = (sigmoids_negs[0] + sigmoids_negs[1]) * usr_rep
    grad2 = 2 * LAMBDA * W[:, [3]]
    gradsOfW[:, [3]] = grad0.reshape(2, 1) + grad1.reshape(2, 1) + grad2
    actualGradsOfW = baseupdator.getGradsOfW(W,
                                             y_nonzeroCols,
                                             sigmoid_y,
                                             usr_rep,
                                             sigmoids_negs,
                                             y_negsNonzeroCols)
    expectGradsOfW = gradsOfW
    # Element-wise exact comparison of the full gradient matrix.
    for (x, y), ele in np.ndenumerate(actualGradsOfW):
        self.assertEqual(actualGradsOfW[x, y], expectGradsOfW[x, y])
def getSampledLoss(self, W, V, usr2NonzeroCols, usr2negsNonzeroCols,
                   usr2itemsIndx, pooler, sampleSize=10):
    """Estimate the training loss on a random subset of users.

    Computes the same per-user loss as getAvgLoss (positive-label term,
    negative-sample terms, plus an L2 penalty) but sums it over at most
    `sampleSize` users sampled without replacement instead of averaging
    over everyone.

    Args:
        W: label weight matrix.
        V: item representation rows.
        usr2NonzeroCols: usrid -> nonzero columns of the true label.
        usr2negsNonzeroCols: usrid -> list of negative-sample column lists.
        usr2itemsIndx: usrid -> indices of the user's items.
        pooler: exposes pool_all(itemsIndx, V) -> user representation.
        sampleSize: users to sample; clamped to the population size so
            random.sample cannot raise ValueError on small user sets.

    Returns:
        float: summed loss over the sampled users.
    """
    usrs = list(usr2itemsIndx.keys())
    # Sample the ids directly (equivalent to sampling indices) and clamp
    # the size — the original raised ValueError for sampleSize > len(usrs).
    sampledUsrs = random.sample(usrs, min(sampleSize, len(usrs)))
    loss = 0.0
    for usrid in sampledUsrs:
        usr_rep = pooler.pool_all(usr2itemsIndx[usrid], V)

        # 0. -log( sigmoid( usr_rep * sumedW_y ) )
        sumedW_y = sumOverW(W, usr2NonzeroCols[usrid])
        usrloss = -math.log(sigmoid(usr_rep.transpose().dot(sumedW_y)))

        # 1. - sum log( sigmoid( - usr_rep * sumedW_neg ) ); list
        #    comprehensions instead of map() keep this Python-3 safe.
        sumedW_negs = [
            sumOverW(W, negCols).reshape(self.ITEM_FIELDS_NUM, 1)
            for negCols in usr2negsNonzeroCols[usrid]
        ]
        usrloss -= sum(
            math.log(sigmoid((-1) * usr_rep.transpose().dot(sumedW_neg)))
            for sumedW_neg in sumedW_negs
        )

        # 2. l2 norm — added once per sampled user, matching the original
        #    behavior (and getAvgLoss).
        usrloss += np.linalg.norm(W) + sum(np.linalg.norm(v) for v in V)

        loss += usrloss
    return loss
def getTerms(self, usrid, usr2labels, usr2NonzeroCols, usr2itemsIndx,
             W, usr_rep, usr2negsNonzeroCols):
    """Collect the per-user quantities used by the gradient updates.

    Args:
        usrid: user id to look up in the dicts below.
        usr2labels: usrid -> full label vector.
        usr2NonzeroCols: usrid -> nonzero columns of the true label.
        usr2itemsIndx: usrid -> indices of the user's items.
        W: label weight matrix.
        usr_rep: pooled user representation vector.
        usr2negsNonzeroCols: usrid -> list of negative-sample column lists.

    Returns:
        Tuple (y, y_nonzeroCols, itemsIndx, sumedW_y, sigmoid_y,
        y_negsNonzeroCols, sumedW_negs, sigmoids_negs, sigmoidedSumedW),
        where sumedW_negs / sigmoids_negs are lists aligned index-by-index.
    """
    y = usr2labels[usrid]
    y_nonzeroCols = usr2NonzeroCols[usrid]
    itemsIndx = usr2itemsIndx[usrid]
    sumedW_y = sumOverW(W, y_nonzeroCols)
    # sigmoid(-z) is equivalent to 1 - sigmoid(z)
    sigmoid_y = sigmoid((-1) * usr_rep.transpose().dot(sumedW_y))
    y_negsNonzeroCols = usr2negsNonzeroCols[usrid]
    # Build real lists, not map() objects: on Python 3 map() is a one-shot
    # iterator, so the original's `sumedW_negs[ind]` raised TypeError and
    # the returned sigmoids_negs would already be exhausted.
    sumedW_negs = [
        sumOverW(W, y_negNonzeroCols).reshape(self.ITEM_FIELDS_NUM, 1)
        for y_negNonzeroCols in y_negsNonzeroCols
    ]
    sigmoids_negs = [
        sigmoid(usr_rep.transpose().dot(sumedW_neg))
        for sumedW_neg in sumedW_negs
    ]
    # Sigmoid-weighted sum of the negative-sample weight columns.
    sigmoidedSumedW = np.zeros((self.ITEM_FIELDS_NUM, 1))
    for sigmoid_neg, sumedW_neg in zip(sigmoids_negs, sumedW_negs):
        sigmoidedSumedW += sigmoid_neg * sumedW_neg
    return y, y_nonzeroCols, itemsIndx, sumedW_y, sigmoid_y, \
        y_negsNonzeroCols, sumedW_negs, sigmoids_negs, sigmoidedSumedW
def getAvgLoss(self, W, V, usr2NonzeroCols, usr2negsNonzeroCols,
               usr2itemsIndx, pooler):
    """Average the per-user loss over all users (best-effort).

    Per user: -log(sigmoid(usr_rep . sumedW_y)) for the true label,
    minus sum of log(sigmoid(-usr_rep . sumedW_neg)) over negative
    samples, plus an L2 penalty on W and V. Users whose computation
    raises are skipped, preserving the original best-effort behavior.

    Returns:
        float: mean loss over the users that succeeded; 0.0 when none did
        (the original raised ZeroDivisionError in that case).
    """
    loss = 0.0
    cnt = 0
    for usrid in usr2itemsIndx:
        try:
            usr_rep = pooler.pool_all(usr2itemsIndx[usrid], V)

            # 0. -log( sigmoid( usr_rep * sumedW_y ) )
            sumedW_y = sumOverW(W, usr2NonzeroCols[usrid])
            usrloss = -math.log(
                sigmoid(usr_rep.transpose().dot(sumedW_y)))

            # 1. - sum log( sigmoid( - usr_rep * sumedW_neg ) )
            sumedW_negs = [
                sumOverW(W, negCols).reshape(self.ITEM_FIELDS_NUM, 1)
                for negCols in usr2negsNonzeroCols[usrid]
            ]
            usrloss -= sum(
                math.log(
                    sigmoid((-1) * usr_rep.transpose().dot(sumedW_neg)))
                for sumedW_neg in sumedW_negs
            )

            # 2. l2 norm
            usrloss += np.linalg.norm(W) + sum(
                np.linalg.norm(v) for v in V)
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit still propagate; failed users are skipped.
            continue
        loss += usrloss
        cnt += 1
    # Guard the division: every user may have failed (or none exist).
    return loss / cnt if cnt else 0.0
def test_getGradsOfV(self):
    """Check Baseupdator.getGradsOfV against hand-computed item gradients.

    Fixture: user owns items 0 and 3 (so pooling weight is 1/2 each),
    y = [1,0,1,0], and sigmoidedSumedW is stubbed as [1, 1]'.
    """
    W = np.array([[0, 1, 2, 4], [0, 1, 2, 4], ]).astype(float)
    V = np.array([[1, 1], [2, 2], [3, 3], [4, 4], ]).astype(float)
    itemsIndx = [0, 3]
    y_nonzeroCols = [0, 2]  # assume y = [1,0,1,0]
    sumedW_y = sumOverW(W, y_nonzeroCols)  # should be [2,2]'
    usr_rep = np.array([[1, 1], [2, 2], ])[0]  # should be [1,1]'
    sigmoid_y = sigmoid(- usr_rep.transpose().dot(sumedW_y))
    sigmoidedSumedW = np.array([[1], [1], ])
    # Only ITEM_FIELDS_NUM and LAMBDA matter here; the remaining
    # Baseupdator constructor args are randomized "don't care" values.
    NEG_SAMPLE_NUM = int(random.random() * 10)  # Dont care
    ITEM_FIELDS_NUM = W.shape[0]
    MAX_TRAIN_NUM = int(random.random() * 10)  # Dont care
    LEARNING_RATE = random.random()  # Dont care
    MOMENTUM = int(random.random() * 10)  # Dont care
    LAMBDA = random.random()  # Dont care
    args = (NEG_SAMPLE_NUM, ITEM_FIELDS_NUM, MAX_TRAIN_NUM,
            LEARNING_RATE, MOMENTUM, LAMBDA)
    baseupdator = Baseupdator(*args)
    gradsOfV = {}
    # NOTE(review): grad3/grad4/grad5 appear to be the positive-label term,
    # the negative-sample term, and the L2 term of dLoss/dV[item]; the
    # 1.0/2 factor presumably comes from average pooling over 2 items —
    # confirm against Baseupdator.getGradsOfV.
    # gradient w.r.t. V[0]
    grad3 = (-1) * sigmoid_y * (1.0 / 2) * sumedW_y
    grad4 = sigmoidedSumedW.transpose() * (1.0 / 2)
    grad5 = 2 * LAMBDA * V[0]
    gradsOfV[0] = (grad3 + grad4 + grad5).reshape(2, )
    # gradient w.r.t. V[3] (the user's second item)
    grad3 = (-1) * sigmoid_y * (1.0 / 2) * sumedW_y
    grad4 = sigmoidedSumedW.transpose() * (1.0 / 2)
    grad5 = 2 * LAMBDA * V[3]
    gradsOfV[3] = (grad3 + grad4 + grad5).reshape(2, )
    actualGradsOfV = baseupdator.getGradsOfV(V,
                                             itemsIndx,
                                             sumedW_y,
                                             sigmoid_y,
                                             sigmoidedSumedW)
    expectGradsOfV = gradsOfV
    # Element-wise exact comparison per item index.
    for itemIndx in actualGradsOfV:
        for x, e in enumerate(actualGradsOfV[itemIndx]):
            self.assertEqual(actualGradsOfV[itemIndx][x],
                             expectGradsOfV[itemIndx][x])
def test_getTerms(self):
    """Check Baseupdator.getTerms returns every per-user term correctly.

    Fixture: user 0 with label [0,1,0,1] (nonzero cols [1, 3]), items
    [0, 1], and two negative samples with nonzero cols [0,3] and [1,2].
    Expected values (suffix `2`) are recomputed by hand from the same
    inputs and compared element-wise.
    """
    usrid = 0
    usr2labels = {
        0: [0, 1, 0, 1],
        1: [1, 0, 1, 0],
    }
    usr2NonzeroCols = {
        0: [1, 3],
        1: [0, 2],
    }
    usr2itemsIndx = {
        0: [0, 1],
        1: [2, 3],
    }
    W = np.array([[0, 1, 2, 4], [0, 1, 2, 4], ])
    usr_rep = np.array([[1, 1], [2, 2], ])[0]  # should be [1,1]'
    usr2negsNonzeroCols = {
        0: [[0, 3], [1, 2]],
        1: [[1, 3], [0, 2]],
    }
    # Only ITEM_FIELDS_NUM matters to getTerms; the remaining constructor
    # args are randomized "don't care" values.
    NEG_SAMPLE_NUM = int(random.random() * 10)  # Dont care
    ITEM_FIELDS_NUM = W.shape[0]
    MAX_TRAIN_NUM = int(random.random() * 10)  # Dont care
    LEARNING_RATE = random.random()  # Dont care
    MOMENTUM = int(random.random() * 10)  # Dont care
    LAMBDA = random.random()  # Dont care
    args = (NEG_SAMPLE_NUM, ITEM_FIELDS_NUM, MAX_TRAIN_NUM,
            LEARNING_RATE, MOMENTUM, LAMBDA)
    baseupdator = Baseupdator(*args)
    # Actual:
    y, y_nonzeroCols, itemsIndx, sumedW_y, sigmoid_y, \
        y_negsNonzeroCols, sumedW_negs, sigmoids_negs, \
        sigmoidedSumedW = baseupdator.getTerms(
            usrid,
            usr2labels,
            usr2NonzeroCols,
            usr2itemsIndx,
            W,
            usr_rep,
            usr2negsNonzeroCols
        )
    # expected:
    y2 = [0, 1, 0, 1]
    y_nonzeroCols2 = [1, 3]
    itemsIndx2 = [0, 1]
    sumedW_y2 = sumOverW(W, y_nonzeroCols2)
    sigmoid_y2 = sigmoid(- usr_rep.transpose().dot(sumedW_y2))
    y_negsNonzeroCols2 = [[0, 3], [1, 2]]
    sumedW_negs2 = [sumOverW(W, [0, 3]).reshape(ITEM_FIELDS_NUM, 1),
                    sumOverW(W, [1, 2]).reshape(ITEM_FIELDS_NUM, 1)]
    sigmoids_negs2 = [sigmoid(usr_rep.transpose().dot(sumedW_negs2[0])),
                      sigmoid(usr_rep.transpose().dot(sumedW_negs2[1]))]
    # sigmoid-weighted sum of the negative-sample weight columns
    sigmoidedSumedW2 = sigmoids_negs2[0] * sumedW_negs2[0] + \
        sigmoids_negs2[1] * sumedW_negs2[1]
    self.assertEqual(y2, y)
    self.assertEqual(y_nonzeroCols2, y_nonzeroCols)
    self.assertEqual(itemsIndx2, itemsIndx)
    for x, e in enumerate(sumedW_y2):
        self.assertEqual(sumedW_y2[x], sumedW_y[x])
    self.assertEqual(sigmoid_y2, sigmoid_y)
    self.assertEqual(y_negsNonzeroCols2, y_negsNonzeroCols)
    # sumedW_negs are column vectors; compare element-by-element.
    for ind, e in enumerate(sumedW_negs2):
        for x, e2 in enumerate(e):
            self.assertEqual(e[x], sumedW_negs[ind][x])
    self.assertEqual(sigmoids_negs2, sigmoids_negs)
    for x, e in enumerate(sigmoidedSumedW2):
        self.assertEqual(sigmoidedSumedW2[x], sigmoidedSumedW[x])