def trainAutorec(testData, trainData, trainMask, unratedItemsMask, numOfRatings, unrated_items, GroundTruth,
                 itemCount, topN, m, h, r, o, l, p, s=False):
    """Train an AutoRec model and evaluate top-N recommendation metrics each epoch.

    Relies on module-level names defined elsewhere in this file: ``epochCount``,
    ``batchSize``, ``dataprocess``, ``models``, ``evaluation``,
    ``initializeEvaluations`` and ``printResultsToFile``.

    Args:
        testData / trainData: per-user rating vectors indexable by user id,
            each of length ``itemCount`` (assumed dense arrays — TODO confirm).
        trainMask: per-user 0/1 mask of observed training ratings.
        unratedItemsMask: per-user mask of items not rated in training.
        numOfRatings: observed-rating count per user.
        unrated_items: per-user collection of unrated item ids; only used for
            negative sampling when ``s`` is True.
        GroundTruth: held-out positives per user, consumed by the evaluator.
        itemCount: number of items (model input/output dimension).
        topN: list of cut-offs for the top-N metrics.
        m, h, r, o, l, p: run configuration — model tag, hidden size,
            regularization, optimizer selector (1=SGD, 2=RMSProp, else Adam),
            learning rate, and a label used in logging / the results file.
        s: when True, augment each user's training mask with randomly sampled
            unrated items (random negative sampling).
    """
    # Fresh metric accumulators for this run.
    bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount, bestMeanForMeasures, \
        lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR = initializeEvaluations(topN)
    # NOTE(review): trainSVDpp unpacks a 5th value (test_ulist) from this same
    # helper — confirm prepareTrainAndTest's return arity is consistent.
    userIdList_Train, allTrainData, allTestData, unratedTrainMask = dataprocess.prepareTrainAndTest(
        trainData, unratedItemsMask, testData)

    with tf.Graph().as_default():
        data, mask, y, cost = models.Autorec(itemCount, h, r)

        # Optimizer selector: 1 -> SGD, 2 -> RMSProp, anything else -> Adam.
        if o == 1:
            optimizer = tf.train.GradientDescentOptimizer(l).minimize(cost)
        elif o == 2:
            optimizer = tf.train.RMSPropOptimizer(l).minimize(cost)
        else:
            optimizer = tf.train.AdamOptimizer(l).minimize(cost)

        # Build the ranking op ONCE.  The original created tf.nn.top_k inside
        # the epoch loop, adding new nodes to the TF1 graph every epoch
        # (unbounded graph growth and progressive slowdown).
        topK_op = tf.nn.top_k(y * mask, itemCount)

        printTrigger = True
        sess = tf.InteractiveSession()
        try:
            sess.run(tf.global_variables_initializer())
            for epoch in range(epochCount):
                random.shuffle(userIdList_Train)
                for batchId in range(len(userIdList_Train) // batchSize):
                    start = batchId * batchSize
                    end = start + batchSize
                    batchData = []
                    batchMask = []
                    for i in range(start, end):
                        userId = userIdList_Train[i]
                        batchData.append(trainData[userId])
                        if s:
                            # Random negative sampling: flag as "observed" as many
                            # unrated items as the user has real ratings.
                            # NOTE(review): np.random.choice raises if the user has
                            # fewer unrated items than ratings — confirm impossible.
                            unrated = np.zeros(itemCount)
                            sampled = np.random.choice(unrated_items[userId], numOfRatings[userId], replace=False)
                            for j in sampled:
                                unrated[j] = 1
                            batchMask.append(unrated + trainMask[userId])
                        else:
                            batchMask.append(trainMask[userId])
                    c, _ = sess.run([cost, optimizer],
                                    feed_dict={data: np.array(batchData), mask: np.array(batchMask)})
                print("[epoch %d/%d]\tcost : %.4f" % (epoch + 1, epochCount, c))

                # Rank all items per user; already-rated items are zeroed out by
                # feeding the unrated mask.  (Removed an unused userID_list that
                # the original built here from testData.)
                predictedValues, predictedIndices = sess.run(
                    topK_op, feed_dict={data: allTrainData, mask: unratedTrainMask})
                printTrigger, bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount, \
                    bestMeanForMeasures, lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR = \
                    evaluation.evaluation_topN(GroundTruth, predictedIndices, topN, bestPrecision, bestRecall,
                                               bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount,
                                               bestMeanForMeasures, lastRecall, lastPrecision, lastNDCG,
                                               lastMRR, lastGlobalMRR, epoch, c)
                if printTrigger:
                    print("%.4f %.4f %.4f %.4f %.4f" % (
                        bestPrecision[0], bestRecall[0], bestNDCG[0], bestMRR[0], bestGlobalMRR))
                else:
                    # Bug fix: the closing "]" was missing from this message.
                    print("[" + p + " / sampling:" + str(s) + "] Done...")
        finally:
            # Close the session even if training raises (the original leaked it).
            sess.close()

    be = "ML100k"
    d = "d1"
    st = False
    de = False
    ran = 0
    a = 0
    b = 0
    # Column order written to file:
    # be d m p s st de r h o l ran a b precision recall ndcg mrr globalmrr cost
    # epoch LAprecision LArecall LAndcg LAmrr LAglobalmrr
    printResultsToFile(topN, bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount,
                       bestMeanForMeasures, lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR,
                       be, d, m, p, s, st, de, r, h, o, l, ran, a, b)
def trainDAS(testData, trainData, trainData_i, trainMask, trainMask_i, unratedItemsMask, positiveMask,
             numOfRatings, unrated_items, GroundTruth, itemCount, topN, m, h, r, o, l, ran, a, b):
    """Train the dual-autoencoder model (DualAPR, sigmoid variant) and evaluate per epoch.

    Relies on module-level names defined elsewhere in this file: ``epochCount``,
    ``batchSize``, ``dataprocess``, ``models``, ``evaluation``,
    ``initializeEvaluations`` and ``printResultsToFile``.

    Args:
        testData / trainData / trainData_i: per-user rating vectors indexable by
            user id; ``_i`` presumably holds the inverse/implicit view fed to the
            dual channel — TODO confirm against dataprocess.
        trainMask / trainMask_i: 0/1 observation masks for the two channels.
        unratedItemsMask: per-user mask of items not rated in training.
        positiveMask: per-user mask of positive items, fed to the model's
            positive-mask placeholder.
        numOfRatings: observed-rating count per user (fed as length_positive).
        unrated_items: unused here; kept for signature parity with trainAutorec.
        GroundTruth: held-out positives per user, consumed by the evaluator.
        itemCount, topN: item dimension and metric cut-offs.
        m, h, r, o, l: model tag, hidden size, regularization, optimizer
            selector (1=SGD, 2=RMSProp, else Adam), learning rate.
        ran, a, b: ranking/weighting hyper-parameters passed to the model and
            echoed in logs and the results file.
    """
    bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount, bestMeanForMeasures, \
        lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR = initializeEvaluations(topN)

    with tf.Graph().as_default():
        data, mask, data_i, mask_i, _positiveMask, length_positive, y, cost = \
            models.DualAPR_sigmoid(itemCount, h, r, ran, a, b)

        # Optimizer selector: 1 -> SGD, 2 -> RMSProp, anything else -> Adam.
        if o == 1:
            optimizer = tf.train.GradientDescentOptimizer(l).minimize(cost)
        elif o == 2:
            optimizer = tf.train.RMSPropOptimizer(l).minimize(cost)
        else:
            optimizer = tf.train.AdamOptimizer(l).minimize(cost)

        # Build the ranking op ONCE.  The original created tf.nn.top_k inside
        # the epoch loop, adding new graph nodes every epoch (unbounded TF1
        # graph growth and progressive slowdown).
        topK_op = tf.nn.top_k(y * mask, itemCount)

        # Prepare training.
        userIdList_Train, allTrainData, allTestData, unratedTrainMask = dataprocess.prepareTrainAndTest(
            trainData, unratedItemsMask, testData)

        printTrigger = True
        sess = tf.InteractiveSession()
        try:
            sess.run(tf.global_variables_initializer())
            for epoch in range(epochCount):
                random.shuffle(userIdList_Train)
                for batchId in range(len(userIdList_Train) // batchSize):
                    start = batchId * batchSize
                    end = start + batchSize
                    batchData, batchMask = [], []
                    batchData_i, batchMask_i = [], []
                    batchPositiveMask, len_positive = [], []
                    for i in range(start, end):
                        userId = userIdList_Train[i]
                        batchData.append(trainData[userId])
                        batchMask.append(trainMask[userId])
                        batchData_i.append(trainData_i[userId])
                        batchMask_i.append(trainMask_i[userId])
                        batchPositiveMask.append(positiveMask[userId])
                        len_positive.append(numOfRatings[userId])
                    c, _ = sess.run([cost, optimizer], feed_dict={
                        data: np.array(batchData), mask: np.array(batchMask),
                        data_i: np.array(batchData_i), mask_i: np.array(batchMask_i),
                        _positiveMask: np.array(batchPositiveMask),
                        length_positive: np.array(len_positive)})
                print("[epoch %d/%d]\tcost : %.4f" % (epoch + 1, epochCount, c))

                # Rank all items per user; already-rated items are zeroed out by
                # feeding the unrated mask.
                predictedValues, predictedIndices = sess.run(
                    topK_op, feed_dict={data: allTrainData, mask: unratedTrainMask})
                printTrigger, bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount, \
                    bestMeanForMeasures, lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR = \
                    evaluation.evaluation_topN(GroundTruth, predictedIndices, topN, bestPrecision, bestRecall,
                                               bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount,
                                               bestMeanForMeasures, lastRecall, lastPrecision, lastNDCG,
                                               lastMRR, lastGlobalMRR, epoch, c)
                if printTrigger:
                    print("%.4f %.4f %.4f %.4f %.4f" % (
                        bestPrecision[0], bestRecall[0], bestNDCG[0], bestMRR[0], bestGlobalMRR))
                else:
                    print("[a: %d, b: %.2f, rankingparam: %.2f] Done..." % (a, b, ran))
        finally:
            # Close the session even if training raises (the original leaked it).
            sess.close()

    # Column order written to file:
    # be d m p s st de r h o l ran a b precision recall ndcg mrr globalmrr cost
    # epoch LAprecision LArecall LAndcg LAmrr LAglobalmrr
    be = "ML100k"
    d = "d1"
    p = "None"
    s = False
    st = False
    de = False
    printResultsToFile(topN, bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount,
                       bestMeanForMeasures, lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR,
                       be, d, m, p, s, st, de, r, h, o, l, ran, a, b)
def trainSVDpp(testData, trainData, trainMask, unratedItemsMask, numOfRatings, unrated_items, GroundTruth,
               itemCount, topN, m, h, r, o, l, p, userCount, s=False):
    """Train an SVD++ model and evaluate top-N recommendation metrics each epoch.

    Relies on module-level names defined elsewhere in this file: ``epochCount``,
    ``batchSize``, ``DIM``, ``dataprocess``, ``models``, ``evaluation``,
    ``initializeEvaluations`` and ``printResultsToFile``.

    Args:
        testData / trainData: per-user rating vectors indexable by user id.
        trainMask: per-user 0/1 mask of observed training ratings.
        unratedItemsMask: per-user mask of items not rated in training.
        numOfRatings: observed-rating count per user (normalizes the implicit
            feedback term by 1/sqrt(count)).
        unrated_items, s: unused here; kept for signature parity with
            trainAutorec.
        GroundTruth: held-out positives per user, consumed by the evaluator.
        itemCount / userCount: item and user dimensions of the model.
        topN: list of cut-offs for the top-N metrics.
        m, h, r, o, l, p: run configuration — model tag, hidden size (unused by
            SVDpp, which takes DIM), regularization, optimizer selector
            (1=SGD, 2=RMSProp, else Adam), learning rate, log/file label.
    """
    bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount, bestMeanForMeasures, \
        lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR = initializeEvaluations(topN)
    # NOTE(review): this call unpacks 5 values while trainAutorec/trainDAS
    # unpack 4 from the same helper — confirm the return arity is consistent.
    userIdList_Train, allTrainData, allTestData, unratedTrainMask, test_ulist = dataprocess.prepareTrainAndTest(
        trainData, unratedItemsMask, testData)

    with tf.Graph().as_default():
        user_batch, item_batch, rate_batch, rating_list_batch, userImplicit_batch, \
            embd_y, infer, cost = models.SVDpp(user_num=userCount, item_num=itemCount, r=r, dim=DIM)

        # Optimizer selector: 1 -> SGD, 2 -> RMSProp, anything else -> Adam.
        if o == 1:
            optimizer = tf.train.GradientDescentOptimizer(l).minimize(cost)
        elif o == 2:
            optimizer = tf.train.RMSPropOptimizer(l).minimize(cost)
        else:
            optimizer = tf.train.AdamOptimizer(l).minimize(cost)

        # Build the ranking op ONCE over a placeholder.  The original called
        # tf.nn.top_k(predictRate * unratedTrainMask, ...) inside the epoch
        # loop, baking the numpy score matrix into a new graph constant every
        # epoch (severe TF1 graph growth).
        score_ph = tf.placeholder(tf.float32, shape=[None, itemCount])
        topK_op = tf.nn.top_k(score_ph, itemCount)

        sess = tf.InteractiveSession()
        try:
            sess.run(tf.global_variables_initializer())
            for epoch in range(epochCount):
                random.shuffle(userIdList_Train)
                for batchId in range(len(userIdList_Train) // batchSize):
                    start = batchId * batchSize
                    end = start + batchSize
                    batchUserId = []
                    batchItemId = []
                    batchRate = []
                    userImplicit_list = []
                    for i in range(start, end):
                        userId = userIdList_Train[i]
                        # Compute the normalized implicit-feedback vector ONCE
                        # per user.  The original recomputed it per rated
                        # (user, item) pair — identical values, |ratings|x the work.
                        ratedIdx = np.nonzero(trainData[userId])
                        userImplicit = sess.run(embd_y, feed_dict={rating_list_batch: ratedIdx})
                        userImplicit = np.reshape(userImplicit, (len(userImplicit[0]), len(userImplicit[0][0])))
                        userImplicit = np.sum(userImplicit, axis=0) / math.sqrt(numOfRatings[userId])
                        for j in range(itemCount):
                            if trainMask[userId][j] != 0:
                                batchUserId.append(userId)
                                batchItemId.append(j)
                                batchRate.append(trainData[userId][j])
                                userImplicit_list.append(userImplicit)
                    # Dropped the unused `infer` fetch the original requested here.
                    c, _ = sess.run([cost, optimizer], feed_dict={user_batch: batchUserId,
                                                                  item_batch: batchItemId,
                                                                  rate_batch: batchRate,
                                                                  userImplicit_batch: userImplicit_list})
                print("[epoch %d/%d]\tcost : %.4f" % (epoch + 1, epochCount, c))

                # Score every item for every test user.
                predictRate = []
                for u in test_ulist:
                    # NOTE(review): implicit feedback at test time is built from
                    # testData's rated items but normalized by the TRAINING
                    # rating count — confirm this mix is intended.
                    predictRateitemId = np.nonzero(testData[u])
                    tmp = sess.run(embd_y, feed_dict={rating_list_batch: predictRateitemId})
                    tmp = np.reshape(tmp, (len(tmp[0]), len(tmp[0][0])))
                    tmp = np.sum(tmp, axis=0) / math.sqrt(numOfRatings[u])
                    rate = sess.run([infer], feed_dict={user_batch: [u] * itemCount,
                                                        item_batch: list(range(itemCount)),
                                                        userImplicit_batch: [tmp] * itemCount})
                    predictRate.append(list(rate[0]))
                predictRate = np.asarray(predictRate)

                # NOTE(review): assumes predictRate's rows line up with
                # unratedTrainMask's rows (i.e. test_ulist covers every user in
                # order) — verify against prepareTrainAndTest.
                predictedValues, predictedIndices = sess.run(
                    topK_op, feed_dict={score_ph: predictRate * unratedTrainMask})
                printTrigger, bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount, \
                    bestMeanForMeasures, lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR = \
                    evaluation.evaluation_topN(GroundTruth, predictedIndices, topN, bestPrecision, bestRecall,
                                               bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount,
                                               bestMeanForMeasures, lastRecall, lastPrecision, lastNDCG,
                                               lastMRR, lastGlobalMRR, epoch, c)
                if printTrigger:
                    print("%.4f %.4f %.4f %.4f %.4f" % (
                        bestPrecision[0], bestRecall[0], bestNDCG[0], bestMRR[0], bestGlobalMRR))
                else:
                    # Bug fix: the closing "]" was missing from this message.
                    print("[" + p + " / sampling:" + str(s) + "] Done...")
        finally:
            # Close the session even if training raises (the original leaked it).
            sess.close()

    be = "ML100k"
    d = "d1"
    st = False
    de = False
    ran = 0
    a = 0
    b = 0
    # Column order written to file:
    # be d m p s st de r h o l ran a b precision recall ndcg mrr globalmrr cost
    # epoch LAprecision LArecall LAndcg LAmrr LAglobalmrr
    printResultsToFile(topN, bestPrecision, bestRecall, bestNDCG, bestMRR, bestGlobalMRR, bestCost, bestEpochCount,
                       bestMeanForMeasures, lastRecall, lastPrecision, lastNDCG, lastMRR, lastGlobalMRR,
                       be, d, m, p, s, st, de, r, h, o, l, ran, a, b)