def _define_metrics(self, M):
    """Build the evaluation-metric table and store it on ``self.metrics``.

    The table maps a display name to a metric object from the ``metrics``
    module. Precision, recall, serendipity, novelty and diversity are each
    registered at cut-offs 5 and 10; MAP, NDCG and MRR use ``M`` as cut-off.

    Args:
        M: value passed as ``k`` to the AP, NDCG and MRR metrics.
    """
    table = {}

    # Ranking-accuracy metrics.
    table['P@5'] = metrics.PrecisionAtN(k=5)
    table['P@10'] = metrics.PrecisionAtN(k=10)
    table['MAP'] = metrics.AP(k=M)
    table['R@5'] = metrics.RecallAtN(k=5)
    table['R@10'] = metrics.RecallAtN(k=10)
    table['NDCG'] = metrics.NDCG(k=M, gain_type='identity')
    table['MRR'] = metrics.MRR(k=M)

    # Serendipity is parameterised with self.top_N_items.
    table['SER@5'] = metrics.Serendipity(self.top_N_items, k=5)
    table['SER@10'] = metrics.Serendipity(self.top_N_items, k=10)

    # Novelty is parameterised with the items each user rated in training.
    table['NOV@5'] = metrics.Novelty(self.items_rated_by_user_train, k=5)
    table['NOV@10'] = metrics.Novelty(self.items_rated_by_user_train, k=10)

    # Diversity is parameterised with the items each user liked.
    table['DIV@5'] = metrics.Diversity(self.items_liked_by_user_dict, k=5)
    table['DIV@10'] = metrics.Diversity(self.items_liked_by_user_dict, k=10)

    # Publish only once every metric has been constructed.
    self.metrics = table
def main():
    """Train a CmpNN pairwise ranker and print NDCG@10 and MAP on the test set.

    Steps:
      1. Load training data (``data_to_train_on``) and test data
         (``data_to_test``).
      2. Build the CmpNN graph and run ``num_epochs`` Adam steps on randomly
         sampled mini-batches, logging the merged summaries to TensorBoard.
      3. For every test query, let each predicted pair cast one vote for the
         document selected by the arg-max of the softmax output, order the
         documents by vote count, and score that ordering with ``met.NDCG``
         (cut-off 10) and ``met.AP``.

    The session and the summary writer are released even if training or
    evaluation raises.
    """
    tf.set_random_seed(1)
    # NOTE(review): only TensorFlow's graph-level seed is fixed here; the
    # NumPy batch sampler below is unseeded unless that happens elsewhere.

    train_x, train_y = data_to_train_on()
    test_data = data_to_test()
    print('Loaded training data')

    num_train_samples = len(train_x)
    num_features = train_x.shape[1]
    num_ranks = train_y.shape[1]

    # Placeholders for variables
    qdpair = tf.placeholder(tf.float32, [None, num_features], name='qdpair')
    ranking = tf.placeholder(tf.float32, [None, num_ranks], name='ranking')
    training = tf.placeholder(tf.bool, None, name='training_phase')
    optimal_ranking = tf.placeholder(tf.float32, [None, num_ranks],
                                     name='optimal_ranking')

    model = CmpNN(qdpair, ranking,
                  regu=0.01,
                  num_features=num_features,
                  num_ranks=num_ranks,
                  training=training,
                  optimal_rank=optimal_ranking)
    merged_summary = tf.summary.merge_all()

    logdir = '/tmp/tensorflow_logs/lr/12'
    num_epochs = 10000
    # The batch size is tied to the number of steps; clamp it so that a data
    # set with fewer samples than steps does not produce empty batches.
    batch_size = max(1, num_train_samples // num_epochs)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        test_writer = tf.summary.FileWriter(logdir,
                                            graph=tf.get_default_graph())
        try:
            for epoch in range(num_epochs):
                # Sample a mini-batch with replacement.
                rand_idx = np.random.randint(num_train_samples,
                                             size=batch_size)
                _, summary = sess.run(
                    [model.optimize_adam, merged_summary],
                    feed_dict={
                        qdpair: train_x[rand_idx, :],
                        ranking: train_y[rand_idx, :],
                        training: True
                    })
                test_writer.add_summary(summary, epoch)
        finally:
            # Flush pending events and release the file handle.
            test_writer.close()

        # COMPUTE NDCG@10 AND MAP
        # iterate through test samples
        ndcgScore = []
        apScore = []
        for k in test_data:
            query = test_data[k]
            # NOTE(review): the `training` placeholder is not fed here, so
            # this only works if prediction_softmax does not depend on it.
            preds = np.asarray(sess.run(model.prediction_softmax,
                                        feed_dict={qdpair: query['qdpairs']}))
            # Keep one row per pair even when the query has a single pair
            # (a plain squeeze would collapse that case to 1-D).
            preds = preds.reshape(-1, preds.shape[-1])

            pair_indices = query['pair_indeces']
            votes = np.zeros(int(pair_indices.max()) + 1)
            for idx, row in enumerate(preds):
                # Each pair casts one vote for the document picked by argmax.
                votes[int(pair_indices[idx, np.argmax(row)])] += 1

            # NOTE(review): np.argsort is ascending, so documents with the
            # fewest votes come first; confirm this is the order that
            # met.NDCG / met.AP expect (best-first would need a reversal).
            order = np.argsort(votes)

            # Replace each document index with its actual relevance label.
            targets = query['target_vales']
            relevance = np.array([int(targets[doc]) for doc in order],
                                 dtype=order.dtype)

            ndcg = met.NDCG(relevance, 10)
            # Anything that is not strictly positive (including NaN) counts
            # as zero.
            ndcgScore.append(ndcg if ndcg > 0 else 0)
            apScore.append(met.AP(relevance))

    # PRINT NDCG AND MAP FOR TEST SET
    print('NDCG@10 ', np.mean(ndcgScore))
    print('MAP ', np.mean(apScore))
def _one_hot_relevance(labels, levels=None):
    """One-hot encode relevance labels.

    Args:
        labels: relevance grades. Only the values 0-4 are encoded; any other
            value produces an all-zero row.
        levels: ordered label strings that define the output columns. When
            ``None`` the levels observed in ``labels`` are used.

    Returns:
        ``(matrix, levels)``: the one-hot matrix and the levels that define
        its columns, so a second label set can be encoded identically.
    """
    frame = pd.DataFrame(labels, columns=['relevance'])
    as_text = frame[frame.columns[0]].map({
        0: '0', 1: '1', 2: '2', 3: '3', 4: '4'
    })
    if levels is None:
        levels = sorted(as_text.dropna().unique())
    # A categorical with fixed categories yields one column per level even
    # when a level does not occur in this particular label set.
    categorical = pd.Series(pd.Categorical(as_text, categories=levels))
    return pd.get_dummies(categorical).values, levels


def main():
    """Train a LogisticRegression ranker and print mean NDCG@10 and mean AP.

    Steps:
      1. Load the training and test sets and one-hot encode the relevance
         labels, using the training levels for both so the widths agree.
      2. Run ``num_epochs`` optimisation steps on random mini-batches,
         logging training summaries every step and test summaries every
         100 steps.
      3. For every test query, predict a relevance class per document, order
         the documents by predicted class and score the ordering with
         ``met.NDCG`` (cut-off 10) and ``met.AP``.

    The session and both summary writers are released even on failure.
    """
    # Import training data
    train_x, train_y, qid = get_train_data()
    train_x = train_x.todense()
    train_y, levels = _one_hot_relevance(train_y)
    num_samples = train_x.shape[0]
    print('Loaded training data')

    # Import test data, encoded with the same columns as the training labels
    test, test_x, test_y = data_to_test()
    test_y, _ = _one_hot_relevance(test_y, levels)
    print('Loaded test data')

    num_features = train_x.shape[1]
    num_ranks = train_y.shape[1]

    # Placeholders for variables
    qdpair = tf.placeholder(tf.float32, [None, num_features], name='qdpair')
    ranking = tf.placeholder(tf.float32, [None, num_ranks], name='ranking')
    training = tf.placeholder(tf.bool, None, name='training_phase')
    # Not passed to the model below; kept so the exported graph is unchanged.
    optimal_ranking = tf.placeholder(tf.float32, [None, num_ranks],
                                     name='optimal_ranking')

    model = LogisticRegression(qdpair, ranking,
                               regu=0,
                               num_features=num_features,
                               num_ranks=num_ranks,
                               training=training)
    merged_summary = tf.summary.merge_all()

    iteration = str(8)
    log_prefix = '/tmp/tensorflow_logs/lr/' + iteration
    num_epochs = 30000
    # The batch size is tied to the number of steps; clamp it so that a data
    # set with fewer samples than steps does not produce empty batches.
    batch_size = max(1, num_samples // num_epochs)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        training_writer = tf.summary.FileWriter(
            log_prefix + '_train', graph=tf.get_default_graph())
        test_writer = tf.summary.FileWriter(
            log_prefix + '_test', graph=tf.get_default_graph())
        try:
            for epoch in range(num_epochs):
                # Sample a mini-batch with replacement.
                rand_idx = np.random.randint(num_samples, size=batch_size)
                _, summary = sess.run(
                    [model.optimize, merged_summary],
                    feed_dict={
                        qdpair: train_x[rand_idx, :],
                        ranking: train_y[rand_idx, :],
                        training: True
                    })
                training_writer.add_summary(summary, epoch)

                if epoch % 100 == 0:
                    # NOTE(review): the test pass is fed training=True;
                    # confirm this is intended for this model.
                    _, summary = sess.run(
                        [model.cost, merged_summary],
                        feed_dict={
                            qdpair: test_x,
                            ranking: test_y,
                            training: True
                        })
                    test_writer.add_summary(summary, epoch)
        finally:
            # Flush pending events and release the file handles.
            training_writer.close()
            test_writer.close()

        ndcgScore = []
        apScore = []
        for k in test:
            query = test[k]
            # NOTE(review): evaluation is also fed training=True; confirm.
            preds = np.asarray(sess.run(model.prediction_softmax,
                                        feed_dict={
                                            qdpair: query['qd'],
                                            training: True
                                        }))
            # Keep one row per document even for single-document queries
            # (a plain squeeze would collapse that case to 1-D).
            preds = preds.reshape(-1, preds.shape[-1])
            ranks = np.argmax(preds, axis=1)

            # NOTE(review): np.argsort is ascending, so documents with the
            # lowest predicted class come first; confirm this is the order
            # that met.NDCG / met.AP expect.
            order = np.argsort(ranks)

            # Replace each document index with its actual relevance label.
            targets = query['target']
            relevance = np.array([int(targets[doc]) for doc in order],
                                 dtype=order.dtype)

            ndcg = met.NDCG(relevance, 10)
            # Record a zero instead of dropping the query, so the NDCG mean
            # covers the same queries as the AP mean.
            ndcgScore.append(ndcg if ndcg > 0 else 0)
            apScore.append(met.AP(relevance))

    print(np.mean(ndcgScore))
    print(np.mean(apScore))
def _score_batch(net, items_text, items_img, uidx, num_users, dtype,
                 parallel, gpus):
    """Run ``net`` on one batch of outfits and return ``(scores, binary)``."""
    text = tuple(Variable(v.type(dtype)) for v in items_text)
    img = tuple(Variable(v.type(dtype)) for v in items_img)
    # One-hot encode the user index of every sample in the batch.
    one_hot = torch.zeros(uidx.shape[0], num_users).scatter_(1, uidx, 1.0)
    inputv = (text, img, Variable(one_hot.type(dtype)))
    if parallel:
        return data_parallel(net, inputv, gpus)
    return net(*inputv)


def _collect_per_user(uidx, scores, binary, score_lists, binary_lists):
    """Append each sample's outputs to the lists of the user it belongs to."""
    for n, u in enumerate(uidx.view(-1).tolist()):
        # .item() yields a plain Python number for 0-dim and one-element
        # tensors alike; assumes one value per sample -- TODO confirm.
        binary_lists[u].append(binary[n].item())
        score_lists[u].append(scores[n].item())


def test_ndcg(net, data, num_users, ratio, save_file=None, gpus=(0,)):
    """Evaluate net.

    Scores the positive outfit and ``ratio`` negative outfits of every batch
    in ``data.loader``, groups the outputs per user, computes NDCG and AUC
    (``metrics.NDCG`` / ``metrics.ROC``) for both the real-valued and the
    binary outputs, prints a summary and optionally saves the results.

    Args:
        net: model returning ``(scores, binary)`` for ``(text, img, user)``.
        data: object exposing a ``loader`` that yields
            ``(posi_text, nega_text, posi_img, nega_img, uidx)`` batches.
        num_users: number of users (width of the one-hot user encoding).
        ratio: number of negative outfits scored per batch.
        save_file: ``.npz`` path for the results; ``None`` skips saving.
            Entries already in the file are kept unless recomputed here.
        gpus: device ids; more than one enables ``data_parallel``.
    """
    progress = ProgressBar()
    net.eval()
    parallel = len(gpus) > 1
    # With data_parallel the inputs stay on the CPU; otherwise they are
    # converted to CUDA tensors here.
    dtype = torch.FloatTensor if parallel else torch.cuda.FloatTensor

    posi_scores = [[] for _ in range(num_users)]
    posi_binary = [[] for _ in range(num_users)]
    nega_scores = [[] for _ in range(num_users)]
    nega_binary = [[] for _ in range(num_users)]

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        #data.loader.dataset.set_to_posi()
        progress.reset(len(data.loader), messsage='Computing postiive outfits')
        for batch in data.loader:
            items_text, _, items_img, _, uidx = batch
            uidx = uidx.view(-1, 1)
            scores, binary = _score_batch(net, items_text[0], items_img[0],
                                          uidx, num_users, dtype, parallel,
                                          gpus)
            _collect_per_user(uidx, scores, binary, posi_scores, posi_binary)
            progress.forward()
        progress.end()

        # compute scores for negative outfits
        #data.loader.dataset.set_to_nega(ratio=6)
        progress.reset(len(data.loader), messsage='Computing negative outfits')
        for batch in data.loader:
            _, items_text, _, items_img, uidx = batch
            uidx = uidx.view(-1, 1)
            for i in range(ratio):
                scores, binary = _score_batch(net, items_text[i],
                                              items_img[i], uidx, num_users,
                                              dtype, parallel, gpus)
                _collect_per_user(uidx, scores, binary, nega_scores,
                                  nega_binary)
            progress.forward()
        progress.end()

    mean_ndcg, avg_ndcg = metrics.NDCG(posi_scores, nega_scores)
    mean_ndcg_bianry, avg_ndcg_binary = metrics.NDCG(posi_binary, nega_binary)
    aucs, mean_auc = metrics.ROC(posi_scores, nega_scores)
    aucs_binary, mean_auc_binary = metrics.ROC(posi_binary, nega_binary)
    # The misspelt 'mean_ndcg_bianry' key is kept: readers of previously
    # saved files may rely on it.
    results = dict(
        mean_ndcg=mean_ndcg,
        avg_ndcg=avg_ndcg,
        mean_ndcg_bianry=mean_ndcg_bianry,
        avg_ndcg_binary=avg_ndcg_binary,
        aucs=aucs,
        mean_auc=mean_auc,
        aucs_binary=aucs_binary,
        mean_auc_binary=mean_auc_binary)
    print('avg_mean_ndcg:{} avg_mean_auc:{}'.format(
        mean_ndcg.mean(), mean_auc))

    # save results
    if save_file is None:
        return
    merged = {}
    if os.path.exists(save_file):
        # Keep unrelated entries from an earlier run, and close the archive
        # before it is overwritten.
        with np.load(save_file) as previous:
            merged.update({key: previous[key] for key in previous.files})
    # Freshly computed values take precedence over stale ones.
    merged.update(results)
    np.savez(save_file, **merged)