def eval(self, U, V, ranges=(5, 10, 20, 50, 100)):
    """Compute Recall@K, NDCG@K and MAP@K on the test data for each cut-off.

    Every metric is obtained from ``rec_eval`` using ``self.train_data``,
    ``self.test_data`` and ``self.vad_data`` and is printed as soon as it
    is computed.

    Args:
        U: Factor matrix forwarded unchanged to ``rec_eval`` (presumably
            the user factors -- confirm against ``rec_eval``).
        V: Factor matrix forwarded unchanged to ``rec_eval`` (presumably
            the item factors -- confirm against ``rec_eval``).
        ranges: Iterable of cut-off values K to evaluate.

    Returns:
        A tuple ``(recall_all, ndcg_all, map_all)`` of float32 arrays that
        hold one value per entry of ``ranges``, in the same order.
    """
    cutoffs = list(ranges)
    # Size the result arrays from the requested cut-offs instead of a fixed
    # 5, so a longer ``ranges`` no longer overflows and a shorter one does
    # not leave unused zero slots behind.
    recall_all = np.zeros(len(cutoffs), dtype=np.float32)
    ndcg_all = np.zeros(len(cutoffs), dtype=np.float32)
    map_all = np.zeros(len(cutoffs), dtype=np.float32)

    PRED_DIR = os.path.join(constants.DATA_DIR, 'prediction-temp')
    cache_pattern = os.path.join(PRED_DIR, '*.npz')

    def _clear_cache():
        # Delete every .npz file currently stored in PRED_DIR.
        for cached_file in glob.glob(cache_pattern):
            os.remove(cached_file)

    # Start from an empty prediction directory so files left over from an
    # earlier call cannot be picked up.
    if not os.path.exists(PRED_DIR):
        os.mkdir(PRED_DIR)
    else:
        _clear_cache()

    print('n_components = %d, lam = %.4f, lam_emb = %.4f' % (
        self.n_components, self.lam, self.lam_emb))

    for index, K in enumerate(cutoffs):
        recall_at_K = rec_eval.parallel_recall_at_k(
            self.train_data, self.test_data, U, V, k=K,
            vad_data=self.vad_data, n_jobs=16, clear_invalid=False,
            cache=True)
        print('Test Recall@%d: \t %.4f' % (K, recall_at_K))

        ndcg_at_K = rec_eval.parallel_normalized_dcg_at_k(
            self.train_data, self.test_data, U, V, k=K,
            vad_data=self.vad_data, n_jobs=16, clear_invalid=False,
            cache=True)
        print('Test NDCG@%d: \t %.4f' % (K, ndcg_at_K))

        map_at_K = rec_eval.parallel_map_at_k(
            self.train_data, self.test_data, U, V, k=K,
            vad_data=self.vad_data, n_jobs=16, clear_invalid=False,
            cache=True)
        print('Test MAP@%d: \t %.4f' % (K, map_at_K))

        recall_all[index] = recall_at_K
        ndcg_all[index] = ndcg_at_K
        map_all[index] = map_at_K

    # Remove the temporary .npz files once all cut-offs are done.
    # NOTE(review): presumably these files are written by rec_eval when
    # cache=True and are reusable across K -- confirm against rec_eval.
    _clear_cache()
    return (recall_all, ndcg_all, map_all)
def local_alone_eval(train_data, test_data, vad_data, U, V):
    """Print Recall/NDCG/MAP at K = 10, 20, 30, 100 and return the K=100 values.

    All metrics come from ``rec_eval`` and are requested with
    ``clear_invalid=True`` and ``n_jobs=4``.

    Returns:
        A tuple ``(recall100, ndcg100, map100)`` holding the three metrics
        measured at K = 100.
    """
    shared_options = dict(vad_data=vad_data, clear_invalid=True, n_jobs=4)
    at_100 = (0.0, 0.0, 0.0)

    for cutoff in (10, 20, 30, 100):
        recall = rec_eval.parallel_recall_at_k(
            train_data, test_data, U, V, k=cutoff, **shared_options)
        print('Test Recall@%d: %.4f' % (cutoff, recall))

        ndcg = rec_eval.parallel_normalized_dcg_at_k(
            train_data, test_data, U, V, k=cutoff, **shared_options)
        print('Test NDCG@%d: %.4f' % (cutoff, ndcg))

        mean_ap = rec_eval.parallel_map_at_k(
            train_data, test_data, U, V, k=cutoff, **shared_options)
        print('Test MAP@%d: %.4f' % (cutoff, mean_ap))

        # Only the scores at the largest cut-off are handed back.
        if cutoff == 100:
            at_100 = (recall, ndcg, mean_ap)

    return at_100
def local_alone_eval(self, U, V):
    """Print Recall/NDCG/MAP at K = 5, 10, 20, 50, 100; return the K=100 values.

    Metrics are computed by ``rec_eval`` on ``self.train_data``,
    ``self.test_data`` and ``self.vad_data`` with ``n_jobs=4`` and
    ``clear_invalid=False``.

    Returns:
        A tuple ``(recall100, ndcg100, map100)`` holding the three metrics
        measured at K = 100.
    """
    # (output format, metric function) pairs, in the order they are reported.
    metrics = (
        ('Test Recall@%d: \t %.4f', rec_eval.parallel_recall_at_k),
        ('Test NDCG@%d: \t %.4f', rec_eval.parallel_normalized_dcg_at_k),
        ('Test MAP@%d: \t %.4f', rec_eval.parallel_map_at_k),
    )
    scores_at_100 = [0.0, 0.0, 0.0]

    for cutoff in (5, 10, 20, 50, 100):
        scores = []
        for line_format, metric_fn in metrics:
            score = metric_fn(self.train_data, self.test_data, U, V,
                              k=cutoff, vad_data=self.vad_data, n_jobs=4,
                              clear_invalid=False)
            print(line_format % (cutoff, score))
            scores.append(score)
        # Only the scores at the largest cut-off are handed back.
        if cutoff == 100:
            scores_at_100 = scores

    return tuple(scores_at_100)
def local_eval(self, U, V, best_ndcg_10):
    """Print test metrics for U, V and report whether NDCG@10 improved.

    Recall, NDCG and MAP are computed by ``rec_eval`` at K = 5, 10, 20, 50
    and 100 (``n_jobs=4``) and printed. The NDCG@10 value is then compared
    with ``best_ndcg_10``.

    Returns:
        A tuple ``(is_better, best_U, best_V, best_ndcg_10)``. When NDCG@10
        is strictly greater than the incoming ``best_ndcg_10`` the tuple is
        ``(True, U, V, <new NDCG@10>)``; otherwise it is
        ``(False, None, None, <unchanged best_ndcg_10>)``.
    """
    improved = False
    kept_U = kept_V = None

    for cutoff in (5, 10, 20, 50, 100):
        recall = rec_eval.parallel_recall_at_k(
            self.train_data, self.test_data, U, V, k=cutoff,
            vad_data=self.vad_data, n_jobs=4)
        print('Test Recall@%d: \t %.4f' % (cutoff, recall))

        ndcg = rec_eval.parallel_normalized_dcg_at_k(
            self.train_data, self.test_data, U, V, k=cutoff,
            vad_data=self.vad_data, n_jobs=4)
        print('Test NDCG@%d: \t %.4f' % (cutoff, ndcg))

        mean_ap = rec_eval.parallel_map_at_k(
            self.train_data, self.test_data, U, V, k=cutoff,
            vad_data=self.vad_data, n_jobs=4)
        print('Test MAP@%d: \t %.4f' % (cutoff, mean_ap))

        # Model selection is driven by NDCG@10 alone.
        if cutoff == 10 and ndcg > best_ndcg_10:
            best_ndcg_10 = ndcg
            kept_U, kept_V = U, V
            improved = True

    return (improved, kept_U, kept_V, best_ndcg_10)
# alpha = 20 gives the best validation performance
# Report Recall, NDCG and MAP on the test data for the selected factors
# (U_best, V_best) at each cut-off K.
for K in [5, 10, 20, 50, 100]:
    _recall = rec_eval.parallel_recall_at_k(
        train_data, test_data, U_best, V_best, k=K, vad_data=vad_data,
        n_jobs=16)
    print('Test Recall@%d: %.4f' % (K, _recall))

    _ndcg = rec_eval.parallel_normalized_dcg_at_k(
        train_data, test_data, U_best, V_best, k=K, vad_data=vad_data,
        n_jobs=16)
    print('Test NDCG@%d: %.4f' % (K, _ndcg))

    _mean_ap = rec_eval.parallel_map_at_k(
        train_data, test_data, U_best, V_best, k=K, vad_data=vad_data,
        n_jobs=16)
    print('Test MAP@%d: %.4f' % (K, _mean_ap))
print 'Test NDCG@%d: %.4f' % (K, rec_eval.parallel_normalized_dcg_at_k( train_data, test_data, U, V, k=K, vad_data=vad_data, n_jobs=1, clear_invalid=True)) print 'Test MAP@%d: %.4f' % (K, rec_eval.parallel_map_at_k( train_data, test_data, U, V, k=K, vad_data=vad_data, n_jobs=1, clear_invalid=True)) print 'After applying ranking function' for threshold in [0.1]: for weight in [0.1]: for num_windows in [10, 100]: print 'threshold : %.5f , weight: %.5f, num_windows: %d' % ( threshold, weight, num_windows) for K in topk_range: res = ranked_eval2.evaluate(model_temp_path, project_ts_df, train_data,