Beispiel #1
0
 def eval(self, U, V, ranges=(5, 10, 20, 50, 100)):
     """Print and collect test Recall/NDCG/MAP at every cutoff in ``ranges``.

     Parameters
     ----------
     U, V : factor matrices forwarded unchanged to the ``rec_eval`` helpers.
     ranges : sequence of int cutoffs ``k``. Any length is accepted; the
         result arrays are sized to match.

     Returns
     -------
     (recall_all, ndcg_all, map_all) : three ``float32`` arrays holding one
         value per cutoff, in the same order as ``ranges``.

     Side effects
     ------------
     Creates ``<constants.DATA_DIR>/prediction-temp`` if it is missing and
     removes every ``*.npz`` file in it before and after the evaluation.
     """
     cutoffs = list(ranges)
     # Size the outputs from the requested cutoffs instead of a fixed 5, so a
     # longer ``ranges`` cannot overflow and a shorter one leaves no padding.
     n_cutoffs = len(cutoffs)
     recall_all = np.zeros(n_cutoffs, dtype=np.float32)
     ndcg_all = np.zeros(n_cutoffs, dtype=np.float32)
     map_all = np.zeros(n_cutoffs, dtype=np.float32)

     PRED_DIR = os.path.join(constants.DATA_DIR, 'prediction-temp')

     def _purge_npz():
         # Remove every .npz file currently sitting in the temp directory.
         for stale in glob.glob(os.path.join(PRED_DIR, '*.npz')):
             os.remove(stale)

     if not os.path.exists(PRED_DIR):
         os.mkdir(PRED_DIR)
     else:
         _purge_npz()

     # Single-argument print(...) behaves identically on Python 2 and 3.
     print('n_components = %d, lam = %.4f, lam_emb = %.4f'
           % (self.n_components, self.lam, self.lam_emb))

     # Keyword arguments shared by all three metric calls.
     # NOTE(review): cache=True presumably makes rec_eval store predictions
     # as .npz files under PRED_DIR -- confirm against rec_eval.
     shared = dict(vad_data=self.vad_data, n_jobs=16,
                   clear_invalid=False, cache=True)
     try:
         for index, K in enumerate(cutoffs):
             recall_at_K = rec_eval.parallel_recall_at_k(
                 self.train_data, self.test_data, U, V, k=K, **shared)
             print('Test Recall@%d: \t %.4f' % (K, recall_at_K))
             ndcg_at_K = rec_eval.parallel_normalized_dcg_at_k(
                 self.train_data, self.test_data, U, V, k=K, **shared)
             print('Test NDCG@%d: \t %.4f' % (K, ndcg_at_K))
             map_at_K = rec_eval.parallel_map_at_k(
                 self.train_data, self.test_data, U, V, k=K, **shared)
             print('Test MAP@%d: \t %.4f' % (K, map_at_K))
             recall_all[index] = recall_at_K
             ndcg_all[index] = ndcg_at_K
             map_all[index] = map_at_K
     finally:
         # Clean up even when a metric call fails, so a crashed run does not
         # leave temp files behind.
         _purge_npz()
     return (recall_all, ndcg_all, map_all)
def local_alone_eval(train_data, test_data, vad_data, U, V):
    """Print test Recall/NDCG/MAP at k = 10, 20, 30, 100; return the k = 100 triple.

    Every metric is computed through ``rec_eval`` with ``clear_invalid=True``
    and ``n_jobs=4``.

    Returns
    -------
    (recall100, ndcg100, map100) : the three metric values measured at k = 100.
    """
    # Keyword arguments common to all three metric calls.
    shared = dict(vad_data=vad_data, clear_invalid=True, n_jobs=4)
    at_100 = (0.0, 0.0, 0.0)
    for cutoff in (10, 20, 30, 100):
        recall = rec_eval.parallel_recall_at_k(
            train_data, test_data, U, V, k=cutoff, **shared)
        print('Test Recall@%d: %.4f' % (cutoff, recall))
        ndcg = rec_eval.parallel_normalized_dcg_at_k(
            train_data, test_data, U, V, k=cutoff, **shared)
        print('Test NDCG@%d: %.4f' % (cutoff, ndcg))
        mean_ap = rec_eval.parallel_map_at_k(
            train_data, test_data, U, V, k=cutoff, **shared)
        print('Test MAP@%d: %.4f' % (cutoff, mean_ap))
        # Only the k = 100 measurements are handed back to the caller.
        if cutoff == 100:
            at_100 = (recall, ndcg, mean_ap)
    return at_100
Beispiel #3
0
 def local_alone_eval(self, U, V):
     """Print test Recall/NDCG/MAP at k = 5, 10, 20, 50, 100; return the k = 100 triple.

     Metrics are computed through ``rec_eval`` on ``self.train_data`` and
     ``self.test_data`` with ``self.vad_data``, ``n_jobs=4`` and
     ``clear_invalid=False``.

     Returns
     -------
     (recall100, ndcg100, map100) : the three metric values measured at k = 100.
     """
     # Keyword arguments common to all three metric calls.
     shared = dict(vad_data=self.vad_data, n_jobs=4, clear_invalid=False)
     at_100 = (0.0, 0.0, 0.0)
     for cutoff in (5, 10, 20, 50, 100):
         recall = rec_eval.parallel_recall_at_k(
             self.train_data, self.test_data, U, V, k=cutoff, **shared)
         print('Test Recall@%d: \t %.4f' % (cutoff, recall))
         ndcg = rec_eval.parallel_normalized_dcg_at_k(
             self.train_data, self.test_data, U, V, k=cutoff, **shared)
         print('Test NDCG@%d: \t %.4f' % (cutoff, ndcg))
         mean_ap = rec_eval.parallel_map_at_k(
             self.train_data, self.test_data, U, V, k=cutoff, **shared)
         print('Test MAP@%d: \t %.4f' % (cutoff, mean_ap))
         # Only the k = 100 measurements are handed back to the caller.
         if cutoff == 100:
             at_100 = (recall, ndcg, mean_ap)
     return at_100
Beispiel #4
0
 def local_eval(self, U, V, best_ndcg_10):
     """Print test metrics at several cutoffs and compare NDCG@10 to a running best.

     Recall, NDCG and MAP are printed for k = 5, 10, 20, 50, 100, computed
     through ``rec_eval`` with ``self.vad_data`` and ``n_jobs=4``.

     Returns
     -------
     (is_better, best_U, best_V, best_ndcg_10)
         When NDCG@10 is strictly greater than the ``best_ndcg_10`` passed
         in: ``(True, U, V, <new NDCG@10>)``. Otherwise
         ``(False, None, None, <best_ndcg_10 unchanged>)``.
     """
     # Keyword arguments common to all three metric calls.
     shared = {'vad_data': self.vad_data, 'n_jobs': 4}
     improved = False
     for cutoff in (5, 10, 20, 50, 100):
         recall = rec_eval.parallel_recall_at_k(
             self.train_data, self.test_data, U, V, k=cutoff, **shared)
         print('Test Recall@%d: \t %.4f' % (cutoff, recall))
         ndcg = rec_eval.parallel_normalized_dcg_at_k(
             self.train_data, self.test_data, U, V, k=cutoff, **shared)
         print('Test NDCG@%d: \t %.4f' % (cutoff, ndcg))
         mean_ap = rec_eval.parallel_map_at_k(
             self.train_data, self.test_data, U, V, k=cutoff, **shared)
         print('Test MAP@%d: \t  %.4f' % (cutoff, mean_ap))
         # NDCG@10 is the only value compared against the running best.
         if cutoff == 10 and ndcg > best_ndcg_10:
             best_ndcg_10 = ndcg
             improved = True
     if improved:
         return (True, U, V, best_ndcg_10)
     return (False, None, None, best_ndcg_10)
Beispiel #5
0
    # alpha = 20 gives the best validation performance
    # Report test-set ranking metrics at each cutoff K.
    for K in [5, 10, 20, 50, 100]:
        # Recall/NDCG/MAP for the U_best / V_best factors, computed with
        # n_jobs=16.
        print 'Test Recall@%d: %.4f' % (K,
                                        rec_eval.parallel_recall_at_k(
                                            train_data,
                                            test_data,
                                            U_best,
                                            V_best,
                                            k=K,
                                            vad_data=vad_data,
                                            n_jobs=16))
        print 'Test NDCG@%d: %.4f' % (K,
                                      rec_eval.parallel_normalized_dcg_at_k(
                                          train_data,
                                          test_data,
                                          U_best,
                                          V_best,
                                          k=K,
                                          vad_data=vad_data,
                                          n_jobs=16))
        print 'Test MAP@%d: %.4f' % (K,
                                     rec_eval.parallel_map_at_k(
                                         train_data,
                                         test_data,
                                         U_best,
                                         V_best,
                                         k=K,
                                         vad_data=vad_data,
                                         n_jobs=16))
        # NOTE(review): the two prints below evaluate U / V (not U_best /
        # V_best) with n_jobs=1 and clear_invalid=True, yet reuse the exact
        # 'Test NDCG' / 'Test MAP' labels printed above, and there is no
        # matching Recall line. The output cannot tell the two configurations
        # apart -- confirm this is intended and not a splice of two snippets.
        print 'Test NDCG@%d: %.4f' % (K,
                                      rec_eval.parallel_normalized_dcg_at_k(
                                          train_data,
                                          test_data,
                                          U,
                                          V,
                                          k=K,
                                          vad_data=vad_data,
                                          n_jobs=1,
                                          clear_invalid=True))
        print 'Test MAP@%d: %.4f' % (K,
                                     rec_eval.parallel_map_at_k(
                                         train_data,
                                         test_data,
                                         U,
                                         V,
                                         k=K,
                                         vad_data=vad_data,
                                         n_jobs=1,
                                         clear_invalid=True))

    print 'After applying ranking function'
    for threshold in [0.1]:
        for weight in [0.1]:
            for num_windows in [10, 100]:
                print 'threshold : %.5f , weight: %.5f, num_windows: %d' % (
                    threshold, weight, num_windows)
                for K in topk_range:
                    res = ranked_eval2.evaluate(model_temp_path,
                                                project_ts_df,
                                                train_data,