コード例 #1
0
def eval(list):
    """Evaluate the stored result file of every method named in ``list``.

    The validation data is loaded once from ``FOLDER_TEST`` and shared across
    all evaluations. For each method name ``m`` the file
    ``FOLDER_TEST + 'results_' + m + '.csv'`` is read and scored in strict
    mode. The overall and the per-part scores of all methods are stacked,
    printed, and written to ``eval.csv`` and ``eval_parts.csv`` inside
    ``FOLDER_TEST``.

    Nothing is evaluated or written when the validation data cannot be
    loaded (``inout.load_validation`` returns ``None``).

    Args:
        list: iterable of method names (strings) identifying result files.
    """
    preloaded = inout.load_validation(FOLDER_TEST)
    if preloaded is None:
        return

    # Order the validation playlists once so every method sees the same order.
    preloaded[0].sort_values(['num_samples', 'name'], inplace=True)

    summary = pd.DataFrame()
    summary_parts = pd.DataFrame()

    for method in list:
        predictions = pd.read_csv(FOLDER_TEST + 'results_' + method + '.csv')
        scores, part_scores = evaluate(
            predictions, FOLDER_TEST, strict=True, preloaded=preloaded)

        # Tag the single score row with the method it belongs to.
        scores['method'] = [method]
        part_scores['method'] = [method]

        summary = pd.concat([summary, scores])
        summary_parts = pd.concat([summary_parts, part_scores])

    print(summary)
    print(summary_parts)

    summary.to_csv(FOLDER_TEST + 'eval.csv')
    summary_parts.to_csv(FOLDER_TEST + 'eval_parts.csv')
コード例 #2
0
ファイル: opt_implicit.py プロジェクト: rn5l/rsc18
def eval(list, basepath, iteration):
    """Evaluate one optimisation iteration for every method in ``list``.

    For each method name ``m`` the result file
    ``FOLDER_TEST + basepath + m + '_' + str(iteration) + '.csv'`` is read and
    scored in strict mode against the validation data loaded from
    ``FOLDER_TEST``. The stacked overall scores are printed; overall and
    per-part scores are written next to the result files as
    ``<basepath>_eval_<iteration>.csv`` and
    ``<basepath>_evalparts_<iteration>.csv``.

    Args:
        list: iterable of method names (strings) identifying result files.
        basepath: path fragment appended to ``FOLDER_TEST`` for all files.
        iteration: iteration identifier; converted with ``str`` for file names.

    Returns:
        Tuple ``(rp, pages, ndcg)`` taken from the first row of the stacked
        overall scores.
    """
    preloaded = inout.load_validation(FOLDER_TEST)
    preloaded[0].sort_values(['num_samples', 'name'], inplace=True)

    prefix = FOLDER_TEST + basepath
    tag = str(iteration)

    summary = pd.DataFrame()
    summary_parts = pd.DataFrame()

    for method in list:
        predictions = pd.read_csv(prefix + method + '_' + tag + '.csv')
        scores, part_scores = evaluate(
            predictions, FOLDER_TEST, strict=True, preloaded=preloaded)

        # Tag the single score row with the method it belongs to.
        scores['method'] = [method]
        part_scores['method'] = [method]

        summary = pd.concat([summary, scores])
        summary_parts = pd.concat([summary_parts, part_scores])

    # Only the overall scores are printed; the per-part scores are just saved.
    print(summary)

    summary.to_csv(prefix + '_eval_' + tag + '.csv')
    summary_parts.to_csv(prefix + '_evalparts_' + tag + '.csv')

    return tuple(
        summary[metric].values[0] for metric in ('rp', 'pages', 'ndcg'))
コード例 #3
0
 def __init__(self, folder, num_lists=100):
     '''
     Load validation truth and test data from ``folder`` and optionally
     restrict them to a subsample of the test playlists.

     The test playlists are sorted by ``num_samples``. When ``num_lists`` is
     smaller than the number of test playlists, the playlists are treated as
     ten consecutive ranges of ``ceil(len / 10)`` index labels each, and the
     first ``int(num_lists / 10)`` labels of every range are kept. The truth
     and test frames are then filtered to the remaining playlist ids.

     Parameters:
         folder: data folder handed to ``inout.load_validation`` and
             ``inout.load_test``; also stored as ``self.folder``.
         num_lists: upper bound on the number of test playlists to keep.

     Sets ``self.lists``, ``self.test``, ``self.truth`` and ``self.folder``.
     '''

     # Only the ground truth of the validation data is needed here.
     _, truth = inout.load_validation(folder)
     tlists, test = inout.load_test(folder)

     tlists.sort_values('num_samples', inplace=True)

     if num_lists < len(tlists):
         orgcat = math.ceil(len(tlists) / 10)
         percat = int(num_lists / 10)
         indices = [orgcat * cat + offset
                    for cat in range(10)
                    for offset in range(percat)]
         # `DataFrame.ix` was removed in pandas 1.0. On an integer index it
         # resolved the values as labels, which `.loc` reproduces.
         # NOTE(review): assumes `tlists` carries the default integer index
         # from loading - confirm against inout.load_test.
         # NOTE(review): the original also built a reset-index copy that was
         # never used, so selection is by the pre-sort labels. If positional
         # selection on the sorted frame was intended, reset the index first.
         tlists = tlists.loc[indices]

     selected_ids = tlists.playlist_id.unique()
     truth = truth[truth.playlist_id.isin(selected_ids)]
     test = test[test.playlist_id.isin(selected_ids)]

     self.lists = tlists
     self.test = test
     self.truth = truth
     self.folder = folder
コード例 #4
0
ファイル: eval.py プロジェクト: rn5l/rsc18
def evaluate(result, test_folder, strict=False, preloaded=None):
    """Score recommendation results against the validation ground truth.

    For every validation playlist the recommended tracks are compared with
    the true tracks using ``r_precision``, ``rec_page`` and ``ndcg``. Scores
    are averaged over all evaluated playlists and, separately, over the
    playlists sharing the same ``key(plist)``.

    Args:
        result: DataFrame with at least ``playlist_id`` and ``track_id``
            columns. Rows of one playlist must be in recommendation order;
            playlists themselves may appear in any order. The frame is not
            modified.
        test_folder: folder passed to ``inout.load_validation`` when
            ``preloaded`` is ``None``.
        strict: when true, a validation playlist without results terminates
            the program and a playlist without exactly 500 recommendations
            raises an exception. When false, such playlists are skipped if
            they have no recommendations at all.
        preloaded: optional ``(lists, truth)`` pair to use instead of loading
            the validation data from ``test_folder``.

    Returns:
        Tuple ``(res, res_parts)``. ``res`` is a one-row DataFrame with the
        columns ``rp``, ``pages`` and ``ndcg`` averaged over the evaluated
        playlists (NaN when none were evaluated). ``res_parts`` is a one-row
        DataFrame holding, per key ``k``, the averaged ``rp_k``, ``page_k``
        and ``ndcg_k`` plus ``count_k`` and ``samples_k``.

    Raises:
        SystemExit: (status 1) if a validation playlist has no ground truth,
            or - in strict mode - no results.
        Exception: in strict mode, if a playlist does not have exactly 500
            recommendations.
    """
    print('evaluate result ', ('strict' if strict else 'loose'))

    if preloaded is None:
        lists, truth = inout.load_validation(test_folder)
        lists.sort_values('num_samples', inplace=True)
    else:
        lists, truth = preloaded

    print('number of test lists: ', len(lists))
    not_in_actions = np.setdiff1d(lists.playlist_id.unique(),
                                  truth.playlist_id.unique())
    if len(not_in_actions) > 0:
        print(lists[lists.playlist_id.isin(not_in_actions)])
        print('validation data missing')
        # Signal the failure with a non-zero exit status.
        raise SystemExit(1)

    print(' -- eval set loaded')

    res = pd.DataFrame()
    res['rp'] = [0]
    res['pages'] = [0]
    res['ndcg'] = [0]

    res_parts = pd.DataFrame()

    count = 0
    tstart = time.time()

    # The offsets below come from groupby(), which orders groups by
    # playlist_id. Sort a copy by playlist_id so the flat track array and the
    # id -> group-number map use that same order. The sort must be stable to
    # keep each playlist's recommendation ranking intact.
    result = result.sort_values('playlist_id', kind='mergesort')
    result_ids = result.playlist_id.unique()
    result_map = pd.Series(index=list(result_ids),
                           data=range(len(result_ids)))
    result_start = np.r_[0,
                         result.groupby('playlist_id').size().cumsum().values]
    result_tracks = result.track_id.values

    # Same layout for the ground truth; sorted copy, the caller's frame is
    # left untouched.
    truth = truth.sort_values(['playlist_id', 'pos']).reset_index(drop=True)
    truth_ids = truth.playlist_id.unique()
    true_map = pd.Series(index=list(truth_ids),
                         data=range(len(truth_ids)))
    true_start = np.r_[0, truth.groupby('playlist_id').size().cumsum().values]
    true_tracks = truth.track_id.values

    klist = []

    for plist in lists.itertuples():

        pid = plist.playlist_id

        if pid not in result_map.index:
            if strict:
                print('no results for playlist ', pid)
                raise SystemExit(1)
            continue

        true_idx = true_map[pid]
        result_idx = result_map[pid]

        # Slice this playlist's rows out of the flat, grouped arrays.
        recs = result_tracks[result_start[result_idx]:
                             result_start[result_idx + 1]]
        tracks = true_tracks[true_start[true_idx]:true_start[true_idx + 1]]

        if len(tracks) == 0:
            print('no tracks for playlist ', plist.playlist_id)
            raise SystemExit(1)

        if strict and len(recs) != 500:
            print(recs)
            print(len(recs))
            raise Exception('no valid result set for playlist ',
                            plist.playlist_id)
        elif len(recs) == 0:
            continue

        k = key(plist)
        if 'rp_' + k not in res_parts.columns:
            # First playlist of this key: create its accumulator columns.
            klist.append(k)
            res_parts['rp_' + k] = [0]
            res_parts['page_' + k] = [0]
            res_parts['ndcg_' + k] = [0]
            res_parts['count_' + k] = [0]
            res_parts['samples_' + k] = [plist.num_samples]

        r_prec = r_precision(recs, tracks)
        pages = rec_page(recs, tracks)
        ndcga = ndcg(recs, tracks)

        res['rp'] += r_prec
        res['pages'] += pages
        res['ndcg'] += ndcga

        res_parts['rp_' + k] += r_prec
        res_parts['page_' + k] += pages
        res_parts['ndcg_' + k] += ndcga

        count += 1
        res_parts['count_' + k] += 1

        # Value comparison; `is` on an int literal tests identity.
        if count % 5000 == 0:
            print(' -- evaluated {} of {} lists in {}s'.format(
                count, len(lists), (time.time() - tstart)))

    print(' -- evaluated all lists in {}s'.format((time.time() - tstart)))

    # Turn the accumulated sums into means.
    res = res / count
    for k in klist:
        part_count = res_parts['count_' + k]
        res_parts['rp_' + k] = res_parts['rp_' + k] / part_count
        res_parts['page_' + k] = res_parts['page_' + k] / part_count
        res_parts['ndcg_' + k] = res_parts['ndcg_' + k] / part_count

    return res, res_parts