Example #1
    def mark_filter_valid(self, recs: dict, set_num: int, k: int):
        """MAR@k over users that have recommendations; users missing from `recs` are dropped."""
        truth = self.truths[set_num]
        df_truth = pd.DataFrame(truth.items(), columns=["user_id", "truth"])
        df_recs = pd.DataFrame(recs.items(), columns=["user_id", "recommended"])

        # Align truth and recommendations on user_id, then drop users without recommendations.
        df = df_truth.join(df_recs.set_index("user_id"), on="user_id") \
            [["truth", "recommended"]] \
            .dropna()
        return recmetrics.mark(df.truth.tolist(), df.recommended.tolist(), k)
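The DataFrame juggling above only aligns the two dicts by user_id; the metric itself is computed by recmetrics.mark over two parallel lists of lists. A minimal sketch of that call on made-up data (assuming recmetrics is installed; the user and item ids are hypothetical):

    import recmetrics

    # One list of relevant items and one list of recommended items per user, in the same order.
    actual = [["a", "b", "c"], ["d", "e"]]
    predicted = [["a", "x", "b"], ["y", "d", "z"]]

    print(recmetrics.mark(actual, predicted, 3))  # mean average recall at k=3 over the two users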
Example #2
    def mark(self, recs: dict, set_num: int, k: int):
        """MAR@k over all users in the truth set; users missing from `recs` count as empty lists."""
        truth = self.truths[set_num]
        df_truth = pd.DataFrame(truth.items(), columns=["user_id", "truth"])
        df_recs = pd.DataFrame(recs.items(), columns=["user_id", "recommended"])

        # Align truth and recommendations on user_id.
        df = df_truth.join(df_recs.set_index("user_id"), on="user_id") \
            [["truth", "recommended"]]
        # Users without recommendations get an empty list instead of NaN,
        # so they still contribute (with zero recall) to the mean.
        df.loc[df["recommended"].isnull(), ["recommended"]] = df.loc[df["recommended"].isnull(), "recommended"] \
            .apply(lambda x: [])
        return recmetrics.mark(df.truth.tolist(), df.recommended.tolist(), k)
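The only difference from Example #1 is how users without recommendations are handled: Example #1 drops them, while this variant keeps them as empty lists, so they score zero recall and can only lower the mean. A toy sketch of the effect, calling recmetrics directly (made-up data; it relies on recmetrics accepting an empty recommendation list, which is exactly what this example passes in):

    import recmetrics

    actual = [["a", "b"], ["c", "d"]]

    # As in Example #1: the second user has no recommendations and is dropped.
    print(recmetrics.mark(actual[:1], [["a", "x"]], 2))

    # As in Example #2: the same user is kept with an empty list and scores zero,
    # so the reported MAR@k is lower here.
    print(recmetrics.mark(actual, [["a", "x"], []], 2))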
Example #3
                outf.write(
                    'qID: {}, question: {}, persona: {}, guideline: {}, pred_answer: {}, gold_answer: {}, word_attn: {}, f1: {}\n'
                    .format(i, question, persona, guideline, system_output[i],
                            gold[i], word_attn[i], f1))
                count += 1

                if opt['sample'] and count == int(opt['sample']):
                    break

        map_score = metrics.mapk(gold, system_output)
        print('MAP: {}'.format(map_score))
        for i in [1, 3]:
            map_score = metrics.mapk(gold, system_output, k=i)
            print('MAP@{}: {}'.format(i, map_score))

        mar_score = recmetrics.mark(gold, system_output)
        print('MAR: {}'.format(mar_score))
        for i in [1, 3]:
            mar_score = recmetrics.mark(gold, system_output, k=i)
            print('MAR@{}: {}'.format(i, mar_score))

        count, avg_recall, avg_precision, avg_f1 = calc_avg_f1(gold,
                                                               system_output,
                                                               verbose=False)
        print(
            '\nQuestion type: {}\n Recall: {}\n Precision: {}\n F1: {}'.format(
                question_type, avg_recall, avg_precision, avg_f1))

        if opt['output']:
            print('Saved sampled *underperforming* examples to {}'.format(
                opt['output']))
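For reference, the per-query quantity that `metrics.mapk` (presumably ml_metrics) averages is average precision at k. A minimal sketch of the standard AP@k definition, written here as its own reference implementation rather than the library's code:

    def apk(actual, predicted, k=10):
        """Average precision at k: precision is accumulated at every rank that hits a new relevant item."""
        predicted = predicted[:k]
        hits, score = 0, 0.0
        for i, p in enumerate(predicted):
            if p in actual and p not in predicted[:i]:
                hits += 1
                score += hits / (i + 1.0)
        return score / min(len(actual), k) if actual else 0.0

    # e.g. apk(["a", "b"], ["a", "x", "b"], k=3) == (1/1 + 2/3) / 2 ≈ 0.83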
Example #4
    all_users_pred.append(user_predict)

print(len(all_users_act))
print('######################################')
print(len(all_users_pred))

# In[24]:

### reference: https://github.com/statisticianinstilettos/recmetrics/blob/master/example.ipynb
### find mean average precision (MAP@k) and mean average recall (MAR@k)

import recmetrics
mark = []
k = [3, 5, 10, 15, 20, 25]
for n_top in k:
    mark.append(recmetrics.mark(all_users_act, all_users_pred, n_top))

print('mark=', mark)

### same sweep with ml_metrics for mean average precision (MAP@k)
import ml_metrics

mapk = []
k = [3, 5, 10, 15, 20, 25]
for n_top in k:
    mapk.append(ml_metrics.mapk(all_users_act, all_users_pred, n_top))

print('mapk=', mapk)
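The referenced recmetrics example notebook also plots these curves against k. A minimal sketch with plain matplotlib (assuming matplotlib is available and reusing the `k`, `mark`, and `mapk` lists computed above), rather than relying on any particular recmetrics plotting helper:

import matplotlib.pyplot as plt

plt.plot(k, mark, marker='o', label='MAR@k')
plt.plot(k, mapk, marker='o', label='MAP@k')
plt.xlabel('k')
plt.ylabel('score')
plt.legend()
plt.show()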
Example #5
                question_type, avg_recall, avg_precision, avg_f1))
        overall_count += count
        overall_recall += count * avg_recall
        overall_precision += count * avg_precision
        overall_f1 += count * avg_f1

        map_score = metrics.mapk(gold[question_type],
                                 system_output[question_type])
        print('MAP: {}'.format(map_score))
        for i in [1, 3]:
            map_score = metrics.mapk(gold[question_type],
                                     system_output[question_type],
                                     k=i)
            print('MAP@{}: {}'.format(i, map_score))

        mar_score = recmetrics.mark(gold[question_type],
                                    system_output[question_type])
        print('MAR: {}'.format(mar_score))
        for i in [1, 3]:
            mar_score = recmetrics.mark(gold[question_type],
                                        system_output[question_type],
                                        k=i)
            print('MAR@{}: {}'.format(i, mar_score))

    print('\nQuestion type: {}\n Recall: {}\n Precision: {}\n F1: {}\n'.format(
        'overall',
        overall_recall / overall_count,
        overall_precision / overall_count,
        overall_f1 / overall_count))

    out_file = '.'.join(config['model_file'].split('.')[:-1]) + \
        ('_test_margin_{}_similarity_aug_ground_truth_{}_aug_similar_dishs_{}'
         '_ground_truth_ranking_margin_{}_similarity_score_ratio_{}_output.json').format(
            config['test_margin'][0],
            similarity_augmented_ground_truth_answers,
            augment_similar_dishs,
            ground_truth_answer_ranking_margin,
            similarity_score_ratio)
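An equivalent, slightly more readable way to assemble the same path, sketched with os.path.splitext (this matches the line above for typical model_file values with a single trailing extension; all config keys and flags are the ones already used above):

    import os

    base, _ = os.path.splitext(config['model_file'])
    suffix = ('_test_margin_{}_similarity_aug_ground_truth_{}_aug_similar_dishs_{}'
              '_ground_truth_ranking_margin_{}_similarity_score_ratio_{}_output.json').format(
                  config['test_margin'][0],
                  similarity_augmented_ground_truth_answers,
                  augment_similar_dishs,
                  ground_truth_answer_ranking_margin,
                  similarity_score_ratio)
    out_file = base + suffix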