def mark_filter_valid(self, recs: dict, set_num: int, k: int):
    # MAR@k restricted to users that have both a ground-truth list and a
    # recommendation list: rows with missing recommendations are dropped.
    truth = self.truths[set_num]
    df_truth = pd.DataFrame(truth.items(), columns=['user_id', 'truth'])
    df_recs = pd.DataFrame(recs.items(), columns=['user_id', 'recommended'])
    df = (df_truth.join(df_recs.set_index('user_id'), on='user_id')
          [['truth', 'recommended']]
          .dropna())
    return recmetrics.mark(df.truth.tolist(), df.recommended.tolist(), k)
def mark(self, recs: dict, set_num: int, k: int):
    # MAR@k over all users with ground truth: users that received no
    # recommendations are scored against an empty list instead of being dropped.
    truth = self.truths[set_num]
    df_truth = pd.DataFrame(truth.items(), columns=['user_id', 'truth'])
    df_recs = pd.DataFrame(recs.items(), columns=['user_id', 'recommended'])
    df = (df_truth.join(df_recs.set_index('user_id'), on='user_id')
          [['truth', 'recommended']])
    df.loc[df['recommended'].isnull(), ['recommended']] = \
        df.loc[df['recommended'].isnull(), 'recommended'].apply(lambda x: [])
    return recmetrics.mark(df.truth.tolist(), df.recommended.tolist(), k)
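# A minimal, self-contained sketch (toy user ids and items, not taken from the
# code above) of the difference between the two methods: mark_filter_valid
# drops users with no recommendations, while mark scores them against an
# empty recommendation list.
import recmetrics

truth = {'u1': ['a', 'b'], 'u2': ['c']}
recs = {'u1': ['a', 'x', 'b']}  # 'u2' received no recommendations

# filter-valid behaviour: only users present in both dicts are scored
common = [u for u in truth if u in recs]
print(recmetrics.mark([truth[u] for u in common],
                      [recs[u] for u in common], 3))

# mark behaviour: users without recommendations contribute an empty list
print(recmetrics.mark(list(truth.values()),
                      [recs.get(u, []) for u in truth], 3))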
outf.write(
    'qID: {}, question: {}, persona: {}, guideline: {}, pred_answer: {}, gold_answer: {}, word_attn: {}, f1: {}\n'
    .format(i, question, persona, guideline, system_output[i], gold[i], word_attn[i], f1))
count += 1
if opt['sample'] and count == int(opt['sample']):
    break

map_score = metrics.mapk(gold, system_output)
print('MAP: {}'.format(map_score))
for i in [1, 3]:
    map_score = metrics.mapk(gold, system_output, k=i)
    print('MAP@{}: {}'.format(i, map_score))

mar_score = recmetrics.mark(gold, system_output)
print('MAR: {}'.format(mar_score))
for i in [1, 3]:
    mar_score = recmetrics.mark(gold, system_output, k=i)
    print('MAR@{}: {}'.format(i, mar_score))

count, avg_recall, avg_precision, avg_f1 = calc_avg_f1(gold, system_output, verbose=False)
print(
    '\nQuestion type: {}\n Recall: {}\n Precision: {}\n F1: {}'.format(
        question_type, avg_recall, avg_precision, avg_f1))

if opt['output']:
    print('Saved sampled *underperforming* examples to {}'.format(
        opt['output']))
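# calc_avg_f1 is not defined in this excerpt. The sketch below is only an
# assumption of what a set-overlap precision/recall/F1 averaged over examples
# could look like (treating gold and system_output as parallel lists of
# answer lists); it is not the author's implementation.
def calc_avg_f1(gold, system_output, verbose=False):
    recalls, precisions, f1s = [], [], []
    for truth, pred in zip(gold, system_output):
        truth_set, pred_set = set(truth), set(pred)
        overlap = len(truth_set & pred_set)
        recall = overlap / len(truth_set) if truth_set else 0.0
        precision = overlap / len(pred_set) if pred_set else 0.0
        f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
        recalls.append(recall)
        precisions.append(precision)
        f1s.append(f1)
        if verbose:
            print(truth_set, pred_set, recall, precision, f1)
    n = len(gold)
    return n, sum(recalls) / n, sum(precisions) / n, sum(f1s) / n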
all_users_pred.append(user_predict)

print(len(all_users_act))
print('######################################')
print(len(all_users_pred))


# In[24]:

### reference: https://github.com/statisticianinstilettos/recmetrics/blob/master/example.ipynb
### find mean average precision (MAP@k) and mean average recall (MAR@k)
import recmetrics

mark = []
k = [3, 5, 10, 15, 20, 25]
for n_top in k:
    mark.append(recmetrics.mark(all_users_act, all_users_pred, n_top))

print('mark=', mark)

import ml_metrics

mapk = []
k = [3, 5, 10, 15, 20, 25]
for n_top in k:
    mapk.append(ml_metrics.mapk(all_users_act, all_users_pred, n_top))

# print('mapk=', mapk)
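# Both metric calls above expect parallel lists of per-user item lists, one
# inner list per user. A toy illustration (item ids are made up, not notebook
# data):
toy_act = [['i1', 'i2', 'i3'], ['i4']]
toy_pred = [['i1', 'i9', 'i2'], ['i7', 'i4']]
print('toy MAR@3:', recmetrics.mark(toy_act, toy_pred, 3))
print('toy MAP@3:', ml_metrics.mapk(toy_act, toy_pred, 3))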
        question_type, avg_recall, avg_precision, avg_f1))
overall_count += count
overall_recall += count * avg_recall
overall_precision += count * avg_precision
overall_f1 += count * avg_f1

map_score = metrics.mapk(gold[question_type], system_output[question_type])
print('MAP: {}'.format(map_score))
for i in [1, 3]:
    map_score = metrics.mapk(gold[question_type], system_output[question_type], k=i)
    print('MAP@{}: {}'.format(i, map_score))

mar_score = recmetrics.mark(gold[question_type], system_output[question_type])
print('MAR: {}'.format(mar_score))
for i in [1, 3]:
    mar_score = recmetrics.mark(gold[question_type], system_output[question_type], k=i)
    print('MAR@{}: {}'.format(i, mar_score))

print('\nQuestion type: {}\n Recall: {}\n Precision: {}\n F1: {}\n'.format(
    'overall',
    overall_recall / overall_count,
    overall_precision / overall_count,
    overall_f1 / overall_count))

out_file = '.'.join(config['model_file'].split('.')[:-1]) + \
    '_test_margin_{}_similarity_aug_ground_truth_{}_aug_similar_dishs_{}_ground_truth_ranking_margin_{}_similarity_score_ratio_{}_output.json'.format(
        config['test_margin'][0], similarity_augmented_ground_truth_answers, augment_similar_dishs,
        ground_truth_answer_ranking_margin, similarity_score_ratio)
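# The 'overall' recall/precision/F1 above are count-weighted averages over the
# question types. A toy illustration of that weighting (numbers are made up):
per_type = [(10, 0.50), (30, 0.90)]  # (example count, avg_recall) per question type
overall_recall_demo = sum(c * r for c, r in per_type) / sum(c for c, _ in per_type)
print(overall_recall_demo)  # 0.8, weighted toward the question type with more examples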