def quanteval_plot_ind(model_folder, prefix_dir='', lower=False, verbose=False, debug=False):
    """Plot individual evaluation scores across the checkpoints of one model.

    For every checkpoint returned by ``find_list_ckpts`` this calls
    ``quantitative_eval`` (with the ``'max'`` metric) and
    ``quantitative_scws_df``, and records the results in long format:
    one row per (checkpoint, dataset/method) pair. An extra ``'AVERAGE'``
    row is added per checkpoint; it is the mean of the ``quantitative_eval``
    scores only (the SCWS scores are not part of that mean).

    Args:
        model_folder: Model folder handed to the evaluation helpers; also used
            to build the ``model_folder + '/max'`` key into their result.
        prefix_dir: Forwarded to ``find_list_ckpts`` and the evaluation helpers.
        lower: Forwarded to ``quantitative_eval``.
        verbose: Forwarded to ``quantitative_eval`` and ``quantitative_scws_df``.
        debug: When true, only the first three checkpoints are evaluated.

    Returns:
        A ``(plot, df)`` tuple. ``df`` has the columns ``'x'`` (checkpoint
        number), ``'scores'`` and ``'dataset'``; ``plot`` is the ggplot object
        drawing ``scores`` against ``x``, coloured by ``dataset``.
    """
    ckpt_nums, ckpt_names = find_list_ckpts(model_folder, prefix_dir=prefix_dir)
    if debug:
        # Keep debug runs short: look at the first three checkpoints only.
        ckpt_nums, ckpt_names = ckpt_nums[:3], ckpt_names[:3]

    xs = []       # checkpoint number of each row
    ys = []       # score of each row
    names = []    # dataset / method label of each row

    for ckpt_num, ckpt_name in zip(ckpt_nums, ckpt_names):
        sp_corrs = quantitative_eval(
            [(model_folder, model_folder)],
            ckpt_files=[ckpt_name],
            prefix_dir=prefix_dir,
            metric_funcs=['max'],  # NOTE: hard-coded; ideally this would be configurable
            lower=lower,
            verbose=verbose,
        )
        sim_scores = sp_corrs[model_folder + '/max'].tolist()
        ys.extend(sim_scores)
        # Labels come from the module-level ``eval_datasets_names``; this
        # presumably lines up one-to-one with ``sim_scores`` -- TODO confirm.
        xs.extend([ckpt_num] * len(eval_datasets_names))
        names.extend(eval_datasets_names)

        # SCWS evaluation: one row per entry of the returned frame.
        scws = quantitative_scws_df(model_folder, prefix_dir,
                                    ckpt_file=ckpt_name, verbose=verbose)
        names.extend(scws['method'].tolist())
        xs.extend([ckpt_num] * len(scws))
        ys.extend(scws['spearman'].tolist())

        # Per-checkpoint average over the quantitative_eval scores.
        names.append('AVERAGE')
        xs.append(ckpt_num)
        ys.append(np.mean(np.array(sim_scores)))

    df = pd.DataFrame()
    for column, values in (('x', xs), ('scores', ys), ('dataset', names)):
        df[column] = values

    canvas = ggplot(aes(x='x', y='scores', color='dataset'), data=df)
    plot = canvas + geom_point(size=5) + geom_line() + ggtitle("Scores as time progress")
    return plot, df
def quantitative_eval_over_time(model_folder, prefix_dir='', lower=False):
    """Plot the summed 'max' evaluation score of a model at each checkpoint.

    Each checkpoint returned by ``find_list_ckpts`` is evaluated with
    ``quantitative_eval``; the values under the ``model_folder + '/max'`` key
    of its result are summed into a single score for that checkpoint. The
    checkpoint name is printed as each one is processed.

    Args:
        model_folder: Model folder handed to the helpers; also used to build
            the ``model_folder + '/max'`` key into the evaluation result.
        prefix_dir: Forwarded to ``find_list_ckpts`` and ``quantitative_eval``.
        lower: Forwarded to ``quantitative_eval``.

    Returns:
        A ``(plot, df)`` tuple. ``df`` has the columns ``'x'`` (checkpoint
        number) and ``'scores'`` (summed score); ``plot`` is the ggplot object
        drawing ``scores`` against ``x``.
    """
    ckpt_nums, ckpt_names = find_list_ckpts(model_folder, prefix_dir=prefix_dir)

    totals = []
    for ckpt_name in ckpt_names:
        print(ckpt_name)
        sp_corrs = quantitative_eval(
            [(model_folder, model_folder)],
            [ckpt_name],
            prefix_dir=prefix_dir,
            lower=lower,
        )
        totals.append(sum(sp_corrs[model_folder + '/max']))

    df = pd.DataFrame()
    for column, values in (('x', ckpt_nums), ('scores', totals)):
        df[column] = values

    canvas = ggplot(aes(x='x', y='scores'), data=df)
    plot = canvas + geom_point(size=5) + geom_line() + ggtitle("Scores as time progress")
    return plot, df