#combine the 3 dfs with GCNN RF,SVM,LGBM data expr_1 = first8 expr_2 = second8 expr_3 = third8 from iter_plot_help_funcs import find_active_percents, plot_metrics, plot_prec_rec_curve, plot_prec_rec_vs_tresh for exper in [expr_1, expr_2, expr_3]: exper = find_active_percents(exper, exp) # plot_metrics(exper,exp) # plot_prec_rec_curve(exper,exp) # plot_prec_rec_vs_tresh(exper,exp) # break #get gCNN rows: from iter_plot_help_funcs import find_active_percents, plot_metrics, plot_prec_rec_curve, plot_prec_rec_vs_tresh, plot_avg_percent_found, set_sns_pal plot_avg_percent_found(pd.concat([expr_1, expr_2, expr_3]), 'Mean Active Recovery for Exp_4', 15, 10) for exper in [expr_1, expr_2, expr_3]: exper_gcnn = exper[exper['Classifier'] == 'GCNN_pytorch'] df_list = [] for _, row in exper_gcnn.iterrows(): hist = row['hist'] row_df = pd.DataFrame(hist) test = pd.melt(row_df.reset_index(), id_vars=['index'], value_name='Score', var_name='Metric') test['AID'] = row['AID'] test['Iter_num'] = row['Iteration Number'] df_list.append(test) merged_df = pd.concat(df_list) g = sns.relplot(x='index',
# Finish loading the second run. `pickle_off` is opened before this excerpt,
# so it is only read and closed here.
diverse_run = pickle.load(pickle_off)
pickle_off.close()

# Drop assay AID_605 from both runs before any plotting.
expr_1 = random_run[random_run.AID != 'AID_605']
expr_2 = diverse_run[diverse_run.AID != 'AID_605']

# --- This section plots the graphs ---
from iter_plot_help_funcs import (
    find_active_percents,
    get_checkpointsdf,
    plot_avg_percent_found,
    plot_metrics,
    plot_prec_rec_curve,
    plot_prec_rec_vs_tresh,
    set_sns_pal,
)

set_sns_pal('unpaired')
for exper in [expr_1, expr_2]:
    # NOTE(review): the returned frame only rebinds the loop variable; the
    # calls further down use expr_1 / expr_2 directly, so this presumably
    # relies on find_active_percents updating its argument in place - confirm.
    exper = find_active_percents(exper, exp)
    plot_metrics(exper, exp)

# NOTE(review): expr_1 is built from random_run but is titled "Diverse
# Exploration", while expr_2 (from diverse_run) is titled "Random
# Exploration". The titles look swapped - confirm before publishing figures.
# The first call also omits the two trailing numeric arguments the second
# call passes.
plot_avg_percent_found(
    expr_1,
    'Mean Active Recovery for Classifiers with Diverse Exploration')
plot_avg_percent_found(
    expr_2,
    'Mean Active Recovery for Classifiers with Random Exploration \n Initial Selection Strategy',
    10, 5)

# --- Check difference between the random and diverse selections ---
# Tag every checkpoint row with "<Classifier>_<selection>" so both conditions
# can live in one frame. Vectorised concatenation replaces the former
# row-by-row iterrows()/append loops.
random_checkpoint = get_checkpointsdf(expr_1, 10, 5)
random_checkpoint['Exp_Cond'] = random_checkpoint['Classifier'] + '_random'

diverse_checkpoint = get_checkpointsdf(expr_2, 10, 5)
diverse_checkpoint['Exp_Cond'] = diverse_checkpoint['Classifier'] + '_diverse'

merged_23 = pd.concat([random_checkpoint, diverse_checkpoint])
# NOTE(review): this excerpt opens with a bare plot_metrics call. It looks
# like the tail of a per-experiment loop whose header lies above the visible
# text - confirm the intended nesting.
plot_metrics(exper, exp)

from iter_plot_help_funcs import plot_avg_percent_found

# Same per-experiment processing for the SVM runs (precision/recall plots
# stay disabled, as in the original).
svm_experiments = [svmexpr_1, svmexpr_2, svmexpr_3]
for exper in svm_experiments:
    exper = find_active_percents(exper, exp)
    plot_metrics(exper, exp)

# Pool each family of runs and plot its average percent found.
merged_df = pd.concat([expr_1, expr_2, expr_3])
svmmerged_df = pd.concat(svm_experiments)
plot_avg_percent_found(merged_df)
plot_avg_percent_found(svmmerged_df)

# Get the GCNN rows: reshape each row's `hist` into long form
# (index / Metric / Score) and tag it with the row's AID and iteration number.
for exper in [expr_1, expr_2, expr_3]:
    is_gcnn = exper['Classifier'] == 'GCNN_pytorch'
    exper_gcnn = exper[is_gcnn]
    df_list = []
    for _, row in exper_gcnn.iterrows():
        hist_wide = pd.DataFrame(row['hist']).reset_index()
        hist_long = hist_wide.melt(id_vars=['index'],
                                   var_name='Metric',
                                   value_name='Score')
        df_list.append(
            hist_long.assign(AID=row['AID'],
                             Iter_num=row['Iteration Number']))
import os

# Comet experiment that receives the plots produced below.
# SECURITY NOTE(review): an API key is committed in this script. It can now be
# overridden through the COMET_API_KEY environment variable; the literal is
# kept only as a fallback so existing runs behave as before. Rotate the key
# and remove the fallback.
exp = Experiment(
    api_key=os.environ.get("COMET_API_KEY", "sqMrI9jc8kzJYobRXRuptF5Tj"),
    project_name="iter_plotting",
    workspace="gdreiman1",
    disabled=False,
)
# NOTE(review): set after construction exactly as before - confirm this has
# the intended effect on code logging.
exp.log_code = True
exp.log_other('Hypothesis', '''These are my plots from the intial iterations Iter_7 ''')

import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from iter_plot_help_funcs import (
    find_active_percents,
    plot_avg_percent_found,
    plot_metrics,
    plot_prec_rec_curve,
    plot_prec_rec_vs_tresh,
    set_sns_pal,
)

# Locate the pickled results of the initial GCNN run. The file name has its
# own variable so `gcnn_initial` only ever refers to the loaded data.
data_dir = '/home/gabriel/Dropbox/UCL/Thesis/Data'
gcnn_initial_file = 'second_diverse_GCNN_50epoch_iter_run.pkl'
save_path = os.path.join(data_dir, gcnn_initial_file)

# The context manager closes the file even if unpickling raises.
with open(save_path, 'rb') as pickle_off:
    gcnn_initial = pickle.load(pickle_off)

set_sns_pal('unpaired')

# Single experiment, so no loop is needed; `exper` is still bound to the
# processed frame as it was before.
exper = find_active_percents(gcnn_initial, exp)
plot_metrics(exper, exp)

plot_avg_percent_found(
    gcnn_initial,
    'Mean Active Recovery for \n Initial GCNN Experiment',
    10, 5)
# Combine the three dataframes holding the GCNN, RF, SVM and LGBM results.
from iter_plot_help_funcs import (
    find_active_percents,
    plot_avg_percent_found,
    plot_metrics,
    plot_prec_rec_curve,
    plot_prec_rec_vs_tresh,
    set_sns_pal,
)


def _melt_training_history(row):
    """Return one row's ``hist`` in long form, tagged with its AID and iteration.

    ``row['hist']`` is turned into a DataFrame, its index is exposed as an
    ``index`` column, and the remaining columns are melted into
    ``Metric`` / ``Score`` pairs.
    """
    history = pd.DataFrame(row['hist']).reset_index()
    melted = pd.melt(history, id_vars=['index'],
                     value_name='Score', var_name='Metric')
    melted['AID'] = row['AID']
    melted['Iter_num'] = row['Iteration Number']
    return melted


expr_1 = first8
expr_2 = second8
expr_3 = third8
experiments = [expr_1, expr_2, expr_3]

# Per-experiment metric and precision/recall plots are disabled for this run;
# only the active-percent computation is executed.
for exper in experiments:
    # NOTE(review): the result only rebinds the loop variable; the concat
    # below reads expr_1..expr_3, so this presumably relies on
    # find_active_percents updating its argument in place - confirm.
    exper = find_active_percents(exper, exp)

plot_avg_percent_found(
    pd.concat(experiments),
    'Mean Active Recovery for Classifiers with \n Epsilon-Greedy Diverse Exploration',
    10, 5)

# Get the GCNN rows and stack their training histories, one experiment at a
# time. Nesting is reconstructed from the flattened source: merged_df is
# rebuilt on every pass.
for exper in experiments:
    exper_gcnn = exper[exper['Classifier'] == 'GCNN_pytorch']
    df_list = [_melt_training_history(row)
               for _, row in exper_gcnn.iterrows()]
    merged_df = pd.concat(df_list)
#combine the 3 dfs with GCNN RF,SVM,LGBM data expr_1 = first8 expr_2 = second8 expr_3 = third8 from iter_plot_help_funcs import find_active_percents, plot_metrics, plot_prec_rec_curve, plot_prec_rec_vs_tresh for exper in [expr_1, expr_2, expr_3]: exper = find_active_percents(exper, exp) plot_metrics(exper, exp) # plot_prec_rec_curve(exper,exp) # plot_prec_rec_vs_tresh(exper,exp) # break #get gCNN rows: from iter_plot_help_funcs import find_active_percents, plot_metrics, plot_prec_rec_curve, plot_prec_rec_vs_tresh, plot_avg_percent_found, set_sns_pal plot_avg_percent_found(pd.concat([expr_1, expr_2, expr_3]), 'Mean Active Recovery for Test data', 10, 5) g = sns.relplot(x="Iteration Number", y="Score", hue='Classifier', style="Metric", col="AID", col_wrap=3, data=pd.concat([expr_1, expr_2, expr_3]), kind='line', legend='full', markers=True) for exper in [expr_1, expr_2, expr_3]: exper_gcnn = exper[exper['Classifier'] == 'GCNN_pytorch'] df_list = [] for _, row in exper_gcnn.iterrows():