def test(top_models=3):
    """Plot, for each sample in the global ``df``, the ``top_models`` most
    probable classes predicted by the global ``model`` against each class's
    min/mean/max reference envelope, saving one PNG per sample.

    Parameters
    ----------
    top_models : int, optional
        Number of highest-probability classes to plot per sample (default 3).

    Returns
    -------
    None
        Figures are written to ``<graphics>/labeling/prediction_<i>.png``.

    NOTE(review): relies on module-level globals ``df``, ``model``,
    ``encoder``, ``all_y``, ``purple`` and ``blue`` — confirm they are
    defined before calling.
    """
    # Column names look like "1234.5..."; keep the integer wavenumber part.
    horizontal = [int(x.split('.')[0]) for x in df.columns.values]
    # Class probabilities in percent, rounded for display in the titles.
    y = np.round(100 * model.predict_proba(df), 2)
    # Column indices of the top-N classes, highest probability first.
    idx = y.argsort()[:, ::-1][:, :top_models]
    label_path = os.path.join(config._get_path('graphics'), 'labeling')

    # Per-class reference envelope: min / mean / max over that class's spectra.
    min_array = []
    mean_array = []
    max_array = []
    for cl in range(len(encoder.classes_)):
        min_array.append(np.min(all_y[cl]))
        mean_array.append(np.mean(all_y[cl]))
        max_array.append(np.max(all_y[cl]))

    # exist_ok avoids the racy exists()/makedirs() pair of the original.
    os.makedirs(label_path, exist_ok=True)

    # Disable interactive mode while batch-saving many figures:
    # https://github.com/matplotlib/matplotlib/issues/8519#issuecomment-608434198
    plt.ioff()
    for i in range(idx.shape[0]):
        sample = df.iloc[i]
        plt.figure(figsize=(top_models * 5, 7))
        # enumerate(..., 1) replaces the manual subplot counter ``k``.
        for k, j in enumerate(idx[i], start=1):
            m = encoder.inverse_transform([j])[0]
            plt.subplot(1, top_models, k)
            plt.title("{}, Probability: {}%".format(m, y[i][j]))
            plt.xlabel("Wavelength (1/cm)")  # fixed typo: was "Wavelenght"
            plt.plot(horizontal, sample.values, '-', color=purple,
                     label="Sample")
            plt.plot(horizontal, mean_array[j], '-', color=blue, label=m)
            plt.fill_between(horizontal, min_array[j], max_array[j],
                             alpha=0.25, color=blue)
            plt.legend(loc="best")
        plt.savefig(os.path.join(label_path, 'prediction_{}.png'.format(i)),
                    dpi=300, bbox_inches="tight")
        plt.close()
#Sklearn Model Selection from sklearn.model_selection import GridSearchCV from sklearn.model_selection import StratifiedKFold #Project modules from utils import file_name as f_name from utils import append_time from param_grid import neural_grid, classical_grid from pipeline import build_pipe from baseline import als #Config module import config seed = config._seed() gs_folder = config._get_path('grid_search') def search(scaler='', baseline=True, pca=True, over_sample=True, param_grid=classical_grid(), prefix='', n_jobs=1, save=True): print('Loading training set...') X_train = pd.read_csv(os.path.join('data', 'X_train.csv')) y_train = pd.read_csv(os.path.join('data', 'y_train.csv')).values.ravel()
def best_results():
    """Scan every grid-search result file and keep, per model family, the
    preprocessing configuration with the best (lowest) log-loss.

    Returns
    -------
    (pandas.DataFrame, dict)
        DataFrame indexed by model name with the winning pipeline flags,
        log-loss and its standard deviation, sorted ascending by log-loss;
        plus a dict mapping model key -> path of the CSV the row came from.
    """
    estimator_path = {}
    models = {}
    # Enumerate all 2**5 combinations of the boolean pipeline options.
    for scaler, baseline, pca, over, nn in product([False, True], repeat=5):
        file_name = f_name(nn=nn, baseline=baseline, scaler=scaler, pca=pca,
                           over_sample=over)
        file_path = os.path.join(config._get_path('grid_search'), file_name)
        if os.path.isfile(file_path):
            df = pd.read_csv(file_path)
            replace = True
            if nn:
                # NN result files carry their best configuration as row 0.
                row = df.iloc[0]
                if 'NN' in models:
                    # Scores are stored as positive log-loss (negated
                    # neg_log_loss); keep the stored entry on ties.
                    if models['NN'][4] <= -row["neg_log_loss"]:
                        replace = False
                if replace:
                    models['NN'] = [
                        int(baseline),
                        int(scaler),
                        int(pca),
                        int(over), -row["neg_log_loss"], row["std"]
                    ]
                    estimator_path['NN'] = file_path
            else:
                for key in classical_models:
                    # First row whose estimator string mentions this model.
                    # NOTE(review): raises StopIteration if no row matches —
                    # assumes every classical file contains all model keys.
                    row = next(r for _, r in df.iterrows()
                               if key in r["estimator"])
                    replace = True
                    if classical_models[key] in models:
                        if (models[classical_models[key]][4] <=
                                -row["neg_log_loss"]):
                            replace = False
                    if replace:
                        models[classical_models[key]] = [
                            int(baseline),
                            int(scaler),
                            int(pca),
                            int(over), -row["neg_log_loss"], row["std"]
                        ]
                        # NOTE(review): keyed by the short ``key`` while
                        # ``models`` uses ``classical_models[key]`` — confirm
                        # callers expect this asymmetry.
                        estimator_path[key] = file_path
        else:
            # Fixed grammar: was "does not exists".
            print(file_name + " does not exist, please run the gridSearch!")
            break

    # Assemble the summary table from the per-model winners.
    data = []
    idxs = []
    for key in models:
        data.append(models[key])
        idxs.append(key)
    df = pd.DataFrame(data=data,
                      columns=[
                          "Baseline", "Standard scaler", "PCA (99%)",
                          "Over sample", "Log-loss", "Standard Deviation"
                      ],
                      index=idxs)
    df.sort_values(by=["Log-loss"], inplace=True)
    return df, estimator_path
# Load project modules
import config
from param_grid import build_nn
from utils import append_time, build_row
from baseline import als
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.callbacks import EarlyStopping
import warnings

warnings.filterwarnings("ignore", category=DeprecationWarning)

# Beta version!
mccv_path = config._get_path('mccv')


def results_total(X, name, sufix, temp=True):
    """Persist MCCV score rows to ``<mccv>/<name>/<sufix>[_temp].csv``.

    Parameters
    ----------
    X : array-like
        Rows of (train cross-entropy, validation cross-entropy,
        train accuracy, validation accuracy).
    name : str
        Model sub-directory under the MCCV results path.
    sufix : str
        Base file name (parameter name kept as-is, despite the spelling,
        for backward compatibility with existing callers).
    temp : bool, optional
        When True (default) append ``_temp`` to the file name, marking the
        file as intermediate output.
    """
    # TODO(review): tidy up — original note said "Arrumar!" ("fix this").
    posfix = '_temp' if temp else ''
    target_dir = os.path.join(mccv_path, name)
    # Ensure the model's output directory exists before writing the CSV.
    os.makedirs(target_dir, exist_ok=True)
    filepath = os.path.join(target_dir, sufix + posfix + '.csv')
    pd.DataFrame(data=X,
                 columns=[
                     'Cross_Entropy_train', 'Cross_Entropy_val',
                     'Accuracy_train', 'Accuracy_val'
                 ]).to_csv(filepath, index=False)
from matplotlib import pyplot as plt from matplotlib.colors import ListedColormap import seaborn as sns #Project packages import config from utils import classes_names from table import best_results #Still beta, several updates required! #Best model path: best_path = os.path.join('results', 'mccv', 'baseline_over_SVC_linear_100.0', 'detailed_score.csv') mccv_path = config._get_path('mccv') graphics_path = config._get_path('graphics') def gs_heatmap(output_name='gs_table'): df, _ = best_results() c_map = plt.get_cmap('YlGnBu') c_map = ListedColormap(c_map(np.linspace(0.1, 0.7, 256))) fig, ax = plt.subplots(figsize=(12, 7)) heat = sns.heatmap(df, annot=True, linewidths=1, cmap=c_map,