Esempio n. 1
0
def test(top_models=3):
    """Save one comparison figure per sample for its most probable classes.

    For every row of the module-level ``df`` the class probabilities from
    ``model.predict_proba`` are ranked, and the ``top_models`` highest ones
    each get a subplot showing the sample together with the min/mean/max
    values taken from ``all_y`` for that class.  Figures are written as
    ``prediction_<row index>.png`` into the ``labeling`` folder below the
    path returned by ``config._get_path('graphics')``.

    Relies on names defined outside this function: ``df``, ``model``,
    ``encoder``, ``all_y``, ``purple``, ``blue``, ``config``, ``np``,
    ``plt`` and ``os``.

    Args:
        top_models: Number of highest-probability classes plotted per
            sample (one subplot each).

    Returns:
        None. The function is used for its side effect of writing images.
    """
    # Column labels are split on '.' and the leading part is used as the
    # integer x coordinate of each feature.
    horizontal = [int(x.split('.')[0]) for x in df.columns.values]

    # Probabilities expressed as percentages, rounded to two decimals.
    y = np.round(100 * model.predict_proba(df), 2)
    # Per row: column indices sorted by descending probability, truncated
    # to the best `top_models`.
    idx = y.argsort()[:, ::-1][:, :top_models]

    label_path = os.path.join(config._get_path('graphics'), 'labeling')

    # NOTE(review): no axis is passed, so each statistic is reduced over the
    # whole of all_y[cl]; confirm all_y[cl] has the shape the plots expect.
    class_ids = range(len(encoder.classes_))
    min_array = [np.min(all_y[cl]) for cl in class_ids]
    mean_array = [np.mean(all_y[cl]) for cl in class_ids]
    max_array = [np.max(all_y[cl]) for cl in class_ids]

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(label_path, exist_ok=True)

    # https://github.com/matplotlib/matplotlib/issues/8519#issuecomment-608434198
    plt.ioff()

    for i, best_classes in enumerate(idx):

        sample = df.iloc[i]
        fig = plt.figure(figsize=(top_models * 5, 7))

        try:
            for k, j in enumerate(best_classes, start=1):

                # Assumes predict_proba columns follow the encoder's class
                # order -- TODO confirm.
                m = encoder.inverse_transform([j])[0]

                plt.subplot(1, top_models, k)

                plt.title("{}, Probability: {}%".format(m, y[i][j]))
                plt.xlabel("Wavelenght (1/cm)")

                plt.plot(horizontal, sample.values, '-',
                         color=purple, label="Sample")

                plt.plot(horizontal, mean_array[j], '-', color=blue, label=m)
                plt.fill_between(horizontal, min_array[j], max_array[j],
                                 alpha=0.25, color=blue)
                plt.legend(loc="best")

            plt.savefig(os.path.join(label_path,
                                     'prediction_{}.png'.format(i)),
                        dpi=300,
                        bbox_inches="tight")
        finally:
            # Always release the figure, even if plotting or saving failed,
            # so a failure does not leave it open.
            plt.close(fig)
Esempio n. 2
0
#Sklearn Model Selection
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold

#Project modules
from utils import file_name as f_name
from utils import append_time
from param_grid import neural_grid, classical_grid
from pipeline import build_pipe
from baseline import als

#Config module
import config

#Seed value supplied by the project config module
seed = config._seed()
#Path the project config module returns for the 'grid_search' entry
gs_folder = config._get_path('grid_search')


def search(scaler='',
           baseline=True,
           pca=True,
           over_sample=True,
           param_grid=classical_grid(),
           prefix='',
           n_jobs=1,
           save=True):
    """Run a search over ``param_grid`` on the training set.

    Only the data-loading part of the body is visible in this view, so the
    parameter descriptions below are limited to what the signature shows.

    NOTE(review): the ``param_grid`` default is produced by calling
    ``classical_grid()`` once, when this ``def`` statement is executed;
    every call that relies on the default receives that same object.

    Args:
        scaler: Defaults to an empty string.
        baseline: Defaults to True.
        pca: Defaults to True.
        over_sample: Defaults to True.
        param_grid: Defaults to the result of ``classical_grid()``.
        prefix: Defaults to an empty string.
        n_jobs: Defaults to 1.
        save: Defaults to True.
    """

    print('Loading training set...')
    #Both files are read from the 'data' folder, relative to the current
    #working directory
    X_train = pd.read_csv(os.path.join('data', 'X_train.csv'))
    #The target frame is flattened to a 1-D array
    y_train = pd.read_csv(os.path.join('data', 'y_train.csv')).values.ravel()
Esempio n. 3
0
def _keep_if_better(models, estimator_path, label, path_key, settings, row,
                    file_path):
    """Store ``row`` under ``label`` unless the stored entry is at least as good.

    An existing entry is kept when its log-loss (position 4 of the stored
    list) is lower than or equal to the log-loss of ``row``.
    """
    log_loss = -row["neg_log_loss"]
    stored = models.get(label)

    if stored is not None and stored[4] <= log_loss:
        return

    # `settings + [...]` builds a fresh list, so entries never share state.
    models[label] = settings + [log_loss, row["std"]]
    estimator_path[path_key] = file_path


def best_results():
    """Collect, per model, the grid-search configuration with the lowest log-loss.

    Every combination of the five boolean switches (scaler, baseline, PCA,
    over-sampling, neural network) is mapped to a result file name with
    ``f_name`` and looked up in the folder returned by
    ``config._get_path('grid_search')``.  For neural-network files the first
    row is used; for the other files one row per key of ``classical_models``
    is used (the first row whose ``estimator`` value contains that key).

    The scan stops at the first missing file: a message is printed and the
    remaining combinations are not examined.

    Returns:
        A tuple ``(table, estimator_path)``:

        * ``table`` -- DataFrame indexed by model label with the columns
          "Baseline", "Standard scaler", "PCA (99%)", "Over sample",
          "Log-loss" and "Standard Deviation", sorted by ascending
          "Log-loss".
        * ``estimator_path`` -- dict mapping ``'NN'`` and each
          ``classical_models`` key (not its label) to the file that
          produced the best entry.

    Raises:
        StopIteration: if a classical result file has no row whose
            ``estimator`` value contains one of the ``classical_models``
            keys (raised by the bare ``next`` call below).
    """
    # Loop-invariant, so it is resolved once up front.
    grid_search_dir = config._get_path('grid_search')

    models = {}
    estimator_path = {}

    for scaler, baseline, pca, over, nn in product([False, True], repeat=5):

        file_name = f_name(nn=nn,
                           baseline=baseline,
                           scaler=scaler,
                           pca=pca,
                           over_sample=over)

        file_path = os.path.join(grid_search_dir, file_name)

        if not os.path.isfile(file_path):
            print(file_name + " does not exists, please run the gridSearch!")
            break

        results = pd.read_csv(file_path)

        # Same order as the first four output columns of the final table.
        settings = [int(baseline), int(scaler), int(pca), int(over)]

        if nn:
            _keep_if_better(models, estimator_path, 'NN', 'NN', settings,
                            results.iloc[0], file_path)
            continue

        for key in classical_models:
            # NOTE(review): no default is given, so a file without a
            # matching row makes this raise StopIteration.
            row = next(r for _, r in results.iterrows()
                       if key in r["estimator"])

            # The table is keyed by the display label, the path dict by the
            # raw key -- this mirrors the original behaviour.
            _keep_if_better(models, estimator_path, classical_models[key],
                            key, settings, row, file_path)

    table = pd.DataFrame(data=list(models.values()),
                         columns=[
                             "Baseline", "Standard scaler", "PCA (99%)",
                             "Over sample", "Log-loss", "Standard Deviation"
                         ],
                         index=list(models))

    table = table.sort_values(by=["Log-loss"])

    return table, estimator_path
Esempio n. 4
0
#load project modules
import config
from param_grid import build_nn
from utils import append_time, build_row
from baseline import als

from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.callbacks import EarlyStopping

import warnings
#Ignore every DeprecationWarning raised from this point on
warnings.filterwarnings("ignore", category=DeprecationWarning)

#Beta version!

#Path the project config module returns for the 'mccv' entry
mccv_path = config._get_path('mccv')


def results_total(X, name, sufix, temp=True):  # TODO: fix!
    """Write the score table ``X`` to a CSV file below ``mccv_path``.

    The file is ``<mccv_path>/<name>/<sufix>.csv``; when ``temp`` is true,
    ``_temp`` is appended to ``sufix`` before the extension.

    Args:
        X: Data with four columns, labelled in the output as
            Cross_Entropy_train, Cross_Entropy_val, Accuracy_train and
            Accuracy_val.
        name: Sub-folder of ``mccv_path`` that receives the file.
        sufix: Base name of the CSV file (without extension).
        temp: Whether to mark the file with the ``_temp`` tag.
    """
    if temp:
        marker = '_temp'
    else:
        marker = ''

    target = os.path.join(mccv_path, name, sufix + marker + '.csv')

    header = [
        'Cross_Entropy_train', 'Cross_Entropy_val',
        'Accuracy_train', 'Accuracy_val'
    ]

    scores = pd.DataFrame(data=X, columns=header)
    # The row index is not written to the file.
    scores.to_csv(target, index=False)

Esempio n. 5
0
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn as sns

#Project packages
import config
from utils import classes_names
from table import best_results

#Still beta, several updates required!

#Best model path (hard-coded, relative to the current working directory):
best_path = os.path.join('results', 'mccv', 'baseline_over_SVC_linear_100.0',
                         'detailed_score.csv')

#Paths the project config module returns for the 'mccv' and 'graphics' entries
mccv_path = config._get_path('mccv')
graphics_path = config._get_path('graphics')


def gs_heatmap(output_name='gs_table'):

    df, _ = best_results()

    c_map = plt.get_cmap('YlGnBu')
    c_map = ListedColormap(c_map(np.linspace(0.1, 0.7, 256)))

    fig, ax = plt.subplots(figsize=(12, 7))
    heat = sns.heatmap(df,
                       annot=True,
                       linewidths=1,
                       cmap=c_map,