Example no. 1
import pandas as pd
from collections.abc import Iterable

from talos import Reporting


def find_best_model_over_scan_logs(metric_weight='val_f1', *filepaths):
    """
    Finds the best model across multiple Talos scan logs.
    The scan configuration with the maximum <metric_weight> is
    selected as the best.

    :param metric_weight: The evaluation metric that will be used
                          to rank the candidate models.
    :param filepaths: An iterable containing the filepaths of the
                      saved Talos scan logs.

    :return (dict): A dictionary with the best model configuration.
    """
    assert metric_weight is not None, "Argument <metric_weight> cannot be None."
    assert isinstance(filepaths,
                      Iterable), "Argument <filepaths> must be iterable."

    # Concatenate all scan logs into a single DataFrame and reindex it
    config_pd = pd.concat(map(lambda file: Reporting(file).data, filepaths))
    config_pd.index = range(config_pd.shape[0])

    best_model_idx = config_pd[metric_weight].idxmax()
    best_model = config_pd.loc[best_model_idx].to_dict()

    # The csv round-trip stores everything as floats/strings, so coerce
    # integer-valued floats back to int and boolean strings back to bool.
    for key, value in best_model.items():
        if isinstance(value, float) and value >= 1:
            best_model[key] = int(value)
        elif value in ('True', 'False'):
            best_model[key] = value == 'True'
    return best_model
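A minimal usage sketch for the function above; the two csv names are
hypothetical stand-ins for real Talos scan logs:

best = find_best_model_over_scan_logs('val_f1',
                                      'scan_run_1.csv',   # hypothetical log
                                      'scan_run_2.csv')   # hypothetical log
print(best)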
Example no. 2
import talos as ta
import os

# Head of the call reconstructed from the truncated snippet; x, y, p and
# the_network are defined earlier in the original, truncated code.
scan_object = ta.Scan(x,
                      y,
                      params=p,
                      model=the_network,
                      fraction_limit=0.01,
                      experiment_name='boy_4_12')
analyze_object = ta.Analyze(scan_object)

# Plot 'training' bars

analyze_object.plot_bars('batch_size', 'mse', 'lr', 'first_neuron')

# Run reporting on training log - to be changed accordingly

from talos import Reporting

r = Reporting(
    os.path.join(r'C:\Users\Szymek\Documents\magisterka_1\boy_4_12',
                 '120419221138.csv').replace(os.sep, '/'))

# Find the lowest score

r.low(metric='loss')

# Find the round number with the lowest score

r.data.loss.idxmin()

# Plot the loss function across epochs

r.plot_line(metric='loss')

# Plot correlation matrix (the call was truncated in the original;
# Reporting.plot_corr is the matching Talos method)

r.plot_corr(metric='loss')
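Since r.data is a plain pandas DataFrame (the other examples rely on it too),
the same lookups can also be done directly with pandas; a minimal sketch using
only the objects defined above:

best_round = r.data.loss.idxmin()      # round with the lowest loss
print(r.data.loc[best_round])          # its full hyperparameter row
print(r.data.nsmallest(5, 'loss'))     # five best rounds by loss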
Example no. 3
import talos as ta
from talos import Reporting

# Tail of the Talos model function; the signature below follows the standard
# Talos convention, and the layer definitions were truncated in the original.
def iris_model(x_train, y_train, x_val, y_val, params):
    # ... model = Sequential(...) built from params ...
    model.compile(optimizer=params['optimizer'],
                  loss=params['losses'],
                  metrics=['acc'])

    out = model.fit(x_train,
                    y_train,
                    batch_size=params['batch_size'],
                    epochs=params['epochs'],
                    verbose=0,
                    validation_data=[x_val, y_val])
    # callbacks=[live()])
    return out, model


# and run the scan
h = ta.Scan(x,
            y,
            params=p,
            dataset_name='first_test',
            experiment_no='aaa',
            model=iris_model,
            grid_downsample=0.5,
            print_params=True)

r = Reporting('first_test_aaa.csv')

# draws a histogram for 'val_acc'
r.plot_hist()

# Deploy(h, 'experiment_name')
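The commented-out Deploy call hints at the final step. A hedged sketch of the
usual Talos deploy/restore round-trip, assuming this Talos version exposes
Deploy and Restore as below:

# package the best model from the scan into experiment_name.zip
ta.Deploy(h, 'experiment_name')

# later, reload the packaged model for inference
experiment = ta.Restore('experiment_name.zip')
preds = experiment.model.predict(x)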
Example no. 4
    def HyperReport(self, eval_criterion='val_loss'):
        """
        Reports on the models from the csv file of a previous scan.
        Plots several quantities and comparisons in dir /$name/.
        Selects the best models according to eval_criterion (val_loss or eval_error).
        """
        logging.info(' Starting reporting '.center(80, '-'))

        # Load the scan results from the csv file #
        report_file = os.path.join('model', self.name + '.csv')
        if os.path.exists(report_file):
            r = Reporting(report_file)
        else:
            logging.critical('Could not find %s' % report_file)
            sys.exit(1)

        # r.data holds the complete results dataframe #
        logging.info('=' * 80)
        logging.info('Complete data after n_round = %d' % r.rounds())
        logging.debug(r.data)

        # Lowest value of the evaluation criterion #
        logging.info('-' * 80)
        # rounds2high() reports the round of the highest value; Talos Reporting
        # does not expose a rounds2low counterpart.
        if eval_criterion == 'eval_error':
            logging.info('Lowest eval_error = %0.5f obtained after %0.f rounds' % (r.low('eval_mean'), r.rounds2high('eval_mean')))
        elif eval_criterion == 'val_loss':
            logging.info('Lowest val_loss = %0.5f obtained after %0.f rounds' % (r.low('val_loss'), r.rounds2high('val_loss')))
        else:
            logging.critical('Could not find evaluation criterion "%s" in the results' % eval_criterion)
            sys.exit(1)

        # Best params #
        logging.info('=' * 80)
        logging.info('Best parameter sets')
        # sort ascending so the lowest (best) error/loss comes first
        if eval_criterion == 'eval_error':
            sorted_data = r.data.sort_values('eval_mean', ascending=True)
        elif eval_criterion == 'val_loss':
            sorted_data = r.data.sort_values('val_loss', ascending=True)

        for i in range(10):
            logging.info('-' * 80)
            logging.info('Best params no %d' % (i + 1))
            try:
                logging.info(sorted_data.iloc[i])
            except IndexError:
                logging.warning('\tNo more parameters')
                break
        # Histograms in the terminal #
        eval_mean_arr = r.data['eval_mean'].values
        val_loss_arr = r.data['val_loss'].values
        fig1 = plotille.Figure()
        fig1.width = 150
        fig1.height = 50
        fig1.set_x_limits(min_=np.amin(eval_mean_arr), max_=np.amax(eval_mean_arr))
        fig1.color_mode = 'byte'
        fig1.histogram(eval_mean_arr, bins=200, lc=25)
        print('  Evaluation error  '.center(80, '-'))
        print('Best model : ', sorted_data.iloc[0][['eval_mean']])
        print(fig1.show(legend=True))

        fig2 = plotille.Figure()
        fig2.width = 150
        fig2.height = 50
        fig2.set_x_limits(min_=np.amin(val_loss_arr), max_=np.amax(val_loss_arr))
        fig2.color_mode = 'byte'
        fig2.histogram(val_loss_arr, bins=200, lc=100)
        print('  Val loss  '.center(80, '-'))
        print('Best model : ', sorted_data.iloc[0][['val_loss']])
        print(fig2.show(legend=True))

        logging.info('=' * 80)

        # Create the output directory #
        path_plot = os.path.join(parameters.main_path, 'model', self.name)
        if not os.path.isdir(path_plot):
            os.makedirs(path_plot)

        logging.info('Starting plots')
        # Make plots #
        PlotScans(data=r.data, path=path_plot, tag='')
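A usage sketch for the method above; the excerpt does not show the enclosing
class, so HyperModel and the way its name attribute is set are hypothetical
placeholders:

model = HyperModel()          # hypothetical enclosing class
model.name = 'my_scan'        # HyperReport reads model/my_scan.csv
model.HyperReport(eval_criterion='val_loss')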
Example no. 5
    print("test size", len(pred))
    confusion_matrix(y_test, pred)

# Placeholder: 'result' must hold a completed Talos Scan object (e.g. the one
# produced by do_nn2_talos in the scratch block below) before this code runs.
result = None

x_train, x_test, y_train, y_test = training_processing(*basic_processing())
pred = result.best_model().predict([x_test.values]) > 0.5
print("final test score", accuracy_score(pred, y_test))
print("test size", len(pred))
print(confusion_matrix(y_test, pred))


if False:
    # Scratch block: meant to be run manually from the IDE console, not on import.
    runfile('/home/gbfm/Workspaces/PyCharm/ANN/ann.py', wdir='/home/gbfm/Workspaces/PyCharm/ANN')
    data = training_processing(*basic_processing())

    result = do_nn2_talos(*data)
    do_nn1(*data)
    do_nn2(*data)
    do_sklearn(MLPClassifier(), *data)
    do_sklearn(xgboost.XGBClassifier(), *data)

    report = Reporting(result)
    f = "n_layers learning_rate round_epochs n_nodes batch_size".split(" ")
    history = report.data.sort_values("val_acc", ascending=False)  # best rounds first
    y = report.data["val_acc"]
    x = report.data
    lineplot(x[f[4]], y)  # seaborn lineplot of val_acc against batch_size
    plt.show()
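The predict-and-print pattern above appears twice; a minimal self-contained
helper capturing the same steps (the sklearn imports are the only assumption
beyond names the snippet already uses):

from sklearn.metrics import accuracy_score, confusion_matrix


def report_test_metrics(model, x_test, y_test, threshold=0.5):
    """Predict with the model, threshold the scores, and print test metrics."""
    pred = model.predict([x_test.values]) > threshold
    print("final test score", accuracy_score(y_test, pred))
    print("test size", len(pred))
    print(confusion_matrix(y_test, pred))
    return pred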