import talos as ta
import astetik as ast  # assumed: astetik is the plotting library Talos wraps for bar grids

def Optimization():
    scan_object = ta.Scan(x=x_train,
                          y=y_train,
                          params=parameters,
                          model=pet_finder_model,
                          val_split=0,
                          experiment_name='pet_finder')

    # Evaluate
    analyze_object = ta.Analyze(scan_object)
    scan_data = analyze_object.data

    # heatmap correlation
    analyze_object.plot_corr('val_accuracy', ['accuracy', 'loss', 'val_loss'])

    # a four-dimensional bar grid
    ast.bargrid(scan_data,
                x='lr',
                y='val_accuracy',
                hue='num_Nodes',
                row='loss_function',
                col='dropout')

    # tabulate the rounds, excluding the listed columns, sorted by val_accuracy
    list_of_parameters = analyze_object.table('val_loss',
                                              ['accuracy', 'loss', 'val_loss'],
                                              'val_accuracy')
    return list_of_parameters
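# Note (not in the original snippet): Talos exposes a similar bar grid
# directly on the Analyze object, which avoids importing astetik; plot_bars
# takes (x, y, hue, col), so the row facet ('loss_function') used above is
# dropped. A hypothetical helper wrapping the built-in plot:
def plot_grid(analyze_object):
    analyze_object.plot_bars('lr', 'val_accuracy', 'num_Nodes', 'dropout')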
import talos
from pathlib import Path

def main():
    p = get_params()
    data, labels = data_treatment(Path('./landmarks'))
    t = talos.Scan(x=data,
                   y=labels,
                   model=compile_and_train,
                   params=p,
                   experiment_name='talos_lstm')
    analyze_object = talos.Analyze(t)
    print('The best results were')
    print(analyze_object.high('val_acc'))
    return analyze_object
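# If a ranked list of parameter combinations is wanted rather than just the
# top score, Analyze.best_params can supplement high(). A short addition
# (assumed, not in the original):
analyze_object = main()
print(analyze_object.best_params('val_acc', ['acc', 'loss', 'val_loss']))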
def test_analyze(scan_object):
    '''Tests all the attributes available in the Reporting() object'''

    print('\n >>> Start Analyze()... \n')

    import talos
    import glob

    # for now test with old name
    r = talos.Reporting(scan_object)

    # read from file
    list_of_files = glob.glob('./test_latest/' + '/*.csv')
    r = talos.Reporting(list_of_files[-1])

    # and then from scan object
    r = talos.Analyze(scan_object)

    # test the object properties
    r.best_params('val_loss', ['val_acc'])
    r.correlate('val_loss', ['val_acc'])
    r.data
    r.high('val_acc')
    r.low('val_acc')

    # r.plot_bars('first_neuron', 'val_acc', 'dropout', 'hidden_layers')
    r.plot_box('first_neuron', 'val_acc')
    r.plot_corr('val_loss', ['val_acc'])
    r.plot_hist('val_acc')
    r.plot_kde('val_acc')
    r.plot_line('val_acc')
    r.plot_regs('val_acc', 'val_loss')
    r.rounds()
    r.rounds2high('val_acc')
    r.table('val_loss', ['val_acc'])

    print('finish Analyze() \n')
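# A minimal sketch (assumed, not from the test suite) of producing a
# scan_object fixture for test_analyze(); experiment_name='test_latest'
# matches the directory the test globs for csv logs:
import numpy as np
import talos
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

def tiny_model(x_train, y_train, x_val, y_val, params):
    # the (x_train, y_train, x_val, y_val, params) signature and the
    # (history, model) return value are the contract Talos expects
    model = Sequential()
    model.add(Dense(params['first_neuron'], input_dim=x_train.shape[1],
                    activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy',
                  metrics=['acc'])
    history = model.fit(x_train, y_train,
                        validation_data=(x_val, y_val),
                        epochs=params['epochs'], verbose=0)
    return history, model

x = np.random.random((100, 8))
y = np.random.randint(0, 2, 100)
scan_object = talos.Scan(x=x, y=y,
                         params={'first_neuron': [8, 16], 'epochs': [2]},
                         model=tiny_model,
                         experiment_name='test_latest')
test_analyze(scan_object)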
print("access the summary details") print(scan_object.details) print("accessing the saved models") print(scan_object.saved_models) print("accessing the saved weights for models") print(scan_object.saved_weights) ############################################################################# # Analysing the Scan results with reporting # ----------------------------------------- # print("use Scan object as input") analyze_object = talos.Analyze(scan_object) print("access the dataframe with the results") print(analyze_object.data) print("get the number of rounds in the Scan") print(analyze_object.rounds()) print("et the highest result for any metric") print(analyze_object.high('val_accuracy')) print("get the round with the best result") print(analyze_object.rounds2high('val_accuracy')) print("get the best paramaters") print(
def plotResults(self, scan_object=None, analyze_file=None):
    analyze_object = None
    if scan_object is not None:
        analyze_object = talos.Analyze(scan_object)
    if analyze_file is not None:
        analyze_object = talos.Reporting(analyze_file)
    if analyze_object is None:
        return

    print("Results:")
    print(analyze_object.data)
    print("")

    print("Rounds:")
    print(analyze_object.rounds())
    print("")

    print("Highest accuracy:")
    print(analyze_object.high('val_acc'))
    print("")

    print("Lowest (non-null) loss:")
    print(analyze_object.low('val_loss'))
    print("")

    print("Round with best results (val_acc):")
    print(analyze_object.rounds2high('val_acc'))
    print("")

    best_params = analyze_object.best_params('val_acc', [])
    print("Best parameters (val_acc) rank:")
    print(best_params)
    print("")

    print("Best params:")
    print(best_params[0])
    print("")

    print("Best parameters (val_loss) rank:")
    print(analyze_object.best_params('val_loss', ['acc', 'loss', 'val_loss']))
    print("")

    # line plots
    analyze_object.plot_line('val_acc')
    analyze_object.plot_line('val_loss')

    # a regression plot for two dimensions
    analyze_object.plot_regs('val_acc', 'val_loss')

    # up to two-dimensional kernel density estimators
    analyze_object.plot_kde('val_acc')
    analyze_object.plot_kde('val_loss')

    # a simple histogram
    analyze_object.plot_hist('val_acc', bins=40)
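# Hypothetical usage (not in the original), assuming `exp` is an instance of
# the class defining plotResults: either hand over a live Scan object or
# point the helper at a saved experiment log.
exp.plotResults(scan_object=scan_object)
exp.plotResults(analyze_file='talos_lstm/experiment_log.csv')  # hypothetical file name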
                        callbacks=[cb])
    return history, n_network


plot_loss(history.history['loss'], history.history['val_loss'])

# Run randomized search using the Talos library with a 0.01 fraction limit
scan_object = ta.Scan(X_train,
                      y_train,
                      params=p,
                      model=the_network,
                      fraction_limit=0.01,
                      experiment_name='boy_4_12')
analyze_object = ta.Analyze(scan_object)

# Plot 'training' bars
analyze_object.plot_bars('batch_size', 'mse', 'lr', 'first_neuron')

# Run reporting on the training log - path to be changed accordingly
from talos import Reporting
r = Reporting(os.path.join(r'C:\Users\Szymek\Documents\magisterka_1\boy_4_12',
                           '120419221138.csv').replace(os.sep, '/'))

# Find the lowest score
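# A plausible continuation (assumed, not part of the original script):
# Reporting exposes the same accessors as Analyze, so the lowest logged
# score for the 'mse' metric plotted above could be read with:
print(r.low('mse'))
# and the corresponding rounds inspected via the results dataframe:
print(r.data.sort_values('mse').head())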
import pdb
import sys

import numpy as np
import pandas as pd
import talos

sys.path.insert(0, '../main/')
import plotting_functions as pl
import model as ml

output_dir = '../../plots/Ensemble-Sectors_2_3/'
# report_path = './TESS-unsupervised/072320203633.csv'
# report_path = './TESS-unsupervised/080220093603.csv'
report_path = './DAE/100120203712.csv'

# >> load experiment log
# res = pd.read_csv(report_path, header=0, usecols=list(range(20)))
analyze_object = talos.Analyze(report_path)
df, best_param_ind, p = pl.hyperparam_opt_diagnosis(analyze_object,
                                                    output_dir,
                                                    supervised=False)
pdb.set_trace()

print(df['losses'])

# >> among the rounds that used the chosen loss function, find the round
# >> with the lowest validation loss
# loss_name = 'LogCosh'
loss_name = 'MeanAbsolutePercentageError'
inds = []
for i in range(len(df['losses'])):
    if loss_name in df['losses'][i]:
        inds.append(i)
best_ind = inds[np.argmin(df['val_loss'][inds])]
print(df.iloc[best_ind])
pdb.set_trace()
scan_obj = talos.Scan(
    next(train_generator),            # note: next() yields a single batch
    next(validation_generator),
    x_val=next(train_generator),
    y_val=next(validation_generator),
    params=params,
    model=retinal_model,
    experiment_name='eye2gene',
    print_params=True,
)

scan_obj.data.head()
scan_obj.learning_entropy
scan_obj.details

ana_obj = talos.Analyze(scan_obj)
print(ana_obj.best_params('val_acc', ['acc', 'loss', 'val_loss']))

# Save model
#model.load_weights(best_model_name)
#model.save('trained_models/' + model_name + '.h5')
"""
# Generate some performance graphs of loss and accuracy
figure, plots = plt.subplots(2, 1, figsize=(5, 10))
ax1 = plots[0]
ax2 = plots[1]
ax1.plot(history.history['accuracy'])
ax1.plot(history.history['val_accuracy'])
ax1.title.set_text('Model accuracy')
ax1.set_ylabel('accuracy')
ax1.set_xlabel('epoch')
ax1.legend(['train', 'val'], loc='upper left')
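# A cautionary sketch (not in the original): next() on a Keras-style
# generator typically yields one (x_batch, y_batch) tuple, so the Scan call
# above effectively optimizes against single batches. To scan over a larger
# in-memory sample, several batches could be stacked first:
import numpy as np

def take_batches(generator, n_batches):
    """Draw n_batches from a Keras-style generator and stack them."""
    xs, ys = [], []
    for _ in range(n_batches):
        x_batch, y_batch = next(generator)
        xs.append(x_batch)
        ys.append(y_batch)
    return np.concatenate(xs), np.concatenate(ys)

x_sample, y_sample = take_batches(train_generator, 10)
x_val, y_val = take_batches(validation_generator, 2)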
def autoencoder(self, sectors=[27], fast=False, model_init=None,
                train_test_ratio=0.9, hyperparameter_optimization=False,
                lib_dir=None, database_dir=None, single_file=False,
                simbad_database_dir='', run_model=True, iterative=False,
                diag_plots=True, novelty_detection=True,
                classification_param_search=False, classification=True,
                norm_type='standardization', input_rms=True, input_psd=False,
                n_pgram=50):

    data_dir = self.datapath
    output_dir = self.CAEpath
    mom_dump = self.momdumpcsv
    sectors = [self.sector]
    cams = self.cams
    ccds = self.ccds
    fast = fast

    # weights init
    # model_init = output_dir + 'model'
    model_init = model_init

    # train_test_ratio = 0.1
    # >> fraction of training set size to testing set size
    train_test_ratio = train_test_ratio

    # >> runs DBSCAN on learned features
    # >> normalization options:
    #    * standardization : sets mean to 0. and standard deviation to 1.
    #    * median_normalization : divides by median
    #    * minmax_normalization : sets range of values from 0. to 1.
    #    * none : no normalization
    load_psd = False  # >> if psd_train.fits, psd_test.fits already exist
    use_tess_features = True
    use_tls_features = False
    input_features = False  # >> this option cannot be used yet
    split_at_orbit_gap = False
    DAE = False

    # >> move targets out of training set and into testing set (integer)
    # !! TODO: print failure if target not in sector
    # targets = [219107776]  # >> EX DRA
    # !!
    validation_targets = []

    if sectors[0] == 1:
        custom_mask = list(range(800)) + list(range(15800, 17400)) + \
                      list(range(19576, 20075))
    elif 4 in sectors:
        custom_mask = list(range(7424, 9078))
    else:
        custom_mask = []
    custom_masks = [list(range(500)) + list(range(15800, 17400)), []]

    # :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

    import talos  # >> a hyperparameter optimization library
    import pdb
    import tensorflow as tf
    # tf.enable_eager_execution()
    import sys
    if lib_dir is not None:
        sys.path.insert(0, lib_dir)  # >> needed if scripts not in current dir

    # >> hyperparameters
    if hyperparameter_optimization:
        p = {'kernel_size': [3, 5],
             'latent_dim': [25],
             'strides': [2],  # 3
             'epochs': [5],
             'dropout': [0.1, 0.2, 0.3, 0.4, 0.5],
             'num_filters': [32, 64, 128],
             'num_conv_layers': [4, 6, 8, 10],
             'batch_size': [128],
             'activation': [tf.keras.activations.softplus,
                            tf.keras.activations.selu,
                            tf.keras.activations.relu, 'swish',
                            tf.keras.activations.exponential,
                            tf.keras.activations.elu, 'linear'],
             'optimizer': ['adam', 'adadelta'],
             'last_activation': ['linear'],
             'losses': ['mean_squared_error'],
             'lr': [0.001],
             'initializer': ['random_normal'],
             'num_consecutive': [2],
             'kernel_regularizer': [None],
             'bias_regularizer': [None],
             'activity_regularizer': [None],
             'pool_size': [1]}
    else:
        # >> strides: list, len = num_consecutive
        p = {'kernel_size': 3, 'latent_dim': 35, 'strides': 1, 'epochs': 5,
             'dropout': 0.2, 'num_filters': 16, 'num_conv_layers': 4,
             'batch_size': 64, 'activation': 'elu', 'optimizer': 'adam',
             'last_activation': 'linear', 'losses': 'mean_squared_error',
             'lr': 0.0001, 'initializer': 'random_normal',
             'num_consecutive': 2, 'pool_size': 2, 'pool_strides': 2,
             'units': [1024, 512, 64, 16], 'kernel_regularizer': None,
             'bias_regularizer': None, 'activity_regularizer': None,
             'fully_conv': False, 'encoder_decoder_skip': False,
             'encoder_skip': False, 'decoder_skip': False,
             'full_feed_forward_highway': False, 'cvae': False,
             'share_pool_inds': False, 'batchnorm_before_act': True,
             'concat_ext_feats': False}

    # -- create output directory -------------------------------------------
    if not os.path.isdir(output_dir):  # >> check if dir already exists
        os.mkdir(output_dir)

    x_train, x_test, y_train, y_test, ticid_train, ticid_test, \
        target_info_train, target_info_test, rms_train, rms_test, x = \
        ml.autoencoder_preprocessing(self.flux, self.time, p,
                                     self.identifiers, self.target_info,
                                     mock_data=False,
                                     sector=sectors[0],
                                     validation_targets=validation_targets,
                                     norm_type=norm_type,
                                     input_rms=input_rms,
                                     input_psd=input_psd,
                                     load_psd=load_psd,
                                     n_pgram=n_pgram,
                                     train_test_ratio=train_test_ratio,
                                     split=split_at_orbit_gap,
                                     output_dir=output_dir,
                                     data_dir=data_dir,
                                     use_tess_features=use_tess_features,
                                     use_tls_features=use_tls_features)

    if input_psd:
        p['concat_ext_feats'] = True

    title = 'TESS-unsupervised'

    # == talos experiment ====================================================
    if hyperparameter_optimization:
        print('Starting hyperparameter optimization...')
        t = talos.Scan(x=x_test,
                       y=x_test,
                       params=p,
                       model=ml.conv_autoencoder,
                       experiment_name=title,
                       reduction_metric='val_loss',
                       minimize_loss=True,
                       reduction_method='correlation',
                       fraction_limit=0.001)  # fraction_limit = 0.001
        analyze_object = talos.Analyze(t)
        data_frame, best_param_ind, p = \
            pf.hyperparam_opt_diagnosis(analyze_object, output_dir,
                                        supervised=False)

    # == run model ===========================================================
    if run_model:
        print('Training autoencoder...')
        history, model, x_predict = \
            ml.conv_autoencoder(x_train, x_train, x_test, x_test, p,
                                val=False, split=split_at_orbit_gap,
                                ticid_train=ticid_train,
                                ticid_test=ticid_test,
                                save_model=True, predict=True,
                                save_bottleneck=True,
                                output_dir=output_dir,
                                model_init=model_init)

        if split_at_orbit_gap:
            x_train = np.concatenate(x_train, axis=1)
            x_test = np.concatenate(x_test, axis=1)
            x_predict = np.concatenate(x_predict, axis=1)

    # == Plots ===============================================================
    if diag_plots:
        print('Creating plots...')
        pf.diagnostic_plots(history, model, p, output_dir, x, x_train,
                            x_test, x_predict, mock_data=False, addend=0.,
                            target_info_test=target_info_test,
                            target_info_train=target_info_train,
                            ticid_train=ticid_train, ticid_test=ticid_test,
                            percentage=False,
                            input_features=input_features,
                            input_rms=input_rms, rms_test=rms_test,
                            input_psd=input_psd, rms_train=rms_train,
                            n_tot=40,
                            plot_epoch=True, plot_in_out=True,
                            plot_in_bottle_out=False,
                            plot_latent_test=True, plot_latent_train=True,
                            plot_kernel=False, plot_intermed_act=False,
                            make_movie=False,
                            plot_lof_test=False, plot_lof_train=False,
                            plot_lof_all=False,
                            plot_reconstruction_error_test=True,
                            plot_reconstruction_error_all=False,
                            load_bottleneck=True)

    # if input_psd:
    #     x = x[0]

    for i in [0, 1, 2]:
        if i == 0:
            use_learned_features = True
            use_tess_features = False
            use_tls_features = False
            use_engineered_features = False
            use_rms = False
            description = '_0_learned'
            DAE = False
        elif i == 1:
            use_learned_features = False
            use_tess_features = True
            use_tls_features = False
            use_engineered_features = False
            use_rms = False
            description = '_1_ext'
            DAE_hyperparam_opt = True
            DAE = True
            p_DAE = {'max_dim': [9, 11, 13, 15, 17, 19],
                     'step': [1, 2, 3, 4, 5, 6],
                     'latent_dim': [3, 4, 5],
                     'activation': ['relu', 'elu'],
                     'last_activation': ['relu', 'elu'],
                     'optimizer': ['adam'],
                     'lr': [0.001, 0.005, 0.01],
                     'epochs': [20],
                     'losses': ['mean_squared_error'],
                     'batch_size': [128],
                     'initializer': ['glorot_normal', 'glorot_uniform'],
                     'fully_conv': [False]}
        elif i == 2:
            use_learned_features = True
            use_tess_features = True
            use_tls_features = False
            use_engineered_features = False
            use_rms = True
            description = '_2_learned_RMS_ext'
            DAE_hyperparam_opt = True
            DAE = True
            p_DAE = {'max_dim': list(np.arange(40, 70, 5)),
                     'step': [1, 2, 3, 4, 5, 6],
                     'latent_dim': list(np.arange(12, 50, 5)),
                     'activation': ['relu', 'elu'],
                     'last_activation': ['relu', 'elu'],
                     'optimizer': ['adam'],
                     'lr': [0.001, 0.005, 0.01],
                     'epochs': [20],
                     'losses': ['mean_squared_error'],
                     'batch_size': [128],
                     'initializer': ['glorot_normal', 'glorot_uniform'],
                     'fully_conv': [False]}

        print('Creating feature space')
        if p['concat_ext_feats'] or input_psd:
            features, flux_feat, ticid_feat, info_feat = \
                ml.bottleneck_preprocessing(
                    sectors[0],
                    np.concatenate([x_train[0], x_test[0]], axis=0),
                    np.concatenate([ticid_train, ticid_test]),
                    np.concatenate([target_info_train, target_info_test]),
                    rms=np.concatenate([rms_train, rms_test]),
                    data_dir=data_dir,
                    bottleneck_dir=output_dir,
                    output_dir=output_dir,
                    use_learned_features=use_learned_features,
                    use_tess_features=use_tess_features,
                    use_engineered_features=use_engineered_features,
                    use_tls_features=use_tls_features,
                    use_rms=use_rms, norm=True,
                    cams=cams, ccds=ccds, log=True)
        else:
            features, flux_feat, ticid_feat, info_feat = \
                ml.bottleneck_preprocessing(
                    sectors[0],
                    np.concatenate([x_train, x_test], axis=0),
                    np.concatenate([ticid_train, ticid_test]),
                    np.concatenate([target_info_train, target_info_test]),
                    rms=np.concatenate([rms_train, rms_test]),
                    data_dir=data_dir,
                    bottleneck_dir=output_dir,
                    output_dir=output_dir,
                    use_learned_features=True,
                    use_tess_features=use_tess_features,
                    use_engineered_features=False,
                    use_tls_features=use_tls_features,
                    use_rms=use_rms, norm=True,
                    cams=cams, ccds=ccds, log=True)

        print('Plotting feature space')
        pf.latent_space_plot(features, output_dir + 'feature_space.png')

        if DAE:
            if DAE_hyperparam_opt:
                t = talos.Scan(x=features,
                               y=features,
                               params=p_DAE,
                               model=ml.deep_autoencoder,
                               experiment_name='DAE',
                               reduction_metric='val_loss',
                               minimize_loss=True,
                               reduction_method='correlation',
                               fraction_limit=0.1)
                analyze_object = talos.Analyze(t)
                data_frame, best_param_ind, p_best = \
                    pf.hyperparam_opt_diagnosis(analyze_object, output_dir,
                                                supervised=False)
                p_DAE = p_best
                p_DAE['epochs'] = 100
            else:
                p_DAE = {'max_dim': 50, 'step': 4, 'latent_dim': 42,
                         'activation': 'elu', 'last_activation': 'elu',
                         'optimizer': 'adam', 'lr': 0.001, 'epochs': 100,
                         'losses': 'mean_squared_error', 'batch_size': 128,
                         'initializer': 'glorot_uniform',
                         'fully_conv': False}
                # p_DAE = {'max_dim': 9, 'step': 5, 'latent_dim': 4,
                #          'activation': 'elu', 'last_activation': 'elu',
                #          'optimizer': 'adam',
                #          'lr': 0.01, 'epochs': 100,
                #          'losses': 'mean_squared_error',
                #          'batch_size': 128,
                #          'initializer': 'glorot_normal',
                #          'fully_conv': False}

            history_DAE, model_DAE = ml.deep_autoencoder(features, features,
                                                         features, features,
                                                         p_DAE, resize=False,
                                                         batch_norm=True)
            new_features = ml.get_bottleneck(model_DAE, features, p_DAE,
                                             DAE=True)
            features = new_features
            pf.epoch_plots(history_DAE, p_DAE, output_dir)

            print('Plotting feature space')
            pf.latent_space_plot(features, output_dir + 'feature_space_DAE.png')

        if novelty_detection:
            print('Novelty detection')
            pf.plot_lof(x, flux_feat, ticid_feat, features, 20, output_dir,
                        momentum_dump_csv=self.momdumpcsv, n_tot=200,
                        target_info=info_feat, prefix=str(i),
                        cross_check_txt=database_dir, debug=True, addend=0.,
                        single_file=single_file, log=True, n_pgram=n_pgram,
                        plot_psd=True)

        if classification:
            if classification_param_search:
                # note: fixed from output_dir + 'str(i)-' (a quoting bug) to
                # match the prefix=str(i) convention used above
                df.KNN_plotting(output_dir + str(i) + '-', features,
                                [10, 20, 100])

                print('DBSCAN parameter search')
                parameter_sets, num_classes, silhouette_scores, db_scores, \
                    ch_scores, acc = \
                    df.dbscan_param_search(features, x, flux_feat,
                                           ticid_feat, info_feat,
                                           DEBUG=False,
                                           output_dir=output_dir + str(i),
                                           simbad_database_txt=simbad_database_dir,
                                           leaf_size=[30],
                                           algorithm=['auto'],
                                           min_samples=[5],
                                           metric=['minkowski'],
                                           p=[3, 4],
                                           database_dir=database_dir,
                                           eps=list(np.arange(1.5, 4., 0.1)),
                                           confusion_matrix=False, pca=False,
                                           tsne=False, tsne_clustering=False)

                print('Classification with best parameter set')
                best_ind = np.argmax(silhouette_scores)
                best_param_set = parameter_sets[best_ind]
            else:
                best_param_set = [2.0, 3, 'minkowski', 'auto', 30, 4]

            if classification_param_search:
                print('HDBSCAN parameter search')
                acc = df.hdbscan_param_search(features, x, flux_feat,
                                              ticid_feat, info_feat,
                                              output_dir=output_dir,
                                              p0=[3, 4],
                                              single_file=single_file,
                                              database_dir=database_dir,
                                              metric=['all'],
                                              min_samples=[3],
                                              min_cluster_size=[3],
                                              data_dir=data_dir)
            else:
                # best_param_set = [3, 3, 'manhattan', None]
                best_param_set = [3, 3, 'canberra', None]

            print('Run HDBSCAN')
            _, _, acc = df.hdbscan_param_search(
                features, x, flux_feat, ticid_feat, info_feat,
                output_dir=output_dir,
                p0=[best_param_set[3]],
                single_file=single_file,
                database_dir=database_dir,
                metric=[best_param_set[2]],
                min_cluster_size=[best_param_set[0]],
                min_samples=[best_param_set[1]],
                DEBUG=True, pca=True, tsne=True,
                data_dir=data_dir, save=True)

            with open(output_dir + 'param_summary.txt', 'a') as f:
                f.write('accuracy: ' + str(np.max(acc)))

            df.gmm_param_search(features, x, flux_feat, ticid_feat,
                                info_feat, output_dir=output_dir,
                                database_dir=database_dir,
                                data_dir=data_dir)

            from sklearn.mixture import GaussianMixture
            gmm = GaussianMixture(n_components=200)
            labels = gmm.fit_predict(features)
            acc = pf.plot_confusion_matrix(ticid_feat, labels,
                                           database_dir=database_dir,
                                           single_file=single_file,
                                           output_dir=output_dir,
                                           prefix='gmm-')
            pf.quick_plot_classification(x, flux_feat, ticid_feat, info_feat,
                                         features, labels, path=output_dir,
                                         prefix='gmm-',
                                         database_dir=database_dir,
                                         single_file=single_file)
            pf.plot_cross_identifications(x, flux_feat, ticid_feat,
                                          info_feat, features, labels,
                                          path=output_dir,
                                          database_dir=database_dir,
                                          data_dir=data_dir, prefix='gmm-')

    # == iterative training ==================================================
    ml.iterative_cae(x_train, y_train, x_test, y_test, x, p,
                     ticid_train, ticid_test,
                     target_info_train, target_info_test,
                     num_split=2, output_dir=output_dir,
                     split=split_at_orbit_gap, input_psd=input_psd)
cnn_params = {
    'convolutional': [16, 32, 128],
    'kernel': [10, 20],
    'conv_activation': [relu, elu],  # assumes relu, elu are imported, e.g. from tensorflow.keras.activations
    'dense': [20, 50],
    'optimizer': ['Adam'],
    'epoch': [5, 10, 15],
    'hidden_layers': [100, 500],
}

cnn_scan = talos.Scan(x=X,
                      y=d_train_array,
                      model=cnn_optimization,
                      params=cnn_params,
                      experiment_name='CNN_Optimization',
                      round_limit=10,
                      fraction_limit=0.05)

cnn_analyze = talos.Analyze(cnn_scan)
documentation_file_parameteropt.write(
    "CNN: Best parameters {}, reached score: {} \n".format(
        cnn_analyze.best_params('accuracy', ['accuracy', 'loss', 'val_loss']),
        cnn_analyze.high('accuracy')))

pred_cnn = talos.Predict(cnn_scan).predict(x_t, metric='val_f1score',
                                           asc=True)

# evaluate the model
cnn_evaluation_scores, cnn_cm = evaluation.multilabel_evaluation(
    d_test_array, label_binarizer.inverse_transform(pred_cnn), "CNN")
documentation_file_modelopt.write(cnn_evaluation_scores)

# deploy the best model
model_cnn = talos.Deploy(cnn_scan, "model_cnn_scibert", metric='val_accuracy')

# build LSTM model and evaluate the model
print("LSTM model evaluation")
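# A minimal sketch (not in the original) of loading the package written by
# talos.Deploy above; Deploy("model_cnn_scibert", ...) produces
# model_cnn_scibert.zip, which talos.Restore unpacks into a usable model:
from talos import Restore

restored = Restore('model_cnn_scibert.zip')
pred = restored.model.predict(x_t)  # x_t: the same held-out inputs used for Predict above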
def search_analysis(search_result):
    analyze_object = ta.Analyze(search_result)
    # the results live in the .data DataFrame; pick the row with the
    # highest validation accuracy
    val_acc = analyze_object.data['val_acc']
    best_index = val_acc.idxmax()
    print(analyze_object.data.loc[best_index, :])
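# Hypothetical usage (assumed, not in the original), given data arrays x and
# y, a params dict p, and a Talos model function build_model:
scan = ta.Scan(x=x, y=y, params=p, model=build_model,
               experiment_name='search')
search_analysis(scan)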