def main():
    """Entry point: assemble the data/model configuration and start preprocessing."""
    # Directory layout of the data pipeline (raw -> interim -> processed).
    data_dir = '../data/01_raw'
    interim_dir = '../data/02_interim'
    processed_dir = '../data/03_processed'
    results_dir = '../results'

    # Everything the preprocessing stage needs to know about the data.
    data_config = {
        'data_dir': data_dir,
        'interim_dir': interim_dir,
        'processed_dir': processed_dir,
        'processed_dim': [512, 512],
        'data_labels': ['depth', 'target'],
        'delimiter': '\t',
        'used_data': 0.6,  # Percentage of used original data measurements
        'random_seed': 1234,
        'batch_size': 4,
        'test_size': 0.2,
    }
    # Learning setup for the model stage.
    model_config = {
        'results_dir': results_dir,
        'output_size': 2,
        'arch': 'vgg16',
        'loss': 'BCELoss',
        'max_iter': 100,
        'learning_rate': 0.01,
        'pretrained': True,
    }
    # NOTE(review): model_config is built but not consumed in this function —
    # confirm it is used further downstream.

    # Ensure every pipeline directory exists before any processing starts.
    misc.gen_dirs([data_dir, interim_dir, processed_dir, results_dir])
    data_processor = DataProcessor(data_config)
def main():
    """Entry point: preprocess FRF data, split it by the fixed test
    configurations, train hyperparameter searches and evaluate them.

    The train/test split is deterministic: every scenario whose first three
    parameters match an entry of `test_configs` is held out for testing.
    """
    misc.to_local_dir(__file__)
    misc.gen_dirs([data_dir, processed_dir, plot_dir, model_dir, results_dir])

    # data = np.load(f"{processed_dir}/processed_data.npy")
    np.set_printoptions(suppress=True)
    data = processing(store=True, plot=False)

    def _is_test_scenario(scenario, config):
        """Return True if the scenario's leading parameters match *config*."""
        return (scenario[0, 0] == config[0] and scenario[0, 1] == config[1]
                and scenario[0, 2] == config[2])

    # Manual train/test split instead of train_test_split: the held-out set
    # is the fixed list of `test_configs`, everything else is training data.
    train_data = np.empty(
        (np.shape(data)[0] - len(test_configs), np.shape(data)[1],
         np.shape(data)[2]))
    test_data = np.empty(
        (len(test_configs), np.shape(data)[1], np.shape(data)[2]))
    test_idx = 0
    train_idx = 0
    # Collect the test scenarios in the order of `test_configs`; the order
    # matters for the later per-configuration evaluation.
    for config in test_configs:
        for scenario in data:
            if _is_test_scenario(scenario, config):
                test_data[test_idx] = scenario
                test_idx += 1
    # Every scenario not matching any test configuration goes into training.
    for scenario in data:
        if not any(
                _is_test_scenario(scenario, config)
                for config in test_configs):
            train_data[train_idx] = scenario
            train_idx += 1

    # Flatten training data by one dimension but keep the shape of the testing
    # data, to being able to test different FRFs separately
    train_data = np.reshape(train_data, (-1, input_size + output_size))

    # Scale the inputs only; the scaler is fitted on the training data and
    # reused unchanged for every test scenario.
    x_scaler = MinMaxScaler()
    train_data[:, :input_size] = x_scaler.fit_transform(
        train_data[:, :input_size])
    for scenario_idx, __ in enumerate(test_data):
        test_data[scenario_idx, :, :input_size] = x_scaler.transform(
            test_data[scenario_idx, :, :input_size])

    hyperopts = training(train_data)
    total_errors = np.empty((len(hyperopts), output_size))
    total_variances = np.empty((len(hyperopts), output_size))
    for hyper_idx, hyperopt in enumerate(hyperopts):
        errors, variances = test_frf(hyperopt, test_data, x_scaler)
        total_errors[hyper_idx] = errors
        total_variances[hyper_idx] = variances
        # Persist the fitted search so the best estimator can be reloaded.
        dump(
            hyperopt,
            '{}/hyperopt_{}.joblib'.format(
                model_dir, hyperopt[0].best_estimator_.__class__.__name__))
    write_results(hyperopts, total_errors, total_variances)
def main():
    """Entry point: tune the neighbour count of the interpolation baseline and
    print the resulting test errors as a LaTeX table row."""
    misc.to_local_dir(__file__)
    misc.gen_dirs([data_dir, processed_dir, plot_dir, results_dir])

    # Definition of data and learning properties
    np.set_printoptions(suppress=True)
    # data = np.load(f"{processed_dir}/processed_osc.npy")
    n_neighbors = range(10, 21)

    if OSC:
        train_data, test_data = train_test_osc()
        # Pick the neighbour count with the highest interpolation score.
        best_score = 0
        best_nbrs = 0
        for candidate in n_neighbors:
            __, __, __, candidate_score = interpolate_osc(
                train_data, test_data, candidate)
            if candidate_score > best_score:
                best_score = candidate_score
                best_nbrs = candidate
        # Re-run with the winning neighbour count to obtain the error terms.
        freq_errors, gamma_errors, mass_errors, __ = interpolate_osc(
            train_data, test_data, best_nbrs)
        print(r"{:.2f} \pm {:.2f} & {:.2f} \pm {:.2f} & {:.2f} \pm {:.2f}".
              format(np.mean(freq_errors), np.std(freq_errors),
                     np.mean(gamma_errors), np.std(gamma_errors),
                     np.mean(mass_errors), np.std(mass_errors)))
    else:
        train_data, test_data = train_test_frf()
        # Same neighbour-count search for the FRF variant.
        best_score = 0
        best_nbrs = 0
        for candidate in n_neighbors:
            __, __, candidate_score = interpolate_frf(train_data, test_data,
                                                      candidate)
            if candidate_score > best_score:
                best_score = candidate_score
                best_nbrs = candidate
        errors, variances, __ = interpolate_frf(train_data, test_data,
                                                best_nbrs)
        print(
            r"{:.2f} \pm {:.2f} & {:.2f} \pm {:.2f} & {:.2f} \pm {:.2f} & {:.2f} \pm {:.2f}"
            .format(errors[0], variances[0], errors[1], variances[1],
                    errors[2], variances[2], errors[3], variances[3]))
def main():
    """Entry point: prepare the output directories and generate the dataset."""
    misc.to_local_dir(__file__)
    # All output locations must exist before generation starts writing.
    required_dirs = [data_dir, plot_dir, model_dir, results_dir]
    misc.gen_dirs(required_dirs)
    generate_data()
def main():
    """Entry point: read oscillator data, split it by the fixed test
    configurations, scale it, train hyperparameter searches and write the
    evaluation results.

    The train/test split is deterministic: every row whose first three
    parameters match an entry of `test_configs` is held out for testing.
    """
    misc.to_local_dir(__file__)
    misc.gen_dirs([data_dir, processed_dir, plot_dir, model_dir, results_dir])

    # Definition of data and learning properties
    np.set_printoptions(suppress=True)
    # data = np.load(f"{processed_dir}/processed_osc.npy")
    data = read_osc(store=True)

    def _is_test_row(row, config):
        """Return True if the row's first three parameters match *config*."""
        return (row[0] == config[0] and row[1] == config[1]
                and row[2] == config[2])

    # Manual train/test split instead of train_test_split: the held-out set
    # is the fixed list of `test_configs`, everything else is training data.
    train_data = np.empty(
        (np.shape(data)[0] - len(test_configs), np.shape(data)[1]))
    test_data = np.empty((len(test_configs), np.shape(data)[1]))
    test_idx = 0
    train_idx = 0
    # Collect the test rows in the order of `test_configs`; the order matters
    # for the later per-configuration evaluation in `test_osc`.
    for config in test_configs:
        for row in data:
            if _is_test_row(row, config):
                test_data[test_idx] = row
                test_idx += 1
    # Every row not matching any test configuration goes into training.
    for row in data:
        if not any(_is_test_row(row, config) for config in test_configs):
            train_data[train_idx] = row
            train_idx += 1

    # (A large block of commented-out, dead evaluation code that re-computed
    # frequency/gamma/mass RMSEs from stored prediction files was removed.)

    # Scale the inputs only; the scaler is fitted on the training data and
    # applied unchanged to the test data.
    x_scaler = MinMaxScaler()
    train_data[:, :input_size] = x_scaler.fit_transform(
        train_data[:, :input_size])
    test_data[:, :input_size] = x_scaler.transform(test_data[:, :input_size])

    hyperopts = training(train_data)
    total_errors = np.empty((len(hyperopts), output_size))
    total_variances = np.empty((len(hyperopts), output_size))
    for hyper_idx, hyperopt in enumerate(hyperopts):
        errors, variances = test_osc(hyperopt, test_data, test_configs)
        total_errors[hyper_idx] = errors
        total_variances[hyper_idx] = variances
        # Persist the fitted search so the best estimator can be reloaded.
        dump(
            hyperopt,
            '{}/hyperopt_{}.joblib'.format(
                model_dir, hyperopt[0].best_estimator_.__class__.__name__))
    write_results(hyperopts, total_errors, total_variances)