# Shared imports for the training and search routines in this section.
# Helper functions such as set_seeds, gen_ffnn, gen_rnn, gen_complex_ffnn,
# gen_model_based_nn, select_optimizer, step, initialize, gen_cv_data,
# gen_seeds, parallel_train, gen_array, gen_stat, gen_model, single_run,
# wrapper_train_parallel, and the save_data_* functions are defined elsewhere
# in the repo.
import os
import shutil
import uuid

import numpy as np
import tensorflow as tf


def main(params):
    set_seeds(params.seed)
    shared_params = ("ffnn", "nn", params, gen_ffnn, flop_realnn)
    if params.search:
        # Depth of the network, i.e. the number of hidden layers in the struct string.
        network_depth = len([
            int(layer) for layer in params.ffnn_struct.split("-")
            if layer.isdigit()
        ])
        if params.search_width:
            # Sweep the hidden-layer width while keeping the depth fixed.
            for n_hidden in range(params.min_width, params.max_width + 1,
                                  params.step_width):
                params.ffnn_struct = (network_depth * (str(n_hidden) + "-"))[:-1]
                print("NETWORK STRUCTURE:", params.ffnn_struct)
                parameter_search_parallel(*shared_params, wrapper_train_parallel)
        else:
            print("NETWORK STRUCTURE:", params.ffnn_struct)
            parameter_search_parallel(*shared_params, wrapper_train_parallel)
    else:
        print("NETWORK STRUCTURE:", params.ffnn_struct)
        train_func = wrapper_train_parallel if params.mp else train
        single_run(*shared_params, train_func)
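
# A minimal, self-contained sketch of the width sweep above. The argument
# values are illustrative assumptions, not defaults from this repo: for each
# candidate width, the struct string repeats that width network_depth times.
def demo_width_sweep(network_depth=3, min_width=10, max_width=30, step_width=10):
    for n_hidden in range(min_width, max_width + 1, step_width):
        print((network_depth * (str(n_hidden) + "-"))[:-1])

# demo_width_sweep() prints:
#   10-10-10
#   20-20-20
#   30-30-30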
def train(params, batch_size, learning_rate, x_train, x_test, y_train, y_test,
          seed):
    """Training loop for the real-valued recurrent neural network."""
    set_seeds(seed)
    if params.search or params.mp:
        verbosity = 0
    else:
        verbosity = 1
    model = gen_rnn(params, batch_size, learning_rate)

    # Information on the distribution of |r(n)|^2: mean and variance.
    train_loss_list = np.zeros(params.n_epochs)  # Loss is the MEAN of the squared residuals
    train_var_list = np.zeros(params.n_epochs)   # Variance of the |r(n)|^2 values
    test_loss_list = np.zeros(params.n_epochs)   # (variance lists stay zero here;
    test_var_list = np.zeros(params.n_epochs)    #  the Keras fit loop does not track them)

    # Truncate the training set to a multiple of batch_size
    # (required for a stateful network).
    if params.rnn_stateful:
        x_train = x_train[:x_train.shape[0] - x_train.shape[0] % batch_size]
        y_train = y_train[:y_train.shape[0] - y_train.shape[0] % batch_size]

    history = model.fit(
        x_train,
        np.hstack((y_train.real, y_train.imag)),
        epochs=params.n_epochs,
        batch_size=batch_size,
        verbose=verbosity,
        validation_data=(x_test, np.hstack((y_test.real, y_test.imag))),
    )

    # Compensate for the extra terms counted in the mean when each complex
    # target is split into real and imaginary parts: the mean is then taken
    # over an array of size 2N, so it is a factor of two too small.
    train_loss_list = 2 * np.array(history.history["loss"])
    test_loss_list = 2 * np.array(history.history["val_loss"])

    if not params.search and not params.mp:
        print("Train Loss", train_loss_list[-1])
        print("Test Loss", test_loss_list[-1])
        print()

    if not os.path.exists("tmp_models"):
        os.makedirs("tmp_models")
    path = "tmp_models" + os.sep + str(uuid.uuid4().hex)
    model.save_weights(path, save_format="tf")

    result_dict = {
        "train_loss": train_loss_list,
        "train_var": train_var_list,
        "test_loss": test_loss_list,
        "test_var": test_var_list,
        "model": path,
    }
    return result_dict
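
# Why the factor of 2 above is correct (a small numeric check, not repo code):
# for a complex residual r, mean(|r|^2) over N samples equals twice the mean
# of the 2N stacked real and imaginary squared parts, since
# |r|^2 = Re(r)^2 + Im(r)^2.
def demo_loss_compensation():
    rng = np.random.default_rng(0)
    r = rng.normal(size=8) + 1j * rng.normal(size=8)
    complex_mse = np.mean(np.abs(r) ** 2)
    stacked_mse = np.mean(np.hstack((r.real, r.imag)) ** 2)
    assert np.isclose(complex_mse, 2 * stacked_mse)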
def main(params):
    set_seeds(params.seed)
    shared_params = ("model_based_nn", "model_based_nn", params,
                     gen_model_based_nn, flop_model_based_nn)
    print("NETWORK STRUCTURE:", params.htnn_struct)
    if params.search:
        if params.search_width:
            # Sweep the maximum power parameter of the model.
            for max_power in range(params.min_width, params.max_width + 1,
                                   params.step_width):
                print("Max Power", max_power)
                params.max_power = max_power
                parameter_search_parallel(*shared_params, wrapper_train_parallel)
        else:
            parameter_search_parallel(*shared_params, wrapper_train_parallel)
    else:
        train_func = wrapper_train_parallel if params.mp else train
        single_run(*shared_params, train_func)
def parameter_search_parallel(name, model_type, params, model_func, flop_func,
                              train_func):
    """Perform the parameter search in parallel by constructing all parameter
    points and seeds as a set of tasks.

    Parameters
    ----------
    name : str
        Name of the model (model_based_nn, complex_rnn, rnn, complex_ffnn, ffnn).
    model_type : str
        Type of the model (nn, model_based_nn).
    params : :obj:
        Parameters.
    model_func : :obj:
        Model generator function.
    flop_func : :obj:
        Function to calculate FLOPs for the model.
    train_func : :obj:
        Model training function.
    """
    if model_type not in ["nn", "model_based_nn"]:
        raise ValueError("Specified model_type of model is incorrect")

    (x_train, y_train, y_train_orig, x_test, y_test, y_test_orig, y_var, h_lin,
     y_canc_train, y_canc_test, noise,
     measured_noise_power) = initialize(params)

    # Linear cancellation in dB, used as a baseline.
    canc_lin = 10 * np.log10(
        np.mean(np.abs(y_test_orig)**2) /
        np.mean(np.abs(y_test_orig - y_canc_test)**2))

    # If cross-validating, generate the arrays for the folds.
    if params.cv:
        x_train_cv, y_train_cv, x_val_cv, y_val_cv = gen_cv_data(
            params, x_train, y_train)
        seed_list = gen_seeds(params.seed, params.n_cv_seeds)
        # Swap to fewer epochs during cross-validation; restored afterwards.
        n_epochs = params.n_epochs
        params.n_epochs = params.n_cv_epochs
    else:
        seed_list = gen_seeds(params.seed, params.n_seeds)

    # Generate all the tasks.
    tasks = []
    if params.cv:
        print("PARAMETER SEARCH USING CV STARTING")
    else:
        print("PARAMETER SEARCH STARTING")

    # Reseed before each draw so the sampled hyperparameters are reproducible.
    set_seeds(params.seed)
    batch_size_list = np.random.randint(low=params.min_batch_size,
                                        high=params.max_batch_size,
                                        size=params.n_search_points)
    set_seeds(params.seed)
    learning_rate_list = np.random.uniform(low=params.min_learning_rate,
                                           high=params.max_learning_rate,
                                           size=params.n_search_points)

    for point in range(0, params.n_search_points):
        if params.cv:
            for fold in range(0, params.cv_folds):
                for seed in seed_list:
                    tasks.append((params, batch_size_list[point],
                                  learning_rate_list[point], x_train_cv[fold],
                                  x_val_cv[fold], y_train_cv[fold],
                                  y_val_cv[fold], seed))
        else:
            for seed in seed_list:
                tasks.append((params, batch_size_list[point],
                              learning_rate_list[point], x_train, x_test,
                              y_train, y_test, seed))

    result_array = parallel_train(params, tasks, train_func)

    # Take the mean test loss across seeds to determine the best hyperparameters.
    test_loss_array = gen_array(result_array, "test_loss")
    if params.cv:
        test_loss_array = np.reshape(test_loss_array,
                                     (params.n_search_points, params.cv_folds,
                                      params.n_cv_seeds, params.n_epochs))
        results_last = test_loss_array[..., -1]        # All results, last epoch only
        results_last = np.mean(results_last, axis=-1)  # Mean across CV seeds
        results_last = np.mean(results_last, axis=-1)  # Mean across folds

        # Find the optimal point in the parameter space based on minimum loss.
        opt_point_idx = np.nanargmin(results_last)
        params.batch_size = batch_size_list[opt_point_idx]
        params.learning_rate = learning_rate_list[opt_point_idx]
        params.n_epochs = n_epochs
        print("CROSS-VALIDATION COMPLETE: PARAMETERS FOUND")
        print("\tBatch-Size", params.batch_size)
        print("\tLearning-Rate", params.learning_rate)
        print()

        if params.search_training_size:
            training_sizes = np.arange(
                params.min_training_size,
                params.max_training_size + params.step_training_size,
                params.step_training_size) / 100
            print("Training set sizes", training_sizes)
            for training_size in training_sizes:
                params.training_size = training_size
                # Rerun with the optimal parameters and a different set of seeds.
                single_run(name, model_type, params, model_func, flop_func,
                           train_func)
        else:
            # Rerun with the optimal parameters and a different set of seeds.
            single_run(name, model_type, params, model_func, flop_func,
                       train_func)
    else:
        test_loss_array = np.reshape(
            test_loss_array,
            (params.n_search_points, params.n_seeds, params.n_epochs))
        results_last_epoch = test_loss_array[..., -1]  # All results, last epoch only
        results_avg = np.mean(results_last_epoch,
                              axis=-1)  # Mean across seeds per parameter point

        # Find the optimal point in the parameter space based on minimum loss.
        opt_point_idx = np.nanargmin(results_avg)
        opt_seed_idx = np.nanargmin(results_last_epoch[opt_point_idx], axis=-1)
        params.batch_size = batch_size_list[opt_point_idx]
        params.learning_rate = learning_rate_list[opt_point_idx]
        print("PARAMETER SEARCH COMPLETE: PARAMETERS FOUND")
        print("\tBatch-Size", params.batch_size)
        print("\tLearning-Rate", params.learning_rate)
        print()

        # Get the results and the trained model for the optimal point.
        result_array = np.reshape(result_array,
                                  (params.n_search_points, params.n_seeds))
        result_array = result_array[opt_point_idx]
        train_dict = gen_stat(gen_array(result_array, "train_loss"),
                              gen_array(result_array, "train_var"), y_train,
                              seed_list)
        test_dict = gen_stat(gen_array(result_array, "test_loss"),
                             gen_array(result_array, "test_var"), y_test,
                             seed_list)
        model_path = result_array[opt_seed_idx]["model"]
        model = gen_model(params, name)
        model.load_weights(model_path)
        # Calling the model builds it, so the restored weights are materialized.
        y_hat = model(x_test)
        if model_type == "nn":
            save_data_nn(name, params, model, model_path, flop_func,
                         train_dict, test_dict, canc_lin, seed_list)
        else:
            K1_array = gen_array(result_array, "K1")
            K2_array = gen_array(result_array, "K2")
            weights_array = gen_array(result_array, "weights")
            save_data_model_based_nn(name, params, model, model_path,
                                     flop_func, train_dict, test_dict,
                                     canc_lin, seed_list, K1_array, K2_array,
                                     weights_array)

    # Remove the temporary model directory.
    try:
        shutil.rmtree("tmp_models")
    except OSError as e:
        print("Error: %s - %s." % (e.filename, e.strerror))
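
# A tiny standalone sketch (hypothetical shapes, random stand-in losses) of how
# the non-CV branch above picks its winner: losses arrive flat, one row per
# (point, seed) task, and are reshaped before averaging over seeds.
def demo_pick_optimum(n_points=4, n_seeds=3, n_epochs=5):
    rng = np.random.default_rng(0)
    flat_losses = rng.uniform(size=(n_points * n_seeds, n_epochs))
    losses = flat_losses.reshape(n_points, n_seeds, n_epochs)
    last_epoch = losses[..., -1]                    # (n_points, n_seeds)
    per_point = np.mean(last_epoch, axis=-1)        # mean over seeds
    opt_point = np.nanargmin(per_point)             # best hyperparameter point
    opt_seed = np.nanargmin(last_epoch[opt_point])  # best seed at that point
    return opt_point, opt_seed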
def train(params, batch_size, learning_rate, x_train, x_test, y_train, y_test,
          seed):
    """Training loop for the complex-valued feed-forward neural network."""
    set_seeds(seed)
    optimizer = select_optimizer(params)
    model = gen_complex_ffnn(params)
    f_train = tf.function(step)
    n_batches = x_train.shape[0] // batch_size

    # Information on the distribution of |r(n)|^2: mean and variance.
    train_loss_list = np.zeros(params.n_epochs)  # Loss is the MEAN of the squared residuals
    train_var_list = np.zeros(params.n_epochs)   # Variance of the |r(n)|^2 values
    test_loss_list = np.zeros(params.n_epochs)
    test_var_list = np.zeros(params.n_epochs)

    for epoch in range(0, params.n_epochs):
        train_loss = 0
        for batch in range(0, n_batches):
            first_index = batch * batch_size
            second_index = first_index + batch_size
            batch_input = x_train[first_index:second_index]
            batch_target = y_train[first_index:second_index]
            loss = f_train(model, optimizer, params.gradient_clipping,
                           batch_input, batch_target)
            train_loss += loss
        test_out = model(x_test)
        test_loss = complex_mean_squared_error(y_test, test_out)
        train_loss /= n_batches
        train_loss_list[epoch] = train_loss
        test_loss_list[epoch] = test_loss
        train_var_list[epoch] = np.var(
            np.abs(y_train - model(x_train).numpy())**2)
        test_var_list[epoch] = np.var(np.abs(y_test - test_out.numpy())**2)
        if not params.search and not params.mp:
            print("Epoch", epoch + 1)
            print("Train Loss", train_loss_list[epoch])
            print("Test Loss", test_loss_list[epoch])
            print()

    if not params.search and not params.mp:
        print("Train Loss", train_loss_list[-1])
        print("Test Loss", test_loss_list[-1])
        print()

    if not os.path.exists("tmp_models"):
        os.makedirs("tmp_models")
    path = "tmp_models" + os.sep + str(uuid.uuid4().hex)
    model.save_weights(path, save_format="tf")

    result_dict = {
        "train_loss": train_loss_list,
        "train_var": train_var_list,
        "test_loss": test_loss_list,
        "test_var": test_var_list,
        "model": path,
    }
    return result_dict
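
# `step` is defined elsewhere in the repo; below is a minimal sketch of what
# such a training step typically looks like, matching the call signature used
# above. The clipping rule (clip-by-norm with threshold 1.0) is an assumption
# for illustration, not necessarily what this repo does.
def step_sketch(model, optimizer, gradient_clipping, batch_input, batch_target):
    with tf.GradientTape() as tape:
        loss = complex_mean_squared_error(batch_target, model(batch_input))
    gradients = tape.gradient(loss, model.trainable_variables)
    if gradient_clipping:
        gradients = [tf.clip_by_norm(g, 1.0) for g in gradients]
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss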
def train(params, batch_size, learning_rate, x_train, x_test, y_train, y_test,
          seed):
    """Training loop for the complex-valued model-based neural network."""
    set_seeds(seed)
    network_array = params.htnn_struct.split("-")
    network_array = [layer for layer in network_array if layer != "-"]
    optimizer = select_optimizer(params)
    model = gen_model_based_nn(params)
    f_train = tf.function(step)
    n_batches = x_train.shape[0] // batch_size

    # Information on the distribution of |r(n)|^2: mean and variance.
    train_loss_list = np.zeros(params.n_epochs)  # Loss is the MEAN of the squared residuals
    train_var_list = np.zeros(params.n_epochs)   # Variance of the |r(n)|^2 values
    test_loss_list = np.zeros(params.n_epochs)
    test_var_list = np.zeros(params.n_epochs)

    for epoch in range(0, params.n_epochs):
        train_loss = 0
        for batch in range(0, n_batches):
            first_index = batch * batch_size
            second_index = first_index + batch_size
            batch_input = x_train[first_index:second_index]
            batch_target = y_train[first_index:second_index]
            loss = f_train(model, optimizer, params.gradient_clipping_param1,
                           params.gradient_clipping_param2,
                           params.gradient_clipping, network_array,
                           batch_input, batch_target)
            train_loss += loss
        test_out = model(x_test)
        test_loss = complex_mean_squared_error(y_test, test_out)
        train_loss /= n_batches
        train_loss_list[epoch] = train_loss
        test_loss_list[epoch] = test_loss
        train_var_list[epoch] = np.var(
            np.abs(y_train - model(x_train).numpy())**2)
        test_var_list[epoch] = np.var(np.abs(y_test - test_out.numpy())**2)
        if not params.search and not params.mp:
            print("Epoch", epoch + 1)
            print("Train Loss", train_loss_list[epoch])
            print("Test Loss", test_loss_list[epoch])
            print("Test output variance", np.var(test_out.numpy()))
            print("Test output mean", np.mean(test_out.numpy()))
            print()

    if np.isnan(test_loss_list).any():
        print("Warning: NaNs present in loss")

    if not os.path.exists("tmp_models"):
        os.makedirs("tmp_models")
    path = "tmp_models" + os.sep + str(uuid.uuid4().hex)
    model.save_weights(path, save_format="tf")

    # Recover the complex IQ-mixer coefficients K1, K2 and the Hammerstein
    # kernel from the trained layers; the mapping depends on the
    # parameterization selected by the first entry of the struct string.
    for layer in model.layers:
        if layer.__class__.__name__ == "IQMixer":
            if network_array[0] == "T1":
                K1_real = np.cos(layer.phase.numpy())
                K1_imag = layer.gain.numpy() * np.sin(layer.phase.numpy())
                K1 = K1_real + 1j * K1_imag
                K2_real = layer.gain.numpy() * np.cos(layer.phase.numpy())
                K2_imag = -np.sin(layer.phase.numpy())
                K2 = K2_real + 1j * K2_imag
            elif network_array[0] == "T2":
                K1_real = np.cos(layer.phase.numpy())
                K1_imag = -layer.gain.numpy() * np.sin(layer.phase.numpy())
                K1 = K1_real + 1j * K1_imag
                K2_real = layer.gain.numpy() * np.cos(layer.phase.numpy())
                K2_imag = -np.sin(layer.phase.numpy())
                K2 = K2_real + 1j * K2_imag
            elif network_array[0] == "E":
                K1_real = (1 + layer.gain.numpy() * np.cos(layer.phase.numpy())) / 2
                K1_imag = (layer.gain.numpy() * np.sin(layer.phase.numpy())) / 2
                K1 = K1_real + 1j * K1_imag
                K2_real = (1 - layer.gain.numpy() * np.cos(layer.phase.numpy())) / 2
                K2_imag = -(layer.gain.numpy() * np.sin(layer.phase.numpy())) / 2
                K2 = K2_real + 1j * K2_imag
            elif network_array[0] == "C" or network_array[0] == "R":
                K1 = layer.K1_real.numpy() + 1j * layer.K1_imag.numpy()
                K2 = layer.K2_real.numpy() + 1j * layer.K2_imag.numpy()
        if layer.__class__.__name__ == "Hammerstein":
            weights = layer.kernel_real.numpy() + 1j * layer.kernel_imag.numpy()

    # "B" bypasses the IQ mixer, so use the identity coefficients.
    if network_array[0] == "B":
        K1 = np.array([1.0])
        K2 = np.array([0.0])

    result_dict = {
        "train_loss": train_loss_list,
        "train_var": train_var_list,
        "test_loss": test_loss_list,
        "test_var": test_var_list,
        "model": path,
        "K1": K1,
        "K2": K2,
        "weights": weights,
    }
    return result_dict
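
# Context for K1/K2 (background, standard IQ-imbalance notation, not repo
# code): a frequency-independent IQ mixer is commonly modeled as
#   y(n) = K1 * x(n) + K2 * conj(x(n)),
# which is consistent with the bypass case "B" above reducing to the identity
# K1 = 1, K2 = 0. The T1/T2/E/C/R branches are different parameterizations of
# the same pair of coefficients.
def iq_mixer_sketch(x, K1, K2):
    return K1 * x + K2 * np.conj(x)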