def central(args, save_folder):
    # Generate a learning model
    network = learning_model(args)
    loader, class_list = load_batch(args)
    test_loader = load_test_batch(args, class_list)

    # Pre-run to compute total gradient size
    batch, labels = loader[0].__next__()
    # Compute local gradient
    user_grad, grad_dim, user_initial_shape = network.grad_compute(batch, labels)

    if args.power_constant:
        power_vec = args.power_avg * np.ones(args.user_number)
    else:
        power_vec = args.power_avg * np.random.rayleigh(scale=2, size=args.user_number)

    mask_obj = mask_generator(args.subchannel_number, len(user_grad))
    remainder = np.zeros((len(user_grad), args.user_number))
    grad_save = np.zeros((len(user_grad), args.user_number))
    h_save = np.zeros((args.subchannel_number, args.user_number))

    for i in range(args.iteration_number):
        # print('Iteration: ', str(i))
        if args.mask_style == "uniform":
            mask, mask_indices = mask_obj.uniform_next()
        else:
            mask, mask_indices = mask_obj.ordered_next()

        for m in range(args.user_number):
            # Load next batch of data
            batch, labels = loader[m].__next__()
            # Compute local gradient
            user_grad, user_final_shape, user_initial_shape = network.grad_compute(batch, labels)
            u_m = np.multiply(args.learning_rate, user_grad) + remainder[:, m]

            # Perform sparsification
            masked_grads = np.multiply(mask, u_m)
            grad_save[:, m] = masked_grads

            # Compute remainder for next iteration
            remainder[:, m] = u_m - masked_grads

            # Sample channel fading (iid) for each sub-channel
            h_m = get_channel_coef(args.exp_style, args.subchannel_number, args.h_coef)
            h_save[:, m] = h_m[0]
            # h_m = 0.000001 * np.random.rayleigh(scale=3, size=args.subchannel_number)

        # Perform power allocation
        opt = biconvex(args, h_save, power_vec, grad_save[mask_indices])
        b_km, rx, alpha = opt.power_allocation_central()

        estimator = np.zeros((len(args.exp_style), len(user_grad)))
        estimator[0, mask_indices] = rx

        # Update model parameters
        network.update_params(estimator[0])

        # Compute model accuracy every save_interval iterations
        if i % args.save_interval == 0:
            batch_test, labels_test = test_loader.__next__()
            accuracy = network.check_accuracy(batch_test, labels_test, i)
            save_module(args, save_folder, i, [accuracy], ['accuracy'])
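# --- Hypothetical usage sketch (not part of the original script) ---
# Illustrates how central() above (and main() below, which additionally reads
# noise_mean / noise_std) might be driven from an argparse namespace. Every
# attribute name comes from the code above; the default values, the exp_style
# choice, and the 'results' folder are assumptions for illustration only, and
# learning_model()/load_batch() likely read additional fields not shown here.
import argparse

def _example_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--user_number', type=int, default=10)
    parser.add_argument('--subchannel_number', type=int, default=128)
    parser.add_argument('--iteration_number', type=int, default=1000)
    parser.add_argument('--save_interval', type=int, default=50)
    parser.add_argument('--learning_rate', type=float, default=0.01)
    parser.add_argument('--power_avg', type=float, default=1.0)
    parser.add_argument('--power_constant', action='store_true')
    parser.add_argument('--mask_style', type=str, default='uniform')
    parser.add_argument('--exp_style', nargs='+', default=['centralized'])
    parser.add_argument('--h_coef', type=float, default=1.0)
    parser.add_argument('--noise_mean', type=float, default=0.0)
    parser.add_argument('--noise_std', type=float, default=1.0)
    return parser.parse_args([])

# central(_example_args(), 'results')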
combined.compile(loss=['mse'], optimizer=optimizer1)

#%% Training of model
epochs = 1000
sample_interval = 5
start_time = datetime.datetime.now()
batch_size = 50

# Adversarial loss ground truths
valid = np.ones((batch_size,))
fake = np.zeros((batch_size,))

print('Training started')
for epoch in range(epochs):
    for batch_i, (water_data, sat_data) in enumerate(load_batch(batch_size=batch_size)):

        # ---------------------
        #  Train Discriminator
        # ---------------------

        # Condition on B and generate a translated version
        fake_water_data = gen.predict(sat_data)

        # Train the discriminators (original images = real / generated = fake)
        d_loss_real = disc.train_on_batch([water_data, water_data], valid)
        d_loss_fake = disc.train_on_batch([fake_water_data, water_data], fake)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # -----------------
        #  Train Generator
        # -----------------
def main(args, save_folder):
    # Generate a learning model
    network = learning_model(args)
    loader, class_list = load_batch(args)
    test_loader = load_test_batch(args, class_list)

    # Pre-run to compute total gradient size
    batch, labels = loader[0].__next__()
    # Compute local gradient
    user_grad, grad_dim, user_initial_shape = network.grad_compute(batch, labels)

    if args.power_constant:
        power_vec = args.power_avg * np.ones(args.user_number)
    else:
        power_vec = args.power_avg * np.random.rayleigh(scale=2, size=args.user_number)

    mask_obj = mask_generator(args.subchannel_number, len(user_grad))
    remainder = np.zeros((len(user_grad), args.user_number))

    for i in range(args.iteration_number):
        # print('Iteration: ', str(i))
        if args.mask_style == "uniform":
            mask, mask_indices = mask_obj.uniform_next()
        else:
            mask, mask_indices = mask_obj.ordered_next()

        y = np.zeros((len(args.exp_style), args.subchannel_number))
        gamma = 0

        for m in range(args.user_number):
            # Load next batch of data
            batch, labels = loader[m].__next__()
            # Compute local gradient
            user_grad, user_final_shape, user_initial_shape = network.grad_compute(batch, labels)
            u_m = np.multiply(args.learning_rate, user_grad) + remainder[:, m]

            # Perform sparsification
            masked_grads = np.multiply(mask, u_m)

            # Compute remainder for next iteration
            remainder[:, m] = u_m - masked_grads

            # Sample channel fading (iid) for each sub-channel
            h_m = get_channel_coef(args.exp_style, args.subchannel_number, args.h_coef)
            # h_m = 0.000001 * np.random.rayleigh(scale=3, size=args.subchannel_number)

            # Perform power allocation
            b_km, tx_signal_m = power_allocation(args, h_m, power_vec[m], masked_grads[mask_indices])

            # Compute received signal
            y += tx_signal_m
            if "distributed" in args.exp_style:
                gamma += b_km[0][0] * h_m[0][0]
                # gamma += b_km[args.exp_style.index('distributed')][0] * h_m[args.exp_style.index('distributed')][0]

        # Perform equalization
        noise = np.random.normal(args.noise_mean, args.noise_std, (len(args.exp_style), args.subchannel_number))
        if args.exp_style[0] == 'error_free':
            rx = y
        else:
            rx = y + noise

        estimator = np.zeros((len(args.exp_style), len(user_grad)))
        for n in range(len(args.exp_style)):
            if args.exp_style[n] == 'distributed':
                estimator[n, mask_indices] = rx[n] / gamma
            elif args.exp_style[n] == 'error_free':
                estimator[n, mask_indices] = rx[n] / args.user_number
            elif args.exp_style[n] == 'single_user':
                estimator[n, mask_indices] = rx[n] / args.user_number / 10240
            elif args.exp_style[n] == 'centralized':
                print('To be implemented...')
            elif args.exp_style[n] == 'equal_power':
                estimator[n, mask_indices] = rx[n] / args.user_number
            else:
                print('Style is not defined!')

        # est_temp = y/gamma + noise/gamma
        # estimator = np.zeros(len(user_grad))
        # estimator[mask_indices] = est_temp[0]

        # Update model parameters
        network.update_params(estimator[0])

        # Compute model accuracy every save_interval iterations
        if i % args.save_interval == 0:
            batch_test, labels_test = test_loader.__next__()
            accuracy = network.check_accuracy(batch_test, labels_test, i)
            if args.exp_style[0] == 'distributed':
                save_module(args, save_folder, i, [gamma, accuracy], ['gamma', 'accuracy'])
            else:
                save_module(args, save_folder, i, [accuracy], ['accuracy'])
import numpy as np
import torch.nn as nn
from collections import defaultdict

# `name` is assumed to be the dataset module used by the original script
# (imported elsewhere); it is expected to expose load_batch() / load_data().
# name.load_data()
print("blah")

batch_size = 64
num_batches = 517
count = 0

val_id = np.load('val.npy')
test_id = np.load('test.npy')

val_data = defaultdict(list)
train_batch = defaultdict(list)
test_data = defaultdict(list)

print("start fetching data")
for b in range(num_batches):
    ft, obj, att, vids = name.load_batch(b)
    print(" batch loaded ")
    vids = np.array(vids)

    # get indexes of common test data
    ind = np.nonzero(np.in1d(vids, val_id))[0]
    vids = np.delete(vids, ind)
    print(" train data ---", len(val_data))

    test_data['features'].append(ft[ind, :])
    for i in ind:
        # get index of ones
        if np.any(obj[i, :] == 1):
            obj_l = np.where(obj[i, :] == 1)
        if np.any(att[i, :] == 1):
            att_l = np.where(att[i, :] == 1)
def train(self, train_names, test_names, base_dir, epochs=20, batch_size=64,
          test_batch_size=64, translate=[0, 0], flip=[0, 0], noise=0,
          model_name=None, pre_trained_model=None):
    # Create the generator to output batches of data with given transforms
    gen = Generator(train_names, translate=translate, flip=flip, noise=noise)
    next_batch = gen.gen_batch(batch_size)
    test_gen = Generator(test_names)
    test_batch = test_gen.gen_batch(test_batch_size)

    # Set number of iterations (SIZE CAN BE CHANGED BECAUSE OF GENERATOR)
    aug_size = gen.aug_size()
    iters = int(aug_size / batch_size)
    print('number of batches for training: {}'.format(iters))

    # Set base levels and model name
    iter_tot = 0
    best_mse = np.inf  # track the best (lowest) validation mse seen so far
    self.losses = []
    cur_model_name = model_name if model_name is not None else 'basic_model'

    # Start session, initialize variables, and load pretrained model if any
    self.session = tf.Session()
    with self.session as sess:
        merge = tf.summary.merge_all()
        writer = tf.summary.FileWriter("log/{}".format('model'), self.session.graph)
        sess.run(tf.global_variables_initializer())
        if pre_trained_model is not None:
            try:
                print("Loading model from: {}".format(pre_trained_model))
                self.saver.restore(sess, 'model/{}'.format(pre_trained_model))
            except Exception:
                raise ValueError("Failed Loading Model")

        # Set up loops for epochs and iterations per epoch
        for epoch in range(epochs):
            print("epoch {}".format(epoch + 1))
            for itr in range(iters):
                merge = tf.summary.merge_all()
                iter_tot += 1

                # Create feed values using the generator
                feed_names = next(next_batch)
                feed_image, feed_accels, feed_tf = load_batch(feed_names, base_dir)
                feed = {
                    self.inputs: feed_image,
                    self.targets: feed_accels,
                    self.output_tf: feed_tf
                }

                # Feed values to optimizer and output loss (for printing)
                _, cur_loss = sess.run([self.optimizer, self.loss], feed_dict=feed)
                self.losses.append(cur_loss)

                # Every 100 iterations, check whether the validation mse has improved
                if iter_tot % 100 == 0:
                    feed_test = next(test_batch)
                    test_images, test_accels, test_tf = load_batch(feed_test, base_dir)
                    pred = sess.run(self.pred, feed_dict={
                        self.inputs: test_images,
                        self.targets: test_accels,
                        self.output_tf: test_tf
                    })
                    mse = np.mean((pred - test_accels)**2)
                    if mse < best_mse:
                        print('Best validation mse! iteration:'
                              ' {} mse: {}'.format(iter_tot, mse))
                        best_mse = mse
                        self.saver.save(sess, 'model/{}'.format(cur_model_name))

        print("Training ends. The best validation mse is {}."
              " Model named {}.".format(best_mse, cur_model_name))
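# --- Hypothetical usage sketch (not part of the original code) ---
# Shows one way train() might be called. `DrivingModel` is a placeholder for
# whatever class defines train(), self.inputs, self.optimizer, etc.; the file
# names, base_dir, and augmentation values below are invented for illustration.
#
# model = DrivingModel()
# train_names = ['run01.npz', 'run02.npz']   # invented example file names
# test_names = ['run03.npz']
# model.train(train_names, test_names, base_dir='data/',
#             epochs=20, batch_size=64,
#             translate=[4, 4], flip=[1, 0], noise=0.01,
#             model_name='basic_model', pre_trained_model=None)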
def main():
    """
    An implementation of a generative adversarial network for speech enhancement.
    """
    # Flags, specify the wanted actions
    TEST = True
    TRAIN = True
    SAVE = True
    LOAD = False
    SAMPLE_TESTING = True  # Run a sample enhancement of the validation set at a specified epoch frequency

    # Parameters specified for the construction of the generator and discriminator
    options = {}
    options['Idun'] = False  # Set to True when running on Idun, so that the speech and noise paths are correct
    options['window_length'] = 16384
    options['feat_dim'] = 1
    options['z_dim'] = (8, 1024)  # Dimensions for the latent noise variable
    options['filter_length'] = 31
    options['strides'] = 2
    options['padding'] = 'same'
    options['use_bias'] = True
    options['initializer_std_dev'] = 0.02
    options['generator_encoder_num_kernels'] = [16, 32, 32, 64, 64, 128, 128, 256, 256, 512, 1024]
    options['generator_decoder_num_kernels'] = options['generator_encoder_num_kernels'][:-1][::-1] + [1]
    options['discriminator_num_kernels'] = [16, 32, 32, 64, 64, 128, 128, 256, 256, 512, 1024]
    options['alpha'] = 0.3  # alpha in LeakyReLU
    options['show_summary'] = False
    options['learning_rate'] = 0.0002
    options['g_l1loss'] = 100.
    options['pre_emph'] = 0.95
    options['z_in_use'] = False  # Use latent noise z in the generator?

    # File paths specified for the local machine and the supercomputer Idun
    if options['Idun']:
        options['speech_path'] = "/home/miralv/Master/Audio/sennheiser_1/part_1/"  # The validation set path is defined in the training section
        options['noise_path'] = "/home/miralv/Master/Audio/Nonspeech_v2/"
        options['audio_folder_test'] = "/home/miralv/Master/Audio/sennheiser_1/part_1/Test/Selected"
        options['noise_folder_test'] = "/home/miralv/Master/Audio/Nonspeech_v2/Test"
    else:
        options['speech_path'] = "/home/shomec/m/miralv/Masteroppgave/Code/sennheiser_1/part_1/"
        options['noise_path'] = "/home/shomec/m/miralv/Masteroppgave/Code/Nonspeech_v2/"
        options['audio_folder_test'] = "/home/shomec/m/miralv/Masteroppgave/Code/sennheiser_1/part_1/Test/Selected"
        options['noise_folder_test'] = "/home/shomec/m/miralv/Masteroppgave/Code/Nonspeech_v2/Test"

    options['batch_size'] = 200
    options['steps_per_epoch'] = 10
    options['n_epochs'] = 10
    options['snr_dbs_train'] = [0, 10, 15]
    options['snr_dbs_test'] = [0, 5, 10, 15]
    options['sample_rate'] = 16000
    options['test_frequency'] = 1  # Every nth epoch, run a sample enhancement

    print("Options are set.\n\n")

    # Specify optimizers (needed also if we choose not to train)
    optimizer_D = keras.optimizers.RMSprop(lr=options['learning_rate'])
    optimizer_G = keras.optimizers.RMSprop(lr=options['learning_rate'])

    if TRAIN:
        if SAMPLE_TESTING:
            test_frequency = options['test_frequency']
            speech_list_sample_test = glob.glob(options['speech_path'] + "Validate/Selected/*")
            noise_list_sample_test = glob.glob(options['noise_path'] + "Validate/*")

        ## Set up the individual models
        print("Setting up individual models:\n")
        G = generator(options)
        print("G finished.\n")
        D = discriminator(options)
        print("D finished.\n\n")

        # Compile the individual models
        print("Compile the individual models.\n")
        D.compile(loss='mse', optimizer=optimizer_D)
        G.compile(loss='mae', optimizer=optimizer_G)

        ## Set up the combined model
        D.trainable = False
        audio_shape = (options['window_length'], options['feat_dim'])

        # Prepare inputs
        clean_audio_in = Input(shape=audio_shape, name='in_clean')
        noisy_audio_in = Input(shape=audio_shape, name='in_noisy')

        if options['z_in_use']:
            z_dim = options['z_dim']
            z = Input(shape=z_dim, name='noise_input')
            G_out = G([noisy_audio_in, z])
        else:
            G_out = G([noisy_audio_in])

        D_out = D([G_out, noisy_audio_in])

        print("Set up the combined model.\n")
        if options['z_in_use']:
            GAN = Model(inputs=[clean_audio_in, noisy_audio_in, z], outputs=[D_out, G_out])
        else:
            GAN = Model(inputs=[clean_audio_in, noisy_audio_in], outputs=[D_out, G_out])
        GAN.summary()
        GAN.compile(optimizer=optimizer_G,
                    loss={'model_1': 'mae', 'model_2': 'mse'},
                    loss_weights={'model_1': options['g_l1loss'], 'model_2': 1})

        # Write log manually
        if not os.path.exists("./logs"):
            os.makedirs("./logs")
        log_file_path_G = "./logs/G_" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        log_file_path_D = "./logs/D_" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        f_G = open(log_file_path_G, "w+")
        f_D = open(log_file_path_D, "w+")
        f_G.write("Training loss\t\t\t\t | Validation loss\nG_loss G_D_loss G_l1_loss\t | G_loss G_D_loss G_l1_loss\n")
        f_D.write("Training loss\t\t\t\t | Validation loss\nD_loss D_r_loss D_f_loss \t | D_loss D_r_loss D_f_loss\n")

        # tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)
        # log_path = "./logs"
        # callback = TensorBoard(log_path)
        # callback.set_model(GAN)
        # train_names = ['G_loss', 'G_adv_loss', 'G_l1Loss']

        ## Model training
        n_epochs = options['n_epochs']
        steps_per_epoch = options['steps_per_epoch']
        batch_size = options['batch_size']
        start_time = datetime.datetime.now()

        # Define the class labels for the loss computation
        real_D = np.ones((batch_size, 1))    # For input pairs (clean, noisy)
        fake_D = np.zeros((batch_size, 1))   # For input pairs (enhanced, noisy)
        valid_G = np.array([1] * batch_size)  # To compute the mse-loss in the generator's loss function

        print("Begin training.\n")
        for epoch in range(1, n_epochs + 1):
            for batch_i, (clean_audio, noisy_audio) in enumerate(load_batch(options)):
                ## Train discriminator
                # Get G's input in the correct shape
                clean_audio = np.expand_dims(clean_audio, axis=2)  # dim -> (batchsize, windowsize, 1)
                noisy_audio = np.expand_dims(noisy_audio, axis=2)

                # Get G's enhanced audio
                if options['z_in_use']:
                    noise_input = np.random.normal(0, 1, (batch_size, z_dim[0], z_dim[1]))  # z
                    G_enhanced = G.predict([noisy_audio, noise_input])
                else:
                    G_enhanced = G.predict([noisy_audio])

                # Compute the discriminator's loss
                D_loss_real = D.train_on_batch(x=[clean_audio, noisy_audio], y=real_D)
                D_loss_fake = D.train_on_batch(x=[G_enhanced, noisy_audio], y=fake_D)
                D_loss = np.add(D_loss_real, D_loss_fake) / 2.0

                ## Train generator
                if options['z_in_use']:
                    [G_loss, G_D_loss, G_l1_loss] = GAN.train_on_batch(
                        x=[clean_audio, noisy_audio, noise_input],
                        y={'model_1': clean_audio, 'model_2': valid_G})
                else:
                    [G_loss, G_D_loss, G_l1_loss] = GAN.train_on_batch(
                        x=[clean_audio, noisy_audio],
                        y={'model_1': clean_audio, 'model_2': valid_G})

                # logs = [G_loss, G_D_loss, G_l1_loss]
                # write_log(callback, train_names, logs, epoch)

                if SAMPLE_TESTING and epoch % test_frequency == 0 and batch_i == (steps_per_epoch - 1):
                    print("Running sample test epoch %d." % (epoch))
                    [val_loss_D, val_loss_D_real, val_loss_D_fake,
                     val_loss_G, val_loss_G_D, val_loss_G_l1] = run_sample_test(
                        options, speech_list_sample_test, noise_list_sample_test, G, GAN, D, epoch)
                    print("Sample test finished.")

                    f_G.write("%f %f %f \t| %f %f %f\n" %
                              (G_loss, G_D_loss, G_l1_loss, val_loss_G, val_loss_G_D, val_loss_G_l1))
                    f_D.write("%f %f %f \t| %f %f %f\n" %
                              (D_loss, D_loss_real, D_loss_fake, val_loss_D, val_loss_D_real, val_loss_D_fake))

            elapsed_time = datetime.datetime.now() - start_time
            # Print training error
            print("[Epoch %d/%d] [D loss: %f] [D real loss: %f] [D fake loss: %f] [G loss: %f] [G_D loss: %f] [G_L1 loss: %f] [Exec. time: %s]"
                  % (epoch, n_epochs, D_loss, D_loss_real, D_loss_fake, G_loss, G_D_loss, G_l1_loss, elapsed_time))

        f_D.close()
        f_G.close()
        print("Training finished.\n")

    # Test the model
    if TEST:
        print("Test the model on unseen noises and voices.\n\n")
        noise_list = glob.glob(options['noise_folder_test'] + "/*.wav")
        speech_list = glob.glob(options['audio_folder_test'] + "/*.wav")

        if LOAD:
            print("Loading saved model\n")
            modeldir = os.getcwd()
            json_file = open(modeldir + "/Gmodel.json", "r")
            loaded_model_json = json_file.read()
            json_file.close()
            G = model_from_json(loaded_model_json)
            G.compile(loss='mean_squared_error', optimizer=optimizer_G)
            G.load_weights(modeldir + "/Gmodel.h5")

        SNR_dBs = options['snr_dbs_test']
        for speech_path in speech_list:
            options['audio_path_test'] = speech_path
            for noise_path in noise_list:
                options['noise_path_test'] = noise_path
                clean, mixed, z = prepare_test(options)  # (snr_dbs, nwindows, windowlength)

                for i, snr_db in enumerate(SNR_dBs):
                    # Need to get G's input in the correct shape
                    audios_mixed = np.expand_dims(mixed[i], axis=2)

                    # Generate G's enhancement
                    if options['z_in_use']:
                        G_out = G.predict([audios_mixed, z[i]])
                    else:
                        G_out = G.predict([audios_mixed])

                    # Postprocess
                    clean_res, _ = postprocess(clean[i, :, :], coeff=options['pre_emph'])
                    mixed_res, _ = postprocess(mixed[i, :, :], coeff=options['pre_emph'])
                    G_enhanced, _ = postprocess(G_out, coeff=options['pre_emph'])

                    ## Save for listening
                    if not os.path.exists("./results"):
                        os.makedirs("./results")

                    # Want to save clean, enhanced and mixed.
                    sr = options['sample_rate']
                    if noise_path[-7] == 'n':
                        path_enhanced = "./results/enhanced_%s_%s_snr_%d.wav" % (speech_path[-16:-4], noise_path[-7:-4], snr_db)  # sentence id, noise id, snr_db
                        path_noisy = "./results/noisy_%s_%s_snr_%d.wav" % (speech_path[-16:-4], noise_path[-7:-4], snr_db)
                        path_clean = "./results/clean_%s_%s_snr_%d.wav" % (speech_path[-16:-4], noise_path[-7:-4], snr_db)
                    else:
                        path_enhanced = "./results/enhanced_%s_%s_snr_%d.wav" % (speech_path[-16:-4], noise_path[-16:-4], snr_db)
                        path_noisy = "./results/noisy_%s_%s_snr_%d.wav" % (speech_path[-16:-4], noise_path[-16:-4], snr_db)
                        path_clean = "./results/clean_%s_%s_snr_%d.wav" % (speech_path[-16:-4], noise_path[-16:-4], snr_db)

                    # Because pesq compares corresponding clean, noisy and enhanced files, the clean signal must be stored the same way
                    save_audio(clean_res, path_clean, sr)
                    save_audio(mixed_res, path_noisy, sr)
                    save_audio(G_enhanced, path_enhanced, sr)

        print("Testing finished.")

    if SAVE and not LOAD:
        modeldir = os.getcwd()
        model_json = G.to_json()
        with open(modeldir + "/Gmodel_without_z.json", "w") as json_file:
            json_file.write(model_json)
        G.save_weights(modeldir + "/Gmodel_without_z.h5")
        print("Model saved to " + modeldir)
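# Entry-point guard (not in the original snippet): lets the module be run
# directly with `python <script>.py`, assuming main() above is the intended entry point.
if __name__ == "__main__":
    main()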