def central(args, save_folder):
    """Centralised variant of the over-the-air training loop.

    Every user computes a sparsified, error-compensated gradient; a single
    central optimiser (``biconvex``) then performs the power allocation for
    all users jointly before the model update.

    Args:
        args: experiment configuration namespace.
        save_folder: directory handed to ``save_module`` for results.
    """
    # Build the model plus the per-user and test data loaders.
    network = learning_model(args)
    loader, class_list = load_batch(args)
    test_loader = load_test_batch(args, class_list)

    # One warm-up batch so the flattened gradient dimension is known.
    batch, labels = next(loader[0])
    user_grad, grad_dim, user_initial_shape = network.grad_compute(batch, labels)
    grad_len = len(user_grad)

    # Per-user average transmit power: constant, or Rayleigh-distributed.
    if args.power_constant:
        power_vec = args.power_avg * np.ones(args.user_number)
    else:
        power_vec = args.power_avg * np.random.rayleigh(scale=2, size=args.user_number)

    mask_obj = mask_generator(args.subchannel_number, grad_len)
    remainder = np.zeros((grad_len, args.user_number))  # error-feedback residual per user
    grad_save = np.zeros((grad_len, args.user_number))  # masked update, one column per user
    h_save = np.zeros((args.subchannel_number, args.user_number))

    for it in range(args.iteration_number):
        # Choose this round's sparsification mask.
        if args.mask_style == "uniform":
            mask, mask_indices = mask_obj.uniform_next()
        else:
            mask, mask_indices = mask_obj.ordered_next()

        for user in range(args.user_number):
            # Fresh batch and local gradient for this user.
            batch, labels = next(loader[user])
            user_grad, user_final_shape, user_initial_shape = network.grad_compute(batch, labels)
            # Scaled gradient plus the residual carried from the last round.
            update_vec = np.multiply(args.learning_rate, update_source := user_grad) + remainder[:, user]
            sparse_update = np.multiply(mask, update_vec)
            grad_save[:, user] = sparse_update
            # Whatever the mask dropped feeds back into the next iteration.
            remainder[:, user] = update_vec - sparse_update
            # Draw iid sub-channel fading coefficients for this user.
            h_user = get_channel_coef(args.exp_style, args.subchannel_number, args.h_coef)
            h_save[:, user] = h_user[0]

        # Central power allocation over every user's masked entries.
        opt = biconvex(args, h_save, power_vec, grad_save[mask_indices])
        b_km, rx, alpha = opt.power_allocation_central()

        estimator = np.zeros((len(args.exp_style), grad_len))
        estimator[0, mask_indices] = rx

        # Apply the recovered update to the shared model.
        network.update_params(estimator[0])

        # Periodically evaluate and persist the accuracy.
        if it % args.save_interval == 0:
            batch_test, labels_test = next(test_loader)
            accuracy = network.check_accuracy(batch_test, labels_test, it)
            save_module(args, save_folder, it, [accuracy], ['accuracy'])
 # NOTE(review): this fragment appears to be accidentally pasted GAN training
 # code (pix2pix-style water/satellite image translation). It is shifted right
 # by one space, references names defined nowhere in this file (combined,
 # optimizer1, gen, disc) and is truncated mid-loop -- it cannot run as-is.
 combined.compile(loss=['mse'], optimizer=optimizer1)
     

 #%% Training of model 
 epochs = 1000
 sample_interval = 5
 start_time = datetime.datetime.now()
 
 batch_size = 50
 
 # Adversarial loss ground truths
 valid = np.ones((batch_size,))
 fake = np.zeros((batch_size,))
 print('Training started')
 for epoch in range(epochs):
     for batch_i, (water_data, sat_data) in enumerate(load_batch(batch_size = batch_size)):
 
         # ---------------------
         #  Train Discriminator
         # ---------------------
 
         # Condition on B and generate a translated version
         fake_water_data = gen.predict(sat_data)
         # Train the discriminators (original images = real / generated = Fake)
         d_loss_real = disc.train_on_batch([water_data, water_data], valid)
         d_loss_fake = disc.train_on_batch([fake_water_data, water_data], fake)
         d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
 
         # -----------------
         #  Train Generator
         # NOTE(review): fragment is truncated here (generator step missing).
def main(args, save_folder):
    """Distributed over-the-air training loop.

    Each user computes a sparsified, error-compensated gradient, performs its
    own per-sub-channel power allocation, and the server equalises the
    superposed received signal before updating the shared model.

    Args:
        args: experiment configuration namespace.
        save_folder: directory handed to ``save_module`` for results.
    """
    # Build the model plus the per-user and test data loaders.
    network = learning_model(args)
    loader, class_list = load_batch(args)
    test_loader = load_test_batch(args, class_list)
    # One warm-up batch so the flattened gradient dimension is known.
    batch, labels = loader[0].__next__()
    user_grad, grad_dim, user_initial_shape = network.grad_compute(batch, labels)

    # Per-user average transmit power: constant, or Rayleigh-distributed.
    if args.power_constant:
        power_vec = args.power_avg * np.ones(args.user_number)
    else:
        power_vec = args.power_avg * np.random.rayleigh(scale=2, size=args.user_number)
    mask_obj = mask_generator(args.subchannel_number, len(user_grad))
    # Error-feedback residual kept per user between iterations.
    remainder = np.zeros((len(user_grad), args.user_number))

    for i in range(args.iteration_number):
        # Choose this round's sparsification mask.
        if args.mask_style == "uniform":
            mask, mask_indices = mask_obj.uniform_next()
        else:
            mask, mask_indices = mask_obj.ordered_next()
        # Superposed received signal, one row per experiment style.
        y = np.zeros((len(args.exp_style), args.subchannel_number))
        gamma = 0
        for m in range(args.user_number):
            batch, labels = loader[m].__next__()
            user_grad, user_final_shape, user_initial_shape = network.grad_compute(batch, labels)
            # Scaled gradient plus the residual carried from the last round.
            u_m = np.multiply(args.learning_rate, user_grad) + remainder[:, m]
            masked_grads = np.multiply(mask, u_m)
            # Whatever the mask dropped feeds back into the next iteration.
            remainder[:, m] = u_m - masked_grads
            # Sample iid channel fading for each sub-channel.
            h_m = get_channel_coef(args.exp_style, args.subchannel_number, args.h_coef)
            # Per-user power allocation over the masked entries only.
            b_km, tx_signal_m = power_allocation(args, h_m, power_vec[m], masked_grads[mask_indices])
            # Signals superpose over the air.
            y += tx_signal_m
            if "distributed" in args.exp_style:
                # Accumulated effective gain used for equalisation below.
                gamma += b_km[0][0] * h_m[0][0]
        # Equalisation: additive noise except in the error-free baseline.
        noise = np.random.normal(args.noise_mean, args.noise_std,
                                 (len(args.exp_style), args.subchannel_number))
        if args.exp_style[0] == 'error_free':
            rx = y
        else:
            rx = y + noise
        estimator = np.zeros((len(args.exp_style), len(user_grad)))
        for n in range(len(args.exp_style)):
            if args.exp_style[n] == 'distributed':
                estimator[n, mask_indices] = rx[n] / gamma
            elif args.exp_style[n] == 'error_free':
                estimator[n, mask_indices] = rx[n] / args.user_number
            elif args.exp_style[n] == 'single_user':
                # NOTE(review): 10240 looks like a hard-coded scaling constant -- confirm.
                estimator[n, mask_indices] = rx[n] / args.user_number / 10240
            elif args.exp_style[n] == 'centralized':
                print('To be implemented...')
            elif args.exp_style[n] == 'equal_power':
                estimator[n, mask_indices] = rx[n] / args.user_number
            else:
                print('Style is not defined!')
        # Update the model with the first style's estimate.
        network.update_params(estimator[0])
        # Periodically evaluate and persist accuracy (plus gamma when distributed).
        if i % args.save_interval == 0:
            batch_test, labels_test = test_loader.__next__()
            accuracy = network.check_accuracy(batch_test, labels_test, i)
            if args.exp_style[0] == 'distributed':
                save_module(args, save_folder, i, [gamma, accuracy], ['gamma', 'accuracy'])
            else:
                save_module(args, save_folder, i, [accuracy], ['accuracy'])
    # BUG FIX: an unrelated GAN-training fragment previously followed here.
    # It referenced names defined nowhere in this file (combined, optimizer1,
    # gen, disc) and would have raised NameError after training finished;
    # removed as accidentally pasted dead code.
import torch.nn as nn
# name.load_data()

# NOTE(review): Python 2 fragment (bare `print` statements); it is truncated
# below, and `name`, `np` and `defaultdict` must come from code outside this
# view -- it cannot run under Python 3 as-is.
print "blah"
batch_size = 64
num_batches = 517
count = 0
# Video ids reserved for the validation and test splits.
val_id = np.load('val.npy')
test_id = np.load('test.npy')

# Buckets for the per-split features/labels.
val_data = defaultdict(list)
train_batch = defaultdict(list)
test_data = defaultdict(list)
print "start fetching data"
for b in range(num_batches):
    # ft: features, obj: object one-hot labels, att: attribute one-hot labels.
    ft, obj, att, vids = name.load_batch(b)
    print " batch loaded "
    vids = np.array(vids)
    #get indexes of common test data
    ind = np.nonzero(np.in1d(vids, val_id))[0]
    vids = np.delete(vids, ind)
    # NOTE(review): message says "train data" but prints len(val_data) -- confirm.
    print " train data ---", len(val_data)

    # NOTE(review): rows matched against val_id are appended to test_data -- verify split intent.
    test_data['features'].append(ft[ind, :])
    for i in ind:
        #get index of ones
        if np.any(obj[i, :] == 1):
            obj_l = np.where(obj[i, :] == 1)
        if np.any(att[i, :] == 1):
            att_l = np.where(att[i, :] == 1)
        # NOTE(review): fragment is truncated here; obj_l / att_l are never used in view.
Example #6
0
    def train(self,
              train_names,
              test_names,
              base_dir,
              epochs=20,
              batch_size=64,
              test_batch_size=64,
              translate=None,
              flip=None,
              noise=0,
              model_name=None,
              pre_trained_model=None):
        """Train the model, checkpointing whenever validation MSE improves.

        Args:
            train_names: sample identifiers for training.
            test_names: sample identifiers for validation.
            base_dir: root directory that ``load_batch`` resolves names against.
            epochs: number of passes over the (augmented) training set.
            batch_size: training batch size.
            test_batch_size: validation batch size.
            translate: [x, y] translation augmentation; defaults to [0, 0].
            flip: [h, v] flip augmentation; defaults to [0, 0].
            noise: additive-noise augmentation level.
            model_name: checkpoint name; defaults to 'basic_model'.
            pre_trained_model: checkpoint to restore before training, if any.

        Raises:
            ValueError: if restoring ``pre_trained_model`` fails.
        """
        # BUG FIX: [0, 0] defaults were mutable default arguments; keep the
        # same effective values without sharing lists across calls.
        translate = [0, 0] if translate is None else translate
        flip = [0, 0] if flip is None else flip

        # Create the generator to output batches of data with given transforms
        gen = Generator(train_names,
                        translate=translate,
                        flip=flip,
                        noise=noise)
        next_batch = gen.gen_batch(batch_size)

        test_gen = Generator(test_names)
        test_batch = test_gen.gen_batch(test_batch_size)

        # Set number of iterations (SIZE CAN BE CHANGED BECAUSE OF GENERATOR)
        aug_size = gen.aug_size()
        iters = int(aug_size / batch_size)
        print('number of batches for training: {}'.format(iters))

        # Set base levels and model name.
        iter_tot = 0
        # BUG FIX: best_mse was never initialised before the `mse < best_mse`
        # comparison below (NameError on the first checkpoint test).
        best_mse = float('inf')
        self.losses = []
        # BUG FIX: cur_model_name was undefined when model_name was provided.
        cur_model_name = 'basic_model' if model_name is None else model_name

        # Start session, initialize variables, and load pretrained model if any
        self.session = tf.Session()
        with self.session as sess:
            merge = tf.summary.merge_all()
            writer = tf.summary.FileWriter("log/{}".format('model'),
                                           self.session.graph)
            sess.run(tf.global_variables_initializer())
            if pre_trained_model is not None:
                try:
                    print("Loading model from: {}".format(pre_trained_model))
                    self.saver.restore(sess,
                                       'model/{}'.format(pre_trained_model))
                except Exception as err:
                    raise ValueError("Failed Loading Model") from err

            # Set up loops for epochs and iterations per epochs
            for epoch in range(epochs):
                print("epoch {}".format(epoch + 1))

                for itr in range(iters):
                    iter_tot += 1

                    # Create feed values using the generator
                    feed_names = next(next_batch)
                    feed_image, feed_accels, feed_tf = load_batch(
                        feed_names, base_dir)
                    feed = {
                        self.inputs: feed_image,
                        self.targets: feed_accels,
                        self.output_tf: feed_tf
                    }

                    # Feed values to optimizer and output loss (for printing)
                    _, cur_loss = sess.run([self.optimizer, self.loss],
                                           feed_dict=feed)
                    self.losses.append(cur_loss)

                    # After 100 iterations, check if test MSE has improved and
                    # checkpoint when it has.
                    if iter_tot % 100 == 0:
                        feed_test = next(test_batch)
                        test_images, test_accels, test_tf = load_batch(
                            feed_test, base_dir)
                        pred = sess.run(
                            [self.pred],
                            feed_dict={
                                self.inputs: test_images,
                                self.targets: test_accels,
                                self.output_tf: test_tf
                            })
                        mse = np.mean((pred - test_accels)**2)
                        if mse < best_mse:
                            print('Best validation accuracy! iteration:'
                                  '{} mse: {}%'.format(iter_tot, mse))
                            best_mse = mse
                            self.saver.save(sess,
                                            'model/{}'.format(cur_model_name))

        print("Training ends. The best valid mse is {}."
              " Model named {}.".format(best_mse, cur_model_name))
def main():
    """Build, train, test and save a GAN for speech enhancement.

    The boolean flags below select which phases run; all hyper-parameters
    are collected in the ``options`` dict. Training uses a SEGAN-style
    setup: a generator enhancing noisy audio and a discriminator judging
    (audio, noisy) pairs.
    """

    # Flags, specify the wanted actions
    TEST = True
    TRAIN = True
    SAVE = True
    LOAD = False
    SAMPLE_TESTING = True  # Run a sample enhancement at a specified epoch frequency of the validation set

    # Parameters specified for the construction of the generator and discriminator
    options = {}
    options[
        'Idun'] = False  # Set to true when running on Idun, s.t. the speech path and noise path get correct
    options['window_length'] = 16384
    options['feat_dim'] = 1
    options['z_dim'] = (8, 1024)  # Dimensions for the latent noise variable
    options['filter_length'] = 31
    options['strides'] = 2
    options['padding'] = 'same'
    options['use_bias'] = True
    options['initializer_std_dev'] = 0.02
    options['generator_encoder_num_kernels'] = [
        16, 32, 32, 64, 64, 128, 128, 256, 256, 512, 1024
    ]
    # Decoder mirrors the encoder (minus the last stage), ending in 1 channel.
    options['generator_decoder_num_kernels'] = options[
        'generator_encoder_num_kernels'][:-1][::-1] + [1]
    options['discriminator_num_kernels'] = [
        16, 32, 32, 64, 64, 128, 128, 256, 256, 512, 1024
    ]
    options['alpha'] = 0.3  # alpha in LeakyReLU
    options['show_summary'] = False
    options['learning_rate'] = 0.0002
    options['g_l1loss'] = 100.  # weight of the generator's L1 term
    options['pre_emph'] = 0.95
    options['z_in_use'] = False  # Use latent noise z in generator?

    # File paths specified for local machine and the super computer Idun
    if options['Idun']:
        options[
            'speech_path'] = "/home/miralv/Master/Audio/sennheiser_1/part_1/"  # The validation set path is defined in the training section
        options['noise_path'] = "/home/miralv/Master/Audio/Nonspeech_v2/"
        options[
            'audio_folder_test'] = "/home/miralv/Master/Audio/sennheiser_1/part_1/Test/Selected"
        options[
            'noise_folder_test'] = "/home/miralv/Master/Audio/Nonspeech_v2/Test"

    else:
        options[
            'speech_path'] = "/home/shomec/m/miralv/Masteroppgave/Code/sennheiser_1/part_1/"
        options[
            'noise_path'] = "/home/shomec/m/miralv/Masteroppgave/Code/Nonspeech_v2/"
        options[
            'audio_folder_test'] = "/home/shomec/m/miralv/Masteroppgave/Code/sennheiser_1/part_1/Test/Selected"
        options[
            'noise_folder_test'] = "/home/shomec/m/miralv/Masteroppgave/Code/Nonspeech_v2/Test"

    options['batch_size'] = 200
    options['steps_per_epoch'] = 10
    options['n_epochs'] = 10
    options['snr_dbs_train'] = [0, 10, 15]
    options['snr_dbs_test'] = [0, 5, 10, 15]
    options['sample_rate'] = 16000
    options['test_frequency'] = 1  # Every nth epoch, run a sample enhancement
    print("Options are set.\n\n")

    # Specify optimizer (Needed also if we choose not to train)
    optimizer_D = keras.optimizers.RMSprop(lr=options['learning_rate'])
    optimizer_G = keras.optimizers.RMSprop(lr=options['learning_rate'])

    if TRAIN:
        if SAMPLE_TESTING:
            test_frequency = options['test_frequency']
            speech_list_sample_test = glob.glob(options['speech_path'] +
                                                "Validate/Selected/*")
            noise_list_sample_test = glob.glob(options['noise_path'] +
                                               "Validate/*")

        ## Set up the individual models
        print("Setting up individual models:\n")
        G = generator(options)
        print("G finished.\n")
        D = discriminator(options)
        print("D finished.\n\n")

        # Compile the individual models
        print("Compile the individual models.\n")
        D.compile(loss='mse', optimizer=optimizer_D)
        G.compile(loss='mae', optimizer=optimizer_G)

        ## Set up the combined model
        # D's weights are frozen inside the combined GAN; only G trains there.
        D.trainable = False
        audio_shape = (options['window_length'], options['feat_dim'])

        # Prepare inputs
        clean_audio_in = Input(shape=audio_shape, name='in_clean')
        noisy_audio_in = Input(shape=audio_shape, name='in_noisy')
        if options['z_in_use']:
            z_dim = options['z_dim']
            z = Input(shape=z_dim, name='noise_input')
            G_out = G([noisy_audio_in, z])
        else:
            G_out = G([noisy_audio_in])
        D_out = D([G_out, noisy_audio_in])

        print("Set up the combined model.\n")
        if options['z_in_use']:
            GAN = Model(inputs=[clean_audio_in, noisy_audio_in, z],
                        outputs=[D_out, G_out])
        else:
            GAN = Model(inputs=[clean_audio_in, noisy_audio_in],
                        outputs=[D_out, G_out])

        GAN.summary()
        # NOTE(review): 'model_1'/'model_2' rely on Keras' auto-generated
        # sub-model names -- fragile if the build order changes; confirm.
        GAN.compile(optimizer=optimizer_G,
                    loss={
                        'model_1': 'mae',
                        'model_2': 'mse'
                    },
                    loss_weights={
                        'model_1': options['g_l1loss'],
                        'model_2': 1
                    })

        # Write log manually
        if not os.path.exists("./logs"):
            os.makedirs("./logs")

        log_file_path_G = "./logs/G_" + datetime.datetime.now().strftime(
            "%Y%m%d-%H%M%S")
        log_file_path_D = "./logs/D_" + datetime.datetime.now().strftime(
            "%Y%m%d-%H%M%S")
        f_G = open(log_file_path_G, "w+")
        f_D = open(log_file_path_D, "w+")
        f_G.write(
            "Training loss\t\t\t\t    | Validation loss\nG_loss   G_D_loss G_l1_loss\t    | G_loss   G_D_loss G_l1_loss\n"
        )
        f_D.write(
            "Training loss\t\t\t\t    | Validation loss\nD_loss   D_r_loss D_f_loss \t    | D_loss   D_r_loss D_f_loss\n"
        )
        # tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)

        # log_path = "./logs"
        # callback = TensorBoard(log_path)
        # callback.set_model(GAN)
        # train_names = ['G_loss', 'G_adv_loss', 'G_l1Loss']

        ## Model training
        n_epochs = options['n_epochs']
        steps_per_epoch = options['steps_per_epoch']
        batch_size = options['batch_size']

        start_time = datetime.datetime.now()

        # Define the class labels loss computation
        real_D = np.ones((batch_size, 1))  # For input pairs (clean, noisy)
        fake_D = np.zeros((batch_size, 1))  # For input pairs (enhanced, noisy)
        valid_G = np.array(
            [1] * batch_size
        )  # To compute the mse-loss in the generator's loss function

        print("Begin training.\n")
        for epoch in range(1, n_epochs + 1):
            for batch_i, (clean_audio,
                          noisy_audio) in enumerate(load_batch(options)):
                ## Train discriminator
                # Get G's input in correct shape
                clean_audio = np.expand_dims(
                    clean_audio, axis=2)  #dim -> (batchsize,windowsize,1)
                noisy_audio = np.expand_dims(noisy_audio, axis=2)

                # Get G's enhanced audio
                if options['z_in_use']:
                    noise_input = np.random.normal(
                        0, 1, (batch_size, z_dim[0], z_dim[1]))  #z
                    G_enhanced = G.predict([noisy_audio, noise_input])
                else:
                    G_enhanced = G.predict([noisy_audio])

                # Compute the discriminator's loss
                D_loss_real = D.train_on_batch(x=[clean_audio, noisy_audio],
                                               y=real_D)
                D_loss_fake = D.train_on_batch(x=[G_enhanced, noisy_audio],
                                               y=fake_D)
                D_loss = np.add(D_loss_real, D_loss_fake) / 2.0

                ## Train generator (through the combined GAN; D is frozen)
                if options['z_in_use']:
                    [G_loss, G_D_loss, G_l1_loss] = GAN.train_on_batch(
                        x=[clean_audio, noisy_audio, noise_input],
                        y={
                            'model_1': clean_audio,
                            'model_2': valid_G
                        })

                else:
                    [G_loss, G_D_loss, G_l1_loss
                     ] = GAN.train_on_batch(x=[clean_audio, noisy_audio],
                                            y={
                                                'model_1': clean_audio,
                                                'model_2': valid_G
                                            })

                # logs = [G_loss, G_D_loss, G_l1_loss]
                # write_log(callback, train_names, logs, epoch)

                # At the last batch of every test_frequency-th epoch, run a
                # validation enhancement and append losses to the log files.
                if SAMPLE_TESTING and epoch % test_frequency == 0 and batch_i == (
                        steps_per_epoch - 1):
                    print("Running sample test epoch %d." % (epoch))
                    [
                        val_loss_D, val_loss_D_real, val_loss_D_fake,
                        val_loss_G, val_loss_G_D, val_loss_G_l1
                    ] = run_sample_test(options, speech_list_sample_test,
                                        noise_list_sample_test, G, GAN, D,
                                        epoch)
                    print("Sample test finished.")
                    f_G.write("%f %f %f  \t| %f %f %f\n" %
                              (G_loss, G_D_loss, G_l1_loss, val_loss_G,
                               val_loss_G_D, val_loss_G_l1))
                    f_D.write("%f %f %f  \t| %f %f %f\n" %
                              (D_loss, D_loss_real, D_loss_fake, val_loss_D,
                               val_loss_D_real, val_loss_D_fake))

                    elapsed_time = datetime.datetime.now() - start_time
                    # Print training error
                    print(
                        "[Epoch %d/%d] [D loss: %f] [D real loss: %f] [D fake loss: %f] [G loss: %f] [G_D loss: %f] [G_L1 loss: %f] [Exec. time: %s]"
                        % (epoch, n_epochs, D_loss, D_loss_real, D_loss_fake,
                           G_loss, G_D_loss, G_l1_loss, elapsed_time))

        f_D.close()
        f_G.close()
        print("Training finished.\n")

    # Test the model
    if TEST:
        print("Test the model on unseen noises and voices.\n\n")
        noise_list = glob.glob(options['noise_folder_test'] + "/*.wav")
        speech_list = glob.glob(options['audio_folder_test'] + "/*.wav")

        # NOTE(review): if TRAIN and LOAD are both False, G is undefined here.
        if LOAD:
            print("Loading saved model\n")
            modeldir = os.getcwd()
            json_file = open(modeldir + "/Gmodel.json", "r")
            loaded_model_json = json_file.read()
            json_file.close()
            G = model_from_json(loaded_model_json)
            G.compile(loss='mean_squared_error', optimizer=optimizer_G)
            G.load_weights(modeldir + "/Gmodel.h5")

        # Enhance every (speech, noise, SNR) combination and save the audio.
        SNR_dBs = options['snr_dbs_test']
        for speech_path in speech_list:
            options['audio_path_test'] = speech_path
            for noise_path in noise_list:
                options['noise_path_test'] = noise_path
                clean, mixed, z = prepare_test(
                    options)  #(snr_dbs, nwindows, windowlength)
                for i, snr_db in enumerate(SNR_dBs):
                    # Need to get G's input in the correct shape
                    audios_mixed = np.expand_dims(mixed[i], axis=2)

                    # Generate G's enhancement
                    if options['z_in_use']:
                        G_out = G.predict([audios_mixed, z[i]])
                    else:
                        G_out = G.predict([audios_mixed])

                    # Postprocess (undo the pre-emphasis filter)
                    clean_res, _ = postprocess(clean[i, :, :],
                                               coeff=options['pre_emph'])
                    mixed_res, _ = postprocess(mixed[i, :, :],
                                               coeff=options['pre_emph'])
                    G_enhanced, _ = postprocess(G_out,
                                                coeff=options['pre_emph'])

                    ## Save for listening
                    if not os.path.exists("./results"):
                        os.makedirs("./results")

                    # Want to save clean, enhanced and mixed.
                    sr = options['sample_rate']

                    # NOTE(review): file-name slicing below assumes fixed-length
                    # sentence/noise ids in the paths -- confirm naming scheme.
                    if noise_path[-7] == 'n':
                        path_enhanced = "./results/enhanced_%s_%s_snr_%d.wav" % (
                            speech_path[-16:-4], noise_path[-7:-4], snr_db
                        )  # sentence id, noise id, snr_db
                        path_noisy = "./results/noisy_%s_%s_snr_%d.wav" % (
                            speech_path[-16:-4], noise_path[-7:-4], snr_db)
                        path_clean = "./results/clean_%s_%s_snr_%d.wav" % (
                            speech_path[-16:-4], noise_path[-7:-4], snr_db)

                    else:
                        path_enhanced = "./results/enhanced_%s_%s_snr_%d.wav" % (
                            speech_path[-16:-4], noise_path[-16:-4], snr_db)
                        path_noisy = "./results/noisy_%s_%s_snr_%d.wav" % (
                            speech_path[-16:-4], noise_path[-16:-4], snr_db)
                        path_clean = "./results/clean_%s_%s_snr_%d.wav" % (
                            speech_path[-16:-4], noise_path[-16:-4], snr_db)

                    # Because pesq is testing corresponding clean, noisy and enhanced, clean be stored similarly
                    save_audio(clean_res, path_clean, sr)
                    save_audio(mixed_res, path_noisy, sr)
                    save_audio(G_enhanced, path_enhanced, sr)
        print("Testing finished.")

    # Persist the generator's architecture and weights.
    if SAVE and not LOAD:
        modeldir = os.getcwd()
        model_json = G.to_json()
        with open(modeldir + "/Gmodel_without_z.json", "w") as json_file:
            json_file.write(model_json)
        G.save_weights(modeldir + "/Gmodel_without_z.h5")
        print("Model saved to " + modeldir)