def run_training(HYPARMS):
    data_sets = input_data.read_data_sets(HYPARMS.input_data_dir)
    train_set = data_sets.train
    test_set = data_sets.test
    #data_sets = input_data.read_data_sets(HYPARMS.input_data_dir, HYPARMS.fake_data)

    with tf.Graph().as_default():
        placebundle = placeholder_inputs(HYPARMS.batch_size)
        logits = graph_model(placebundle)
        loss = calcul_loss(logits, placebundle)
        train_op = training(loss, HYPARMS.learning_rate)
        eval_correct = evaluation(logits, placebundle)

        with tf.Session() as sess:
            init = tf.initialize_all_variables()
            sess.run(init)
            saver = tf.train.Saver()

            for step in xrange(HYPARMS.max_steps):
                start_time = time.time()

                feed_dict = fill_feed_dict(train_set, placebundle.x,
                                           placebundle.y_,
                                           placebundle.keep_prob, HYPARMS)

                # Run one step of the model.  The return values are the activations
                # from the `train_op` (which is discarded) and the `loss` Op.  To
                # inspect the values of your Ops or variables, you may include them
                # in the list passed to sess.run() and the value tensors will be
                # returned in the tuple from the call.
                _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

                duration = time.time() - start_time
                if step % 100 == 0:
                    # Print status to stdout.
                    print('Step %d: loss = %.2f (%.3f sec)' %
                          (step, loss_value, duration))
                    # Update the events file.
                    # summary_str = sess.run(summary, feed_dict=feed_dict)
                    # summary_writer.add_summary(summary_str, step)
                    # summary_writer.flush()

                # Save a checkpoint and evaluate the model periodically.
                if (step + 1) % 1000 == 0 or (step + 1) == HYPARMS.max_steps:
                    checkpoint_file = os.path.join(HYPARMS.ckpt_dir,
                                                   HYPARMS.ckpt_name)
                    saver.save(sess, checkpoint_file, global_step=step)
                    # Evaluate against the training set.
                    print('Training Data Eval:')
                    do_eval(sess, eval_correct, placebundle.x, placebundle.y_,
                            placebundle.keep_prob, train_set, HYPARMS)
                    # Evaluate against the test set.
                    print('Test Data Eval:')
                    do_eval(sess, eval_correct, placebundle.x, placebundle.y_,
                            placebundle.keep_prob, test_set, HYPARMS)
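This example relies on helpers (placeholder_inputs, graph_model, fill_feed_dict, do_eval) defined elsewhere in the tutorial and not shown here. A minimal sketch of what fill_feed_dict could look like, assuming the data set object exposes a next_batch() method as the MNIST tutorial datasets do, and with an assumed dropout keep probability:

# Hypothetical sketch, not the original helper: build the feed dict for one step.
# Assumes data_set.next_batch(batch_size) returns (images, labels) numpy arrays.
def fill_feed_dict(data_set, images_pl, labels_pl, keep_prob_pl, hyparms):
    images_feed, labels_feed = data_set.next_batch(hyparms.batch_size)
    return {
        images_pl: images_feed,
        labels_pl: labels_feed,
        keep_prob_pl: 0.5,  # assumed keep probability during training
    }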
Example 2
def train_mlp(L1_reg=0.0, L2_reg=0.0000, num_batches_per_bunch=512,
              batch_size=1, num_bunches_queue=5, offset=0,
              path_name='/afs/inf.ed.ac.uk/user/s12/s1264845/scratch/s1264845/data/'):
    

    voc_list = Vocabulary(path_name + 'train')
    voc_list.vocab_create()
    vocab = voc_list.vocab
    vocab_size = voc_list.vocab_size
    
    voc_list_valid = Vocabulary(path_name + 'valid')
    voc_list_valid.vocab_create()
    valid_words_count = voc_list_valid.count
    #print valid_words_count
    valid_lines_count = voc_list_valid.line_count
    #print valid_lines_count

    voc_list_test = Vocabulary(path_name + 'test')
    voc_list_test.vocab_create()
    test_words_count = voc_list_test.count
    #print test_words_count
    test_lines_count = voc_list_test.line_count
    #print test_lines_count
 
    dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size )
    dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size )
    dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size )

    #exp_name = 'fine_tuning.hdf5'
    
    print '..building the model'

    #symbolic variables for input, target vector and batch index
    index = T.lscalar('index')
    x = T.fvector('x')
    y = T.ivector('y')
    learning_rate = T.fscalar('learning_rate') 

    #theano shared variables for train, valid and test
    train_set_x = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    train_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    
    valid_set_x = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    valid_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    
    test_set_x = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    test_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    
    rng = numpy.random.RandomState() 
   
    classifier = MLP(rng = rng, input = x, n_in = vocab_size, fea_dim = 30, context_size = 2, n_hidden = 60 , n_out = vocab_size)

    cost = classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr
    
    #constructor for learning rate class
    learnrate_schedular = LearningRateNewBob(start_rate=0.005, scale_by=.5, max_epochs=9999,\
                                    min_derror_ramp_start=.01, min_derror_stop=.01, init_error=100.)

    #learnrate_schedular = LearningRateList(learn_list)

    frame_error = classifier.errors(y)
    likelihood = classifier.sum(y)
    
    #test_model
    test_model = theano.function(inputs = [], outputs = likelihood,  \
                                 givens = {x: test_set_x,
                                           y: test_set_y})
    #validation_model
    validate_model = theano.function(inputs = [], outputs = [frame_error, likelihood], \
                                     givens = {x: valid_set_x,
                                               y: valid_set_y})

    gradient_param = []
    #calculates the gradient of cost with respect to parameters 
    for param in classifier.params:
        gradient_param.append(T.cast(T.grad(cost, param), 'float32'))
        
    updates = []
    #updates the parameters
    for param, gradient in zip(classifier.params, gradient_param):
        updates.append((param, param - learning_rate * gradient))
    
    #training_model
    train_model = theano.function(inputs = [theano.Param(learning_rate, default = 0.01)], outputs = cost, updates = updates, \
                                 givens = {x: train_set_x,
                                           y: train_set_y})
   

    training(dataprovider_train, dataprovider_valid, learnrate_schedular,
             classifier, train_model, validate_model, train_set_x, train_set_y,
             valid_set_x, valid_set_y, batch_size, num_batches_per_bunch,
             valid_words_count, valid_lines_count)
    testing(dataprovider_test, classifier, test_model, test_set_x, test_set_y,
            test_words_count, test_lines_count)
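The training() and testing() helpers invoked above are not shown. The core step they would perform with the compiled Theano functions is roughly the following sketch (an assumption, not the original code), where feats and labels are the numpy arrays produced by one DataProvider bunch and rate comes from the learning-rate scheduler:

# Hypothetical sketch: push one bunch of data into the shared variables and run
# the compiled training function at the scheduler's current learning rate.
def train_one_bunch(train_model, train_set_x, train_set_y, feats, labels, rate):
    train_set_x.set_value(feats, borrow=True)
    train_set_y.set_value(labels, borrow=True)
    return train_model(rate)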
    elif training_mode:
        #Example: python main.py --mode="training"
        #Path where to read spectrograms of noisy voice and clean voice
        path_save_spectrogram = args.path_save_spectrogram
        #path to find pre-trained weights / save models
        weights_path = args.weights_folder
        #pre-trained model
        name_model = args.name_model
        #Training from scratch vs training from pre-trained weights
        training_from_scratch = args.training_from_scratch
        #epochs for training
        epochs = args.epochs
        #batch size for training
        batch_size = args.batch_size

        training(path_save_spectrogram, weights_path, name_model, training_from_scratch, epochs, batch_size)

    elif prediction_mode:
        #Example: python main.py --mode="prediction"
        #path to find pre-trained weights / save models
        weights_path = args.weights_folder
        #pre-trained model
        name_model = args.name_model
        #directory from which to read the noisy sound to denoise
        audio_dir_prediction = args.audio_dir_prediction
        #directory where the denoised sound is saved
        dir_save_prediction = args.dir_save_prediction
        #Name of the noisy sound file to denoise
        audio_input_prediction = args.audio_input_prediction
        #Name of the denoised sound file to save
        audio_output_prediction = args.audio_output_prediction
def foo():
    path = os.path.dirname(os.path.abspath(__file__))

    # Run the training on the ideal model phase
    # cfg = "{}/leo_rbdl_zmq_drl.yaml".format(path)
    # a = start(cfg)
    # time.sleep(5)
    # #

    # No of policy iterations
    for ii in range(1, 10):
        #
        # No of runs of one policy
        for i in range(1):
            #
            # Run the trained policy on a real model
            global_params.ou_sigma = 0.1
            global_params.ou_theta = 0.15
            d = {
                'transitions': {
                    'load': 0,
                    'save': 1,
                    'save_filename': 'saved_data-perturbed-{}'.format(ii),
                    'buffer_size': 5000
                },
                'difference_model': 0
            }
            with open('config.yaml', 'w') as yaml_file:
                yaml.dump(d, yaml_file, default_flow_style=False)
            cfg = "{}/leo_rbdl_zmq_drl_2.yaml".format(path)
            new_cfg = rl_run_rbdl_agent(cfg, ii - 1)

            start(new_cfg)
            time.sleep(2)

            # Run the transitions on the original model
            d = {
                'transitions': {
                    'load': 1,
                    'load_filename': 'saved_data-perturbed-{}'.format(ii),
                    'save': 1,
                    'save_filename': 'saved_data-original-{}'.format(ii),
                    'buffer_size': 5000
                },
                'difference_model': 0
            }
            with open('config.yaml', 'w') as yaml_file:
                yaml.dump(d, yaml_file, default_flow_style=False)
            cfg = "{}/leo_rbdl_zmq_drl_3.yaml".format(path)
            start(cfg)

            # Train a new difference model or update one
            with tf.Graph().as_default() as diff_model:
                model = DifferenceModel(24, 18)
                with tf.Session(graph=diff_model) as sess:
                    if i == 0 and ii == 1:
                        d = {'difference_model': 0}
                    else:
                        d = {'difference_model': 1}
                    with open('config.yaml', 'w') as yaml_file:
                        yaml.dump(d, yaml_file, default_flow_style=False)

                    perturbed_files = [
                        'saved_data-perturbed-{}'.format(b)
                        for b in range(1, ii + 1)
                    ]
                    ideal_files = [
                        'saved_data-original-{}'.format(b)
                        for b in range(1, ii + 1)
                    ]
                    print(perturbed_files)
                    model = training(sess, model, perturbed_files, ideal_files,
                                     24, 18, 300)

        # Training the policy with the difference model included
        global_params.ou_sigma = 0.12
        global_params.ou_theta = 0.15
        global_params.actor_learning_rate = 0.0001
        global_params.critic_learning_rate = 0.001
        iterations = 0
        while not global_params.learning_success and iterations != 1:
            if ii == 1:
                d = {
                    'replay_buffer': {
                        'load': 0,
                        'save': 1,
                        'save_filename': 'saved_replay_buffer',
                        'buffer_size': 100000
                    },
                    'difference_model': 0
                }
                with open('config.yaml', 'w') as yaml_file:
                    yaml.dump(d, yaml_file, default_flow_style=False)
            else:
                d = {
                    'replay_buffer': {
                        'load': 1,
                        'load_filename': 'saved_replay_buffer',
                        'save': 0,
                        'save_filename': 'saved_replay_buffer',
                        'buffer_size': 100000
                    },
                    'difference_model': 0
                }
                with open('config.yaml', 'w') as yaml_file:
                    yaml.dump(d, yaml_file, default_flow_style=False)
            cfg = "{}/leo_rbdl_zmq_drl.yaml".format(path)
            new_cfg = rl_run_rbdl_agent(cfg, ii - 1)
            start(new_cfg, ii)

            time.sleep(5)

            # Run the policy learned with the difference model on the perturbed system to see whether it transfers
            global_params.test_run_on_model = 1

            d = {
                'replay_buffer': {
                    'load': 0,
                    'save': 0,
                    'buffer_size': 2000
                },
                'difference_model': 0
            }
            with open('config.yaml', 'w') as yaml_file:
                yaml.dump(d, yaml_file, default_flow_style=False)
            cfg = "{}/leo_rbdl_zmq_drl_real.yaml".format(path)
            new_cfg = rl_run_rbdl_agent(cfg, ii)
            start(new_cfg)
            global_params.test_run_on_model = 0

            iterations += 1

        # Check the success flag before resetting it; otherwise the break below
        # can never trigger.
        if global_params.learning_success:
            print("Entire training was successful")
            break

        global_params.learning_success = 0
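The script above coordinates the launched agents purely through config.yaml. The consumer side is not shown; a minimal sketch of how an agent would read the flags back, assuming standard PyYAML, is:

import yaml

# Hypothetical sketch of the consumer side (not part of the original script):
# read config.yaml back and switch behaviour on its flags.
with open('config.yaml') as yaml_file:
    cfg = yaml.safe_load(yaml_file)
use_difference_model = bool(cfg.get('difference_model', 0))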
Example 5
def train_mlp(
        L1_reg=0.0,
        L2_reg=0.0000,
        num_batches_per_bunch=512,
        batch_size=1,
        num_bunches_queue=5,
        offset=0,
        path_name='/afs/inf.ed.ac.uk/user/s12/s1264845/scratch/s1264845/data/'
):

    voc_list = Vocabulary(path_name + 'train')
    voc_list.vocab_create()
    vocab = voc_list.vocab
    vocab_size = voc_list.vocab_size

    voc_list_valid = Vocabulary(path_name + 'valid')
    voc_list_valid.vocab_create()
    valid_words_count = voc_list_valid.count
    #print valid_words_count
    valid_lines_count = voc_list_valid.line_count
    #print valid_lines_count

    voc_list_test = Vocabulary(path_name + 'test')
    voc_list_test.vocab_create()
    test_words_count = voc_list_test.count
    #print test_words_count
    test_lines_count = voc_list_test.line_count
    #print test_lines_count

    dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size)
    dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size)
    dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size)

    #exp_name = 'fine_tuning.hdf5'

    print '..building the model'

    #symbolic variables for input, target vector and batch index
    index = T.lscalar('index')
    x = T.fvector('x')
    y = T.ivector('y')
    learning_rate = T.fscalar('learning_rate')

    #theano shared variables for train, valid and test
    train_set_x = theano.shared(numpy.empty((1), dtype='float32'),
                                allow_downcast=True)
    train_set_y = theano.shared(numpy.empty((1), dtype='int32'),
                                allow_downcast=True)

    valid_set_x = theano.shared(numpy.empty((1), dtype='float32'),
                                allow_downcast=True)
    valid_set_y = theano.shared(numpy.empty((1), dtype='int32'),
                                allow_downcast=True)

    test_set_x = theano.shared(numpy.empty((1), dtype='float32'),
                               allow_downcast=True)
    test_set_y = theano.shared(numpy.empty((1), dtype='int32'),
                               allow_downcast=True)

    rng = numpy.random.RandomState()

    classifier = MLP(rng=rng,
                     input=x,
                     n_in=vocab_size,
                     fea_dim=30,
                     context_size=2,
                     n_hidden=60,
                     n_out=vocab_size)

    cost = classifier.negative_log_likelihood(
        y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr

    #constructor for learning rate class
    learnrate_schedular = LearningRateNewBob(start_rate=0.005, scale_by=.5, max_epochs=9999,\
                                    min_derror_ramp_start=.01, min_derror_stop=.01, init_error=100.)

    #learnrate_schedular = LearningRateList(learn_list)

    frame_error = classifier.errors(y)
    likelihood = classifier.sum(y)

    #test_model
    test_model = theano.function(inputs = [], outputs = likelihood,  \
                                 givens = {x: test_set_x,
                                           y: test_set_y})
    #validation_model
    validate_model = theano.function(inputs = [], outputs = [frame_error, likelihood], \
                                     givens = {x: valid_set_x,
                                               y: valid_set_y})

    gradient_param = []
    #calculates the gradient of cost with respect to parameters
    for param in classifier.params:
        gradient_param.append(T.cast(T.grad(cost, param), 'float32'))

    updates = []
    #updates the parameters
    for param, gradient in zip(classifier.params, gradient_param):
        updates.append((param, param - learning_rate * gradient))

    #training_model
    train_model = theano.function(inputs = [theano.Param(learning_rate, default = 0.01)], outputs = cost, updates = updates, \
                                 givens = {x: train_set_x,
                                           y: train_set_y})

    training(dataprovider_train, dataprovider_valid, learnrate_schedular,
             classifier, train_model, validate_model, train_set_x, train_set_y,
             valid_set_x, valid_set_y, batch_size, num_batches_per_bunch,
             valid_words_count, valid_lines_count)
    testing(dataprovider_test, classifier, test_model, test_set_x, test_set_y,
            test_words_count, test_lines_count)
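The default path_name points at a site-specific AFS directory expected to contain 'train', 'valid' and 'test' files. A call from another machine would presumably look like this (the data directory is a hypothetical placeholder):

# Hypothetical call with a placeholder data directory.
train_mlp(L1_reg=0.0, L2_reg=0.0, batch_size=1, path_name='/path/to/data/')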
Example 6
# preprocess('data/padi.csv', 1)
path_to_file = 'data/padi.csv'
province = 'DI YOGYAKARTA'
sliding_window = 3
train_test_proportion = 0.8

data = pd.read_csv(path_to_file)
preprocessed = preprocess(data[data['Provinsi'] == province],
                          sliding_window)

train_data = preprocessed[:int(train_test_proportion * len(preprocessed))]
test_data = preprocessed.drop(train_data.index)

train_labels = train_data.pop(train_data.columns[-1])
test_labels = test_data.pop(test_data.columns[-1])

model = building_model(train_data)
tf.random.set_seed(28)
trained_model = training(model,
                         train_data,
                         train_labels,
                         epochs=1000,
                         early_stop=False)
ploting_history(trained_model)

print('')
evaluation(model, test_data, test_labels)

predict_result = model.predict(test_data).flatten()
print(predict_result)
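building_model, training, ploting_history and evaluation come from the project's own modules and are not shown here. A plausible sketch of building_model, assuming a small tf.keras regression network over the sliding-window features, might be:

import tensorflow as tf

# Hypothetical sketch, not the project's actual implementation: a small Keras
# regression model sized to the preprocessed sliding-window features.
def building_model(train_data):
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='relu',
                              input_shape=(train_data.shape[1],)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(1),
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model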
Example 7
        batch_size = args.batch_size

        list_noise_files = os.listdir(noise_dir + dataset_noise + '/')
        list_voice_files = os.listdir(voice_dir + dataset_voice + '/')

        if dataset_noise == 'Metal':
            print('Special Confirmed!')
            create_data_special(noise_dir, voice_dir, dataset_noise,
                                dataset_voice, time_wave_dir, sound_dir,
                                spectrogram_dir, sample_rate, min_duration,
                                frame_length, hop_length_frame,
                                hop_length_frame_noise, nb_samples, n_fft,
                                hop_length_fft, list_noise_files,
                                list_voice_files)
            training(dataset_noise, dataset_voice, spectrogram_dir,
                     weights_dir, model_name, training_from_scratch, epochs,
                     batch_size)
        else:
            # create_data(noise_dir, voice_dir, dataset_noise, dataset_voice, time_wave_dir, sound_dir,
            #             spectrogram_dir, sample_rate, min_duration, frame_length, hop_length_frame,
            #             hop_length_frame_noise, nb_samples, n_fft, hop_length_fft, list_noise_files,
            #             list_voice_files)
            training(dataset_noise, dataset_voice, spectrogram_dir,
                     weights_dir, model_name, training_from_scratch, epochs,
                     batch_size)

    elif prediction_mode:

        weights_dir = args.weights_dir
        model_name = args.model_name
        input_dir = args.input_dir
Example 8
        # Example: python main.py --mode="training"

        # Path where to read spectrograms of noisy voice and clean voice
        path_save_spectrogram = args.path_save_spectrogram
        # path to find pre-trained weights / save models
        weights_path = args.weights_folder
        # pre-trained model
        name_model = args.name_model
        # Training from scratch vs training from pre-trained weights
        training_from_scratch = args.training_from_scratch
        # epochs for training
        epochs = args.epochs
        # batch size for training
        batch_size = args.batch_size

        training(path_save_spectrogram, weights_path, name_model, True, epochs,
                 batch_size, 0)
        # for id in range(1):
        #    training(path_save_spectrogram, weights_path, name_model, True, epochs, batch_size, id)

    elif prediction_mode:
        # Example: python main.py --mode="prediction"

        # path to find pre-trained weights / save models
        weights_path = args.weights_folder
        # pre-trained model
        name_model = args.name_model
        # directory from which to read the noisy sound to denoise
        audio_dir_prediction = args.audio_dir_prediction
        print(audio_dir_prediction)

        # Name of the noisy sound file to denoise