Ejemplo n.º 1
0
def run_network_keras(config, output_file_name=None):
    """Train a Keras network described by ``config`` and write out its results.

    The network is fitted in rounds of ``epochs_per_fit`` epochs.  Training
    stops once the validation loss reported for a round drops below 0.001 or
    the epoch counter reaches ``max_epochs``; at least one round always runs.
    The weights with the lowest validation loss seen after any round are
    remembered.  Afterwards the final weights, the best weights, the graphs
    and a text summary are written below the output directory.

    :param config: mapping of config sections.  The sections read here are
        'LearningParameters', 'NetworkStructure', 'TrainingParameters',
        'FileConfig', 'DatabaseConfig' and 'PreprocessingConfig'.
    :param output_file_name: base name for all output files.  When None, the
        'output_file_name' entry of the 'FileConfig' section is used.
    :raises FuckingNaN: if the validation loss of a round is NaN.
    """
    # Dump the configuration to stdout.  Single-argument print(...) produces
    # the same output as a Python 2 print statement and also parses on
    # Python 3.
    for line in config:
        print('{0} {1}'.format(line, config[line]))
        print('')

    # Unpack configs required
    learning_params = config['LearningParameters']
    net_structure = config['NetworkStructure']
    fitting_params = config['TrainingParameters']
    file_config = config['FileConfig']

    out_directory = file_config['output_directory']

    # Use the config file's name only if we're not provided one here.
    if output_file_name is None:
        output_file_name = file_config['output_file_name']

    train_data = get_training_data(config['DatabaseConfig'],
                                   config['PreprocessingConfig'],
                                   config['FileConfig'])

    # Training data comes packed in a dictionary to avoid needing to return
    # a whole pile of values.
    train_in = train_data['train_in']
    train_out = train_data['train_out']
    test_in = train_data['test_in']
    test_out = train_data['test_out']
    galaxy_ids_test = train_data['galaxy_ids_test']
    out_normaliser = train_data['out_normaliser']

    # Some general statistics of the dataset
    mean_in = train_data['mean_in']
    mean_out = train_data['mean_out']
    stddev_in = train_data['stddev_in']
    stddev_out = train_data['stddev_out']
    min_out = train_data['min_out']
    max_out = train_data['max_out']

    LOG.info('Dataset shape train')
    LOG.info('{0}'.format(np.shape(train_in)))
    LOG.info('{0}'.format(np.shape(train_out)))
    LOG.info('Dataset shape test')
    LOG.info('{0}'.format(np.shape(test_in)))
    LOG.info('{0}'.format(np.shape(test_out)))

    LOG.info('Compiling neural network model')

    # Only 'sgd' is built explicitly from the learning parameters; any other
    # value is handed to build_network unchanged.
    if net_structure['optimiser'] == 'sgd':
        optimiser = SGD(lr=learning_params['learning_rate'],
                        momentum=learning_params['momentum'],
                        decay=learning_params['decay'],
                        nesterov=True)
    else:
        optimiser = net_structure['optimiser']

    print(optimiser)

    input_dim = len(train_in[0])
    try:
        output_dim = len(train_out[0])
    except TypeError:
        # The targets have no len(), i.e. each one is a single scalar.
        output_dim = 1

    model = build_network(net_structure, input_dim, output_dim, optimiser)

    LOG.info("Compiled.")

    # Train the model in rounds and track predictions after every round.
    history_log = History_Log()
    total_epoch = 0
    epochs_per_fit = fitting_params['epochs_per_fit']
    max_epochs = fitting_params['max_epochs']
    target_val_loss = 0.001  # Stop early once validation loss is below this.

    epoch_history = []
    differences = []

    lowest_val_loss = float('inf')
    lowest_val_loss_weights = None  # Weights with the lowest validation loss
    lowest_val_loss_epoch = 0

    while True:

        LOG.info('epoch {0} / {1}'.format(total_epoch, max_epochs))

        model.fit(train_in,
                  train_out,
                  batch_size=fitting_params['batch_size'],
                  nb_epoch=epochs_per_fit,
                  validation_split=fitting_params['validation_split'],
                  show_accuracy=True,
                  verbose=False,
                  callbacks=[history_log])

        epoch_data = history_log.epoch_data
        LOG.info('{0}'.format(epoch_data))
        total_epoch += epochs_per_fit

        val_loss = epoch_data['val_loss']
        if np.isnan(val_loss):
            raise FuckingNaN("Nan'd")

        # Remember the weights whenever the validation loss improves.  The
        # first round is always recorded so that a weight set exists to
        # restore after training, whatever the magnitude of the loss.
        if lowest_val_loss_weights is None or val_loss < lowest_val_loss:
            lowest_val_loss = val_loss
            lowest_val_loss_weights = weights_to_list(model)
            lowest_val_loss_epoch = total_epoch

        # Predict the first three test samples (change the slice to track
        # more or fewer) to see how the network's output for them changes
        # over time.
        prediction = model.predict(np.array(test_in[:3]))
        differences.append(prediction)

        epoch_history.append(epoch_data)

        if val_loss < target_val_loss or total_epoch >= max_epochs:
            break

    differences = np.array(
        differences)  # Need special np indexing on this later

    if not out_directory:
        out_directory = os.getcwd()

    if not os.path.exists(out_directory):
        os.makedirs(out_directory)

    if not os.path.exists('{0}/graph'.format(out_directory)):
        os.mkdir('{0}/graph'.format(out_directory))

    print('{0}/graph'.format(out_directory))

    graph_out = '{0}/graph/{1}'.format(out_directory, output_file_name)

    # Save network weights, graph of network and loss over epoch graph.
    to_graph(model).write_svg("{0}_Graph.svg".format(graph_out))
    model.save_weights('{0}_weights.h5'.format(graph_out), overwrite=True)
    save_mean_convergence_graph('{0}_convergence'.format(graph_out),
                                differences, mean_out, min_out, max_out,
                                epochs_per_fit)
    save_graph(epoch_history, '{0}_loss'.format(graph_out), epochs_per_fit)

    # Run 'num_tests' tests on the network's final weights.
    test_results, test_means, test_std = do_tests(model, test_in,
                                                  fitting_params['num_tests'])
    # Evaluate the network on its final weights.
    # NOTE(review): the positional arguments are presumably batch_size=1000,
    # show_accuracy=True, verbose=True -- confirm against the Keras version
    # in use.
    evaluation = {
        'At end of training:': model.evaluate(test_in, test_out, 1000, True,
                                              True)
    }

    # Run the same tests on the lowest validation loss weights.
    model = weight_from_list(model, lowest_val_loss_weights)
    model.save_weights('{0}_best_weights.h5'.format(graph_out), overwrite=True)
    val_test_results, val_test_means, val_test_std = do_tests(
        model, test_in, fitting_params['num_tests'])
    # Evaluate the network on the lowest validation loss weights.
    evaluation['Best validation loss:'] = model.evaluate(
        test_in, test_out, 1000, True, True)

    with open('{0}/{1}.txt'.format(out_directory, output_file_name), 'w') as f:
        write_summary_data(f, config, evaluation, epoch_history, mean_in,
                           stddev_in, mean_out, stddev_out,
                           (lowest_val_loss_epoch, lowest_val_loss))

        f.write(
            '\n\n\n\n----------TEST DATA FOR FINAL MODEL----------\n\n\n\n')
        write_test_data(f, test_results, test_in, test_out, test_means,
                        test_std, galaxy_ids_test, out_normaliser,
                        config['PreprocessingConfig']['single_output'])
        f.write(
            '\n\n\n\n----------TEST DATA FOR BEST VALIDATION LOSS MODEL----------\n\n\n\n'
        )
        write_test_data(f, val_test_results, test_in, test_out, val_test_means,
                        val_test_std, galaxy_ids_test, out_normaliser,
                        config['PreprocessingConfig']['single_output'])
Ejemplo n.º 2
0
def run_network_keras(hidden_connections, hidden_layers, loss,
                      single_output=None, single_input=None,
                      normalise_input=None, normalise_output=None,
                      input_filter_types=None,
                      use_graph=False,
                      unknown_input_handler=None):
    """Fit a one-hidden-layer network that reproduces its own input, then exit.

    The model is ``Dense -> PReLU -> Dense`` with ``hidden_connections``
    hidden units and is trained with the training inputs as both input and
    target.  After training, the prediction for up to the first 30 test rows
    is printed and the process is terminated.

    ``config`` and ``tmp_file`` are not parameters; they are resolved from an
    enclosing scope that is not visible in this function.

    :param hidden_connections: number of units in the hidden layer.
    :param hidden_layers: currently unused.
    :param loss: currently unused; the model is always compiled with 'mse'.
    :param single_output: forwarded to ``get_training_data``.
    :param single_input: forwarded to ``get_training_data``.
    :param normalise_input: forwarded to ``get_training_data``.
    :param normalise_output: forwarded to ``get_training_data``.
    :param input_filter_types: stored in ``config['input_filter_types']``
        before the training data is loaded.
    :param use_graph: currently unused.
    :param unknown_input_handler: forwarded to ``get_training_data``.
    :raises SystemExit: always, once the predictions have been printed.
    """
    import sys

    config['input_filter_types'] = input_filter_types

    train_data = get_training_data(config,
                                   tmp_file,
                                   single_output,
                                   single_input,
                                   normalise_input,
                                   normalise_output,
                                   unknown_input_handler,
                                   percentile_bin=config['percentile_bin'],
                                   erase_above=config['erase_above'])

    train_in = train_data['train_in']
    train_out = train_data['train_out']
    test_in = train_data['test_in']
    test_out = train_data['test_out']

    # Single-argument print(...) produces the same output as a Python 2
    # print statement and also parses on Python 3.
    print(np.shape(train_in))
    print(np.shape(train_out))
    print(np.shape(test_in))
    print(np.shape(test_out))

    LOG.info('Compiling neural network model')

    input_dim = len(train_in[0])

    # The output layer has the same width as the input: the network is
    # trained to reproduce its own input.
    model = Sequential()

    model.add(Dense(output_dim=hidden_connections, input_dim=input_dim))
    model.add(PReLU())
    model.add(Dense(output_dim=input_dim, input_dim=hidden_connections))

    model.compile(loss='mse', optimizer=RMSprop(lr=0.001), class_mode='binary')

    # NOTE(review): the positional arguments are presumably batch_size=5000
    # and nb_epoch=10000 -- confirm against the Keras version in use.
    model.fit(train_in, train_in, 5000, 10000, validation_split=0.3, verbose=True, show_accuracy=True)

    # Show at most the first 30 test rows; slicing keeps this safe when the
    # test set holds fewer rows than that.
    for sample in test_in[:30]:
        ans = model.predict(np.array([sample]))

        print('Test {0}'.format(sample))
        print('Ans {0}'.format(ans[0]))
        print('')
        print('')

    # This experiment deliberately stops the whole program here.
    sys.exit()
Ejemplo n.º 3
0
def run_network_keras(config, output_file_name=None):
    """Train a Keras network described by ``config`` and write out its results.

    The network is fitted in rounds of ``epochs_per_fit`` epochs.  Training
    stops once the validation loss reported for a round drops below 0.001 or
    the epoch counter reaches ``max_epochs``; at least one round always runs.
    The weights with the lowest validation loss seen after any round are
    remembered.  Afterwards the final weights, the best weights, the graphs
    and a text summary are written below the output directory.

    :param config: mapping of config sections.  The sections read here are
        'LearningParameters', 'NetworkStructure', 'TrainingParameters',
        'FileConfig', 'DatabaseConfig' and 'PreprocessingConfig'.
    :param output_file_name: base name for all output files.  When None, the
        'output_file_name' entry of the 'FileConfig' section is used.
    :raises FuckingNaN: if the validation loss of a round is NaN.
    """
    # Dump the configuration to stdout.  Single-argument print(...) produces
    # the same output as a Python 2 print statement and also parses on
    # Python 3.
    for line in config:
        print('{0} {1}'.format(line, config[line]))
        print('')

    # Unpack configs required
    learning_params = config['LearningParameters']
    net_structure = config['NetworkStructure']
    fitting_params = config['TrainingParameters']
    file_config = config['FileConfig']

    out_directory = file_config['output_directory']

    # Use the config file's name only if we're not provided one here.
    if output_file_name is None:
        output_file_name = file_config['output_file_name']

    train_data = get_training_data(config['DatabaseConfig'], config['PreprocessingConfig'], config['FileConfig'])

    # Training data comes packed in a dictionary to avoid needing to return
    # a whole pile of values.
    train_in = train_data['train_in']
    train_out = train_data['train_out']
    test_in = train_data['test_in']
    test_out = train_data['test_out']
    galaxy_ids_test = train_data['galaxy_ids_test']
    out_normaliser = train_data['out_normaliser']

    # Some general statistics of the dataset
    mean_in = train_data['mean_in']
    mean_out = train_data['mean_out']
    stddev_in = train_data['stddev_in']
    stddev_out = train_data['stddev_out']
    min_out = train_data['min_out']
    max_out = train_data['max_out']

    LOG.info('Dataset shape train')
    LOG.info('{0}'.format(np.shape(train_in)))
    LOG.info('{0}'.format(np.shape(train_out)))
    LOG.info('Dataset shape test')
    LOG.info('{0}'.format(np.shape(test_in)))
    LOG.info('{0}'.format(np.shape(test_out)))

    LOG.info('Compiling neural network model')

    # Only 'sgd' is built explicitly from the learning parameters; any other
    # value is handed to build_network unchanged.
    if net_structure['optimiser'] == 'sgd':
        optimiser = SGD(lr=learning_params['learning_rate'], momentum=learning_params['momentum'], decay=learning_params['decay'], nesterov=True)
    else:
        optimiser = net_structure['optimiser']

    print(optimiser)

    input_dim = len(train_in[0])
    try:
        output_dim = len(train_out[0])
    except TypeError:
        # The targets have no len(), i.e. each one is a single scalar.
        output_dim = 1

    model = build_network(net_structure, input_dim, output_dim, optimiser)

    LOG.info("Compiled.")

    # Train the model in rounds and track predictions after every round.
    history_log = History_Log()
    total_epoch = 0
    epochs_per_fit = fitting_params['epochs_per_fit']
    max_epochs = fitting_params['max_epochs']
    target_val_loss = 0.001  # Stop early once validation loss is below this.

    epoch_history = []
    differences = []

    lowest_val_loss = float('inf')
    lowest_val_loss_weights = None  # Weights with the lowest validation loss
    lowest_val_loss_epoch = 0

    while True:

        LOG.info('epoch {0} / {1}'.format(total_epoch, max_epochs))

        model.fit(train_in, train_out, batch_size=fitting_params['batch_size'],
                  nb_epoch=epochs_per_fit,
                  validation_split=fitting_params['validation_split'],
                  show_accuracy=True, verbose=False, callbacks=[history_log])

        epoch_data = history_log.epoch_data
        LOG.info('{0}'.format(epoch_data))
        total_epoch += epochs_per_fit

        val_loss = epoch_data['val_loss']
        if np.isnan(val_loss):
            raise FuckingNaN("Nan'd")

        # Remember the weights whenever the validation loss improves.  The
        # first round is always recorded so that a weight set exists to
        # restore after training, whatever the magnitude of the loss.
        if lowest_val_loss_weights is None or val_loss < lowest_val_loss:
            lowest_val_loss = val_loss
            lowest_val_loss_weights = weights_to_list(model)
            lowest_val_loss_epoch = total_epoch

        # Predict the first three test samples (change the slice to track
        # more or fewer) to see how the network's output for them changes
        # over time.
        prediction = model.predict(np.array(test_in[:3]))
        differences.append(prediction)

        epoch_history.append(epoch_data)

        if val_loss < target_val_loss or total_epoch >= max_epochs:
            break

    differences = np.array(differences)  # Need special np indexing on this later

    if not out_directory:
        out_directory = os.getcwd()

    if not os.path.exists(out_directory):
        os.makedirs(out_directory)

    if not os.path.exists('{0}/graph'.format(out_directory)):
        os.mkdir('{0}/graph'.format(out_directory))

    print('{0}/graph'.format(out_directory))

    graph_out = '{0}/graph/{1}'.format(out_directory, output_file_name)

    # Save network weights, graph of network and loss over epoch graph.
    to_graph(model).write_svg("{0}_Graph.svg".format(graph_out))
    model.save_weights('{0}_weights.h5'.format(graph_out), overwrite=True)
    save_mean_convergence_graph('{0}_convergence'.format(graph_out), differences, mean_out, min_out, max_out, epochs_per_fit)
    save_graph(epoch_history, '{0}_loss'.format(graph_out), epochs_per_fit)

    # Run 'num_tests' tests on the network's final weights.
    test_results, test_means, test_std = do_tests(model, test_in, fitting_params['num_tests'])
    # Evaluate the network on its final weights.
    # NOTE(review): the positional arguments are presumably batch_size=1000,
    # show_accuracy=True, verbose=True -- confirm against the Keras version
    # in use.
    evaluation = {'At end of training:': model.evaluate(test_in, test_out, 1000, True, True)}

    # Run the same tests on the lowest validation loss weights.
    model = weight_from_list(model, lowest_val_loss_weights)
    model.save_weights('{0}_best_weights.h5'.format(graph_out), overwrite=True)
    val_test_results, val_test_means, val_test_std = do_tests(model, test_in, fitting_params['num_tests'])
    # Evaluate the network on the lowest validation loss weights.
    evaluation['Best validation loss:'] = model.evaluate(test_in, test_out, 1000, True, True)

    with open('{0}/{1}.txt'.format(out_directory, output_file_name), 'w') as f:
        write_summary_data(f,
                           config,
                           evaluation,
                           epoch_history,
                           mean_in,
                           stddev_in,
                           mean_out,
                           stddev_out,
                           (lowest_val_loss_epoch, lowest_val_loss)
                           )

        f.write('\n\n\n\n----------TEST DATA FOR FINAL MODEL----------\n\n\n\n')
        write_test_data(f,
                        test_results,
                        test_in,
                        test_out,
                        test_means,
                        test_std,
                        galaxy_ids_test,
                        out_normaliser,
                        config['PreprocessingConfig']['single_output']
                        )
        f.write('\n\n\n\n----------TEST DATA FOR BEST VALIDATION LOSS MODEL----------\n\n\n\n')
        write_test_data(f,
                        val_test_results,
                        test_in,
                        test_out,
                        val_test_means,
                        val_test_std,
                        galaxy_ids_test,
                        out_normaliser,
                        config['PreprocessingConfig']['single_output'])