Example #1
def load_sensor_files(training_data_path, testing_data_path,
                      num_training_samples=1000000,
                      num_training_samples_per_file=100,
                      num_test_samples=10000,
                      num_test_samples_per_file=10,
                      history_length=3,
                      feature_indexes=None,
                      label_indexes=None,
                      shared=True):

    from os import listdir
    from os.path import isdir, join
    from random import sample, shuffle

    # Collect candidate training files, skipping directories and trajectory files.
    file_names = listdir(training_data_path)
    training_file_paths = [join(training_data_path, name) for name in file_names]
    training_file_paths = [path for path in training_file_paths if not (isdir(path) or "trajectory" in path)]
    # Integer division so sample() receives an int file count.
    num_training_files = min(len(training_file_paths),
                             max(1, num_training_samples // num_training_samples_per_file))
    # sample() already returns the paths in random order, so the extra
    # shuffle() is redundant but harmless.
    training_file_paths = sample(training_file_paths, num_training_files)
    shuffle(training_file_paths)

    # Apply the same selection logic to the test files.
    file_names = listdir(testing_data_path)
    test_file_paths = [join(testing_data_path, name) for name in file_names]
    test_file_paths = [path for path in test_file_paths if not (isdir(path) or "trajectory" in path)]
    num_test_files = min(len(test_file_paths),
                         max(1, num_test_samples // num_test_samples_per_file))
    test_file_paths = sample(test_file_paths, num_test_files)
    shuffle(test_file_paths)

    print('... loading training data')
    training_data, training_labels = load_data_set(training_file_paths, num_training_samples_per_file,
                                                   history_length, feature_indexes, label_indexes)
    print('... loading testing data')
    test_data, test_labels = load_data_set(test_file_paths, num_test_samples_per_file,
                                           history_length, feature_indexes, label_indexes)

    print('... ' + str(len(training_data)) + ' training samples loaded')
    print('... ' + str(len(test_data)) + ' test samples loaded')

    # Optionally wrap the arrays as shared datasets (see the sketch after
    # this example).
    if shared:
        training_data = shared_dataset(training_data)
        training_labels = shared_dataset(training_labels)
        test_data = shared_dataset(test_data)
        test_labels = shared_dataset(test_labels)

    return training_data, training_labels, test_data, test_labels
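
Both examples call a shared_dataset helper that is not defined here (Example #2 imports it from data_loader). Given the shared flag and the Theano-style parameter names ('W', 'b', 'Whid', 'bvis'), it most likely wraps a NumPy array in a Theano shared variable so the data can live in GPU memory. A minimal sketch under that assumption (the borrow default is a guess; load_data_set, which parses the sensor files themselves, is likewise assumed to live in the same module and is not reconstructed here):

def shared_dataset(data, name=None, borrow=True):
    """Wrap a NumPy array as a Theano shared variable in floatX precision."""
    import numpy
    import theano

    # floatX matches Theano's configured precision (float32 on most GPUs).
    return theano.shared(numpy.asarray(data, dtype=theano.config.floatX),
                         name=name, borrow=borrow)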
Example #2
def load_config_data(config_path):
    from os import listdir
    from os.path import join
    from data_loader import shared_dataset
    from numpy import array

    # Each autoencoder layer is stored as four files, read in sorted name
    # order: weights, weights_prime, bias, bias_prime.
    num_files_per_layer = 4

    file_names = sorted(listdir(config_path))
    autoencoder_file_paths = [join(config_path, name) for name in file_names if name.startswith("al")]
    supervised_file_paths = [join(config_path, name) for name in file_names if name.startswith("sl")]

    autoencoder_weights = []
    # Files come in groups of four per layer; integer division gives the layer count.
    for i in range(len(autoencoder_file_paths) // num_files_per_layer):
        with open(autoencoder_file_paths[num_files_per_layer * i], 'r') as weights_file:
            weights_data = weights_file.read()
        # splitlines() drops the trailing newline that split('\n') would
        # otherwise turn into an empty row.
        weights = array([[float(value) for value in line.split(",")]
                         for line in weights_data.splitlines()]).T

        with open(autoencoder_file_paths[num_files_per_layer * i + 1], 'r') as weights_prime_file:
            weights_prime_data = weights_prime_file.read()
        weights_prime = array([[float(value) for value in line.split(",")]
                               for line in weights_prime_data.splitlines()]).T

        with open(autoencoder_file_paths[num_files_per_layer * i + 2], 'r') as bias_file:
            bias_data = bias_file.readline()
        bias = array([float(value) for value in bias_data.split(",")])

        with open(autoencoder_file_paths[num_files_per_layer * i + 3], 'r') as bias_prime_file:
            bias_prime_data = bias_prime_file.readline()
        bias_prime = array([float(value) for value in bias_prime_data.split(",")])

        # Wrap each parameter array as a shared variable with the names the
        # model code expects.
        weights = shared_dataset(weights, name='W')
        bias = shared_dataset(bias, name='b')
        weights_prime = shared_dataset(weights_prime, name='Whid')
        bias_prime = shared_dataset(bias_prime, name='bvis')

        autoencoder_weights.append((weights, bias, weights_prime, bias_prime))

    # The supervised output layer has a single weight file and a single bias file.
    with open(supervised_file_paths[0], 'r') as supervised_weights_file:
        supervised_weights_data = supervised_weights_file.read()
    supervised_weights = shared_dataset(
        array([[float(value) for value in line.split(",")]
               for line in supervised_weights_data.splitlines()]).T,
        name='W')

    with open(supervised_file_paths[1], 'r') as supervised_bias_file:
        supervised_bias_data = supervised_bias_file.readline()
    supervised_bias = shared_dataset(
        array([float(value) for value in supervised_bias_data.split(",")]),
        name='b')

    config_file_path = join(config_path, "conf.txt")
    with open(config_file_path, 'r') as config_file:
        config_data = config_file.readlines()

    return autoencoder_weights, (supervised_weights, supervised_bias), config_data
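
A hedged usage sketch for load_config_data: the directory name below is hypothetical, and the .get_value() calls assume shared_dataset returns Theano shared variables as sketched after Example #1.

config_dir = "pretrained_model"  # hypothetical directory holding al*/sl* files and conf.txt

autoencoder_weights, (supervised_W, supervised_b), config_lines = load_config_data(config_dir)

# One (W, b, W_prime, b_prime) tuple per pretrained autoencoder layer.
for layer, (W, b, W_prime, b_prime) in enumerate(autoencoder_weights):
    print('layer %d: W %s, b %s' % (layer,
                                    W.get_value(borrow=True).shape,
                                    b.get_value(borrow=True).shape))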