Esempio n. 1
0
def process_data_set(data_set, filter_threshold=0.1):
    """Convert the raw files of one data set to long format and fit a PCA.

    Every file found in the ``data_set`` folder is passed through
    ``preprocess``. The mean and the variance over axis 3 of each result are
    stored per file. The means are then flattened to one row per
    (file, grid row, grid column) combination, rows whose sum does not exceed
    the smallest row sum by more than ``filter_threshold`` are dropped, and
    everything is written with ``numpy.savez``. Finally a ``PCA`` model is
    fitted on the retained rows and stored with ``pickle_save``.

    Args:
        data_set: Folder that is listed for raw files; the same value is
            handed to ``misc.folder_names`` to look up the output paths.
        filter_threshold: Margin above the smallest row sum that a row has
            to exceed in order to be kept.

    Returns:
        None. Output goes to the ``npz_file`` and ``pca_file`` paths.
    """
    # %%Preprocess andrews
    banner = '#' * 10
    print(banner)
    print(data_set)
    print(banner)

    file_names = misc.folder_names(data_set, 'none')

    pca_file = os.path.join(file_names['pca_file'])
    data_file = os.path.join(file_names['npz_file'])

    # Reading in all data
    print('---> READING AND PREPROCESSING DATA')
    files = natsort.natsorted(os.listdir(data_set))
    n_locations = len(files)

    # Probe the first file for the array sizes so the grid dimensions
    # (previously hard-coded as 7 x 31) follow the data.
    test_data, az, el = preprocess(os.path.join(data_set, files[0]),
                                   verbose=False)
    grid_rows, grid_cols, n_samples = test_data.shape[:3]

    all_mns = numpy.zeros([n_locations, grid_rows, grid_cols, n_samples])
    all_vrs = numpy.zeros([n_locations, grid_rows, grid_cols, n_samples])

    for i, file_name in enumerate(files):
        data, _, _ = preprocess(os.path.join(data_set, file_name))

        print(data.shape)
        all_mns[i, :, :, :] = numpy.mean(data, axis=3)
        all_vrs[i, :, :, :] = numpy.var(data, axis=3)

    # get ID vars
    print('---> DATA2LONG')
    n = numpy.arange(n_locations)
    az_line = az[0, :]
    el_line = el[:, 0]
    # With the default 'xy' indexing meshgrid returns arrays of shape
    # (len(az_line), n_locations, len(el_line)); the transpose moves the
    # location axis to the front so the layout lines up with all_mns.
    locs, azs, els = numpy.meshgrid(n, az_line, el_line)
    locs = numpy.transpose(locs, axes=(1, 2, 0))
    azs = numpy.transpose(azs, axes=(1, 2, 0))
    els = numpy.transpose(els, axes=(1, 2, 0))

    # Reshape data to long format
    n_rows = n_locations * grid_rows * grid_cols
    long_data = numpy.reshape(all_mns, (n_rows, n_samples))
    long_lcs = numpy.reshape(locs, n_rows)
    long_azs = numpy.reshape(azs, n_rows)
    long_els = numpy.reshape(els, n_rows)

    id_array = numpy.column_stack((long_lcs, long_azs, long_els))

    # Get the variation across all measurements for each sample
    sample_variance = numpy.mean(all_vrs, axis=(0, 1, 2))

    # Select only those templates above threshold
    summed = numpy.sum(long_data, axis=1)
    threshold = numpy.min(summed) + filter_threshold
    include = summed > threshold

    long_data = long_data[include, :]
    id_array = id_array[include, :]
    long_lcs = long_lcs[include]
    long_azs = long_azs[include]
    long_els = long_els[include]

    # Save data
    print('---> SAVING LONG FORMAT')
    numpy.savez(data_file,
                long_data=long_data,
                long_lcs=long_lcs,
                long_azs=long_azs,
                long_els=long_els,
                ids=id_array,
                sample_variance=sample_variance,
                include=include,
                files=files)

    # %%
    print('---> RUN AND SAVE PCA MODEL')
    pca_model = PCA()
    pca_model.fit(long_data)
    pickle_save(pca_file, pca_model)
    print(banner)
import misc
import numpy
import settings
import os
from matplotlib import pyplot

# Draw the cumulative explained-variance curve of the stored PCA model of
# each data set into a single figure and write it to the figure folder.
pca_plot_file = os.path.join(settings.figure_folder, 'pca.pdf')

pyplot.style.use(settings.style)
pyplot.figure(figsize=(4, 4))

for data_set in ('israel', 'royal'):
    files = misc.folder_names(data_set, 'lcs')
    pca = misc.pickle_load(files['pca_file'])
    cvar = numpy.cumsum(pca.explained_variance_ratio_)

    # Both curves are black; only the line style tells the data sets apart.
    pyplot.plot(
        cvar,
        color='k',
        linestyle=(settings.israel_linestyle if data_set == 'israel'
                   else settings.royal_linestyle),
    )

# Legend entries follow the plotting order above.
pyplot.legend(['Israel', 'Royal'])
pyplot.xlabel('Nr of PCs')
pyplot.ylabel('Proportion of explained variance')
pyplot.tight_layout()
pyplot.savefig(pca_plot_file)
pyplot.show()
from tensorflow import keras
import misc
import settings
import os

# Render a diagram of the stored model for the 'israel' / 'azs' combination
# and write it as a PDF into the figure folder.
output = os.path.join(settings.figure_folder, 'model.pdf')

files = misc.folder_names('israel', 'azs')
model = keras.models.load_model(files['model_file'])

keras.utils.plot_model(
    model,
    to_file=output,
    show_shapes=True,
    show_layer_names=False,
    rankdir='TB',
    expand_nested=True,
    dpi=96,
)
import scipy.stats as stats
from matplotlib import pyplot
import os
import misc
import numpy
import settings

pyplot.style.use(settings.style)

data_set = 'royal'
loss_function = 'Categorical Cross Entropy'

# Look up the file-name tables for the three dimensions (lcs, azs, els).
files_lcs, files_azs, files_els = (
    misc.folder_names(data_set, dimension)
    for dimension in ('lcs', 'azs', 'els')
)

# Load the stored results for each dimension plus the perfect-memory
# results, which are kept under the 'lcs' file names.
results_azs, results_els, results_lcs = (
    misc.pickle_load(names['results_file'])
    for names in (files_azs, files_els, files_lcs)
)
perfect_memory = misc.pickle_load(files_lcs['perfect_memory_file'])

output_file = os.path.join(settings.figure_folder,
                           '{}_performance.pdf'.format(data_set))

# %% Plot errors

# 3 x 3 grid of axes on a 10 x 10 inch figure; drawing starts top-left.
fig, axes = pyplot.subplots(nrows=3, ncols=3)
fig.set_size_inches(10, 10)

pyplot.sca(axes[0, 0])
Esempio n. 5
0
import numpy
import pandas
import scipy.spatial.distance as distance

import misc
import process_functions
import settings

# Run configuration.
data_set = 'israel'
generate_data = False
iterations = 10
n_components = settings.n_components

print('Running perfect memory for', data_set)

file_names = misc.folder_names(data_set, None)
output_file = file_names['perfect_memory_file']

# Optionally rebuild the long-format data and PCA model before loading them.
if generate_data:
    process_functions.process_data_set(data_set)

# Read prepared data
data = numpy.load(file_names['npz_file'])
corridor_distances = misc.map_lcs_to_distances(data)

pca = misc.pickle_load(file_names['pca_file'])

# Column 0 of the id table is overwritten with the corridor distances.
correct_ids = data['ids']
correct_ids[:, 0] = corridor_distances

# Project the templates with the PCA model and keep only the leading
# n_components columns as inputs.
templates = data['long_data']
pca_templates = pca.transform(templates)
inputs = pca_templates[:, :n_components]
import os
from matplotlib import pyplot
from tensorflow import keras
import pandas
import misc
import process_functions
import settings

# Accumulators across data sets; nothing in the lines visible here appends
# to them.
ns = []
ws = []
ws_nb = []
rs = []

for data_set in ['israel', 'royal']:
    selected_dimension = 'lcs'
    file_names = misc.folder_names(data_set, selected_dimension)
    data = numpy.load(file_names['npz_file'])
    templates = data['long_data']

    # Number of values needed to hold every template reduced to
    # settings.n_components numbers.
    total_n_numbers = templates.shape[0] * settings.n_components

    new_model = keras.models.load_model(file_names['model_file'])
    weights = new_model.get_weights()
    total_n_weights = 0
    total_n_weights_no_bias = 0
    for w in weights:
        # Plain ndarray instead of numpy.matrix: the matrix class is
        # discouraged by NumPy and rejects arrays with more than two
        # dimensions. size/ndim give the same counts for 1-D and 2-D weights.
        m = numpy.asarray(w)
        total_n_weights = total_n_weights + m.size
        # Bias vectors are 1-D; everything with more axes is a kernel. Using
        # ndim (rather than the first dimension being > 1) also counts
        # kernels whose first dimension happens to be 1.
        if m.ndim > 1:
            total_n_weights_no_bias = total_n_weights_no_bias + m.size
Esempio n. 7
0
import settings
import os
from matplotlib import pyplot
import smoothn

# Target path for the training-history figure.
# NOTE(review): not used in the lines visible here - presumably handed to a
# save call further down; confirm.
training_history_plot_file = os.path.join(settings.figure_folder, 'history.pdf')

pyplot.style.use(settings.style)

pyplot.figure(figsize=(10,5))

# One subplot per data set ('israel' in slot 1, 'royal' in slot 2 of a 1 x 2
# layout); within each subplot one loss trace per dimension.
for data_set in ['israel', 'royal']:
    if data_set == 'israel': pyplot.subplot(1, 2, 1)
    if data_set == 'royal': pyplot.subplot(1, 2, 2)
    for dimension in ['azs', 'els', 'lcs']:
        # Load the stored training history for this data set / dimension.
        files = misc.folder_names(data_set, dimension)
        history_file = files['history_file']
        history = misc.pickle_load(history_file)
        # NOTE(review): linestyle is selected here but is not passed to the
        # pyplot.plot call below in the visible lines - confirm whether that
        # is intended.
        if data_set == 'israel': linestyle = settings.israel_linestyle
        if data_set == 'royal': linestyle = settings.royal_linestyle

        # Colour encodes the dimension.
        if dimension == 'azs': color = settings.azs_color
        if dimension == 'els': color = settings.els_color
        if dimension == 'lcs': color = settings.lcs_color

        # The 'loss' entries are run through smoothn with s0=1; element 0 of
        # its return value is what gets plotted (presumably the smoothed
        # trace - verify against the smoothn module).
        trace = numpy.array(history['loss'])
        trace = smoothn.smoothn(trace, s0=1)[0]

        pyplot.plot(trace, color=color)
        # Axis limits and label are re-applied on every inner iteration.
        pyplot.ylim([0, 4])
        pyplot.xlabel('Epoch')