Example #1
def prepare_experimental_data():
    path = "data/prepared_data"
    label_path = "data/labels/self_report.csv"
    all_labels = load_all_labels(label_path)
    participant_list = [
        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
        38, 40, 41, 42, 43
    ]
    # keep only the selected participants' labels for the requested label type
    all_labels = \
        load_labels(all_labels, participant_list, type=LABEL_TYPE)
    labels = all_labels
    # class counts
    for c in CLASSES:
        print("class count", c, (np.array(all_labels) == c).sum())

    physiological_data = load_all_physiological(path, participant_list)

    participants, trials = np.array(all_labels).shape
    all_processed_physiological = []
    for p in range(participants):
        all_trials_physiological = []
        for t in range(trials):
            # preprocessing
            # Ignores 8 seconds from the start of each trial
            data = physiological_data[p, t, IGNORE_TIME * PPG_SAMPLING_RATE:,
                                      0]
            preprocessed_physiological = \
                physiological_preprocessing(data,
                                            sampling_rate=PPG_SAMPLING_RATE)

            all_trials_physiological.append(preprocessed_physiological)

        all_processed_physiological.append(all_trials_physiological)
    physiological_data = np.array(all_processed_physiological)

    return physiological_data, labels
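# Hedged usage sketch (assumption, not part of the original snippet): flattens
# the prepared arrays into one sample axis of participant*trial rows and makes
# a random train/test split; split_prepared_data is a hypothetical helper.
import numpy as np


def split_prepared_data(test_fraction=0.2, seed=0):
    physiological_data, labels = prepare_experimental_data()
    x = physiological_data.reshape(-1, physiological_data.shape[-1])
    y = np.array(labels).reshape(-1)
    rng = np.random.default_rng(seed)
    idx = rng.permutation(len(y))
    n_test = int(len(y) * test_fraction)
    return x[idx[n_test:]], y[idx[n_test:]], x[idx[:n_test]], y[idx[:n_test]]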
Example #2
    def __init__(self,
                 batch_size,
                 nv,
                 num_classes,
                 shapes,
                 desc_path,
                 labels_path,
                 patch_op_path,
                 radius,
                 nrings,
                 ndirs,
                 ratio,
                 shuffle=True):
        'Initialization'
        self.nv = nv
        if nv is None:
            self.batch_size = 1
        else:
            self.batch_size = batch_size

        # save shapes names
        self.names = load_names(shapes)

        # load patch op
        contributors, weights, transport, parents, angular_shifts = load_patch_op(
            shapes_names_txt=shapes,
            shapes_nv=nv,
            radius=radius,
            nrings=nrings,
            ndirs=ndirs,
            ratio=ratio,
            dataset_path=patch_op_path)

        # load signal
        descriptors = load_descriptors(shapes, desc_path)
        self.nsamples = descriptors.shape[0]
        self.input_dim = descriptors.shape[-1]

        # load labels
        self.labels = load_labels(shapes,
                                  labels_path,
                                  num_classes,
                                  to_categorical=False)

        x = [descriptors]
        self.keys = ['input_signal']
        for j in range(len(contributors)):
            self.keys.append('contributors_' + int_to_string(j))
            self.keys.append('weights_' + int_to_string(j))
            self.keys.append('transport_' + int_to_string(j))
        for j in range(len(parents)):
            self.keys.append('parents_' + int_to_string(j))
            self.keys.append('angular_shifts_' + int_to_string(j))

        for j in range(len(contributors)):
            x.append(contributors[j])
            x.append(weights[j])
            x.append(transport[j])

        for j in range(len(parents)):
            x.append(parents[j])
            x.append(angular_shifts[j])

        self.inputs = dict(zip(self.keys, x))
        # self.nsamples = np.ma.size(self.inputs['input_signal'], axis=0)
        self.num_classes = num_classes
        self.shuffle = shuffle
        self.on_epoch_end()
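    # Hedged sketch (assumption, not in the original fragment): the remaining
    # methods of such a keras.utils.Sequence generator typically look like the
    # following, reusing self.inputs, self.labels, self.keys, self.nsamples,
    # self.batch_size and self.shuffle set in __init__ (numpy assumed imported
    # as np at module level).
    def __len__(self):
        # number of batches per epoch
        return self.nsamples // self.batch_size

    def on_epoch_end(self):
        # reshuffle the sample indices after every epoch
        self.indexes = np.arange(self.nsamples)
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __getitem__(self, index):
        # slice one batch out of every input tensor
        batch_idx = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        batch_x = {key: self.inputs[key][batch_idx] for key in self.keys}
        batch_y = self.labels[batch_idx]
        return batch_x, batch_y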
Example #3
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

sess.run(tf.global_variables_initializer())

datasets = ['./data/data_batch_1', './data/data_batch_2', './data/data_batch_3', './data/data_batch_4','./data/data_batch_5']
epoch = int(input('\nPlease input epoch:'))
batch_size = int(input('Please input batch size:'))
temp = 50000 // batch_size  # number of mini-batches per epoch

t1 = time.time()
start_index = 0
images = np.zeros([50000, 32, 32, 3])
labels = np.zeros([50000, 10])
for j in range(5):
	images[j*10000:(j+1)*10000] = load_data.load_images(datasets[j])
	labels[j*10000:(j+1)*10000] = load_data.load_labels(datasets[j])


for i in range(epoch * temp):
	batch_x, batch_y, start_index = load_data.next_batch(images, labels, batch_size, start_index)
	_, loss_val = sess.run([train_step, cross_entropy], feed_dict={x: batch_x, y_: batch_y, keep_prob: 0.8})
	if i % temp == 0:
		print('epoch %d , loss = %s' % (i // temp, loss_val))

del images, labels

print('\nEnd training.\n')
t2 = time.time()

test_x, test_y = load_data.load_data('./data/test_batch')
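# Hedged continuation sketch (assumption, not in the original snippet): with
# the accuracy op defined above, the loaded test set could be scored in the
# same session, disabling dropout via keep_prob=1.0 (in smaller chunks if the
# whole test batch does not fit in memory).
test_accuracy = sess.run(accuracy, feed_dict={x: test_x, y_: test_y, keep_prob: 1.0})
print('test accuracy = %s, training time = %.1fs' % (test_accuracy, t2 - t1))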
Example #4
'''
Question 4: Feature extraction and classification

@author: Emma Strubell
'''

import load_data as load
import rbm

train_size = load.max_train_size
test_size = load.max_test_size

print "Loading %d/%d training instances" % (train_size, load.max_train_size)
train_instances = load.load_data('train', train_size)

print "Loading %d/%d training labels" % (train_size, load.max_train_size)
train_labels = load.load_labels('train', train_size)

print "Loading %d/%d training instances" % (test_size, load.max_test_size)
test_instances = load.load_data('test', test_size)

print "Loading %d/%d training labels" % (test_size, load.max_test_size)
test_labels = load.load_labels('test', test_size)

print "Loading model parameters"
w_c = load.load_params("C400")
w_b = load.load_params("B400")
w_p = load.load_params("P400")

def q4a():
    # compute embeddings for train and test data
    print "Computing embeddings"
Example #5
import os
from argparse import ArgumentParser

import numpy as np

from net_architectures import build_feedforward
### MAIN ###

a = ArgumentParser()
a.add_argument('-c', dest='config', required=True, type=str)
opts = a.parse_args()
settings = load_config(opts.config)

# Load training data
file_list = os.listdir(settings.wav_folder)
#TODO: check that there are only wave files
waveforms = load_waves(settings, file_list)
mfccs, max_T, all_lengths = extract_mfccs(waveforms, settings) #TODO: what other features? intensity? HNR?
# TODO: we could work with mels instead of mfccs
# print mfccs.shape # (number of files, max_T, 12 coeff)
alignments = load_labels(settings, file_list)
targets = upsample_alignment(alignments, max_T, settings)
print('mfccs shape', np.array(mfccs).shape)
print('targets shape', np.array(targets).shape)

# x = mfccs and y = labels
x = np.array(mfccs)
y = np.array(targets)

x_fbf = []
y_fbf = []
c_1 = 0
c_0 = 0

# Count number of data points with targets equal 1s and 0s, balance data if unbalanced
num_ones = (y[:,:,0] == 1).sum()
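# Hedged continuation sketch (assumption, not in the original snippet): one
# plausible way to balance the data frame by frame, keeping every frame whose
# first target is 1 and at most num_ones frames whose first target is 0.
for f in range(x.shape[0]):
    for t in range(x.shape[1]):
        if y[f, t, 0] == 1:
            x_fbf.append(x[f, t])
            y_fbf.append(y[f, t])
            c_1 += 1
        elif c_0 < num_ones:
            x_fbf.append(x[f, t])
            y_fbf.append(y[f, t])
            c_0 += 1

x_fbf = np.array(x_fbf)
y_fbf = np.array(y_fbf)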
Example #6
import os
import sys

import keras

test_dir = sys.argv[1]
csv_dir = sys.argv[2]

csv_name = os.path.join(csv_dir, 'predict.csv')

h = 28
w = 28
num_classes = 10

X_test = load_image(test_dir)
X_test = X_test.reshape(10000, h, w, 1)
X_test = X_test.astype('float32')
X_test /= 127.5
X_test -= 1

Y_train = load_labels()
Y_train = keras.utils.to_categorical(Y_train, num_classes)

model = load_model('2conv.h5')

# model.save(save_model_path)

predict_test = model.predict_classes(X_test)
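# Hedged note (assumption about the installed Keras version): predict_classes
# exists only on older Keras Sequential models; on newer TensorFlow/Keras
# releases the equivalent is an argmax over the predicted probabilities, e.g.
# predict_test = np.argmax(model.predict(X_test), axis=1)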

# print(predict_test)

with open(csv_name, 'w') as f:
    f.write('image_id,predicted_label\n')
    for i in range(len(predict_test)):
        f.write(str(i) + ',' + str(predict_test[i]) + '\n')
Example #7
def shape_dataset_segmentation(train_txt,
                               test_txt,
                               patch_op_path,
                               desc_path,
                               input_dim,
                               nclasses,
                               labels_path,
                               radius,
                               nbatch,
                               nv,
                               nrings,
                               ndirs,
                               ratio,
                               nepochs,
                               generator=None,
                               classes=None,
                               save_dir=None,
                               model_name='model'):
    if model_name == 'async':
        sync_mode = 'async'
    else:
        sync_mode = 'radial_sync'

    # create model

    model = gcnn_resnet_v1(n_batch=nbatch,
                           ratio=ratio,
                           n_v=nv,
                           n_rings=nrings,
                           n_dirs=ndirs,
                           fixed_patch_op=False,
                           contributors=None,
                           weights=None,
                           angles=None,
                           parents=None,
                           angular_shifts=None,
                           batch_norm=False,
                           uv=None,
                           input_dim=input_dim,
                           nstacks=1,
                           nresblocks_per_stack=2,
                           nfilters=16,
                           sync_mode=sync_mode,
                           num_classes=nclasses)

    # load patch op
    train_c, train_w, train_t_a, train_p, train_a_s = load_patch_op(
        shapes_names_txt=train_txt,
        shapes_nv=nv,
        radius=radius,
        nrings=nrings,
        ndirs=ndirs,
        ratio=ratio,
        dataset_path=patch_op_path)

    test_c, test_w, test_t_a, test_p, test_a_s = load_patch_op(
        shapes_names_txt=test_txt,
        shapes_nv=nv,
        radius=radius,
        nrings=nrings,
        ndirs=ndirs,
        ratio=ratio,
        dataset_path=patch_op_path)

    # load signal

    train_desc = load_descriptors(train_txt, desc_path)
    n_train_samples = train_desc.shape[0]

    test_desc = load_descriptors(test_txt, desc_path)
    n_test_samples = test_desc.shape[0]

    # load labels

    y_train = load_labels(train_txt, labels_path, nclasses)
    y_test = load_labels(test_txt, labels_path, nclasses)

    x_train = [train_desc]
    x_test = [test_desc]

    input_names = ['input_signal']
    for j in range(len(train_c)):
        input_names.append('contributors_' + int_to_string(j))
        input_names.append('weights_' + int_to_string(j))
        input_names.append('transport_' + int_to_string(j))
    for j in range(len(train_p)):
        input_names.append('parents_' + int_to_string(j))
        input_names.append('angular_shifts_' + int_to_string(j))

    for j in range(len(train_c)):
        x_train.append(train_c[j])
        x_train.append(train_w[j])
        x_train.append(train_t_a[j])

        x_test.append(test_c[j])
        x_test.append(test_w[j])
        x_test.append(test_t_a[j])

    for j in range(len(train_p)):
        x_train.append(train_p[j])
        x_train.append(train_a_s[j])

        x_test.append(test_p[j])
        x_test.append(test_a_s[j])

    print('test input shapes:')
    for x_ in x_test:
        print(np.shape(x_))

    x_train = dict(zip(input_names, x_train))
    x_test = dict(zip(input_names, x_test))

    # train model

    opt = 'adam'

    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])

    model.summary()

    if generator is None:
        history = model.fit(x_train,
                            y_train,
                            batch_size=nbatch,
                            epochs=nepochs,
                            validation_data=(x_test, y_test),
                            shuffle=True)
    else:
        training_generator = generator(x_train,
                                       y_train,
                                       nbatch,
                                       nv,
                                       n_classes=nclasses,
                                       shuffle=True)
        test_generator = generator(x_test,
                                   y_test,
                                   nbatch,
                                   nv,
                                   n_classes=nclasses,
                                   shuffle=True)

        history = model.fit_generator(generator=training_generator,
                                      steps_per_epoch=n_train_samples // nbatch,
                                      epochs=nepochs,
                                      validation_data=test_generator,
                                      validation_steps=1,
                                      use_multiprocessing=False,
                                      workers=1)

    # Score trained model.
    scores = model.evaluate(x_test, y_test, verbose=1, batch_size=nbatch)
    print('Test loss:', scores[0])
    print('Test accuracy:', scores[1])

    if save_dir is not None:
        # Save model and weights
        if not os.path.isdir(save_dir):
            os.makedirs(save_dir)
        model_path = os.path.join(save_dir, model_name)
        weights_path = os.path.join(save_dir, model_name + '_weights.h5')

        # Option 1: Save Weights + Architecture
        model.save_weights(weights_path)
        with open(model_path + '.json', 'w') as f:
            f.write(model.to_json())
        model.save(model_path + '.h5')

        print('Saved trained model at %s ' % model_path)

        # confusion matrix

        # plot confusion matrix

        y_pred = model.predict(x_test, batch_size=nbatch, verbose=0)

        # plot_confusion_mat_(y_true=y_test, y_pred=y_pred, classes=classes,
        #                     save_path=os.path.join(save_dir, model_name + '_conf_mat'))
        plt_history(history=history,
                    save_path=os.path.join(save_dir, model_name + '_history'))
    else:
        plt_history(history=history, save_path=None)
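# Hedged usage note (assumption, not from the original code): with
# generator=None the dense x_train / x_test input dictionaries built above are
# passed to model.fit directly; supplying a data-generator class via the
# `generator` argument switches training to fit_generator with
# n_train_samples // nbatch steps per epoch.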