# Read a fraction of the training data (percent_data=0.10) to be used for
# supervised training.
train_x, train_y = timit.readTIMIT(
    'timit-mono-mfcc-train.pfile.gz',
    shared=False,
    listify=True,
    mapping=48,
    randomise=True,
    percent_data=0.10,
)

# Read almost the full training data (percent_data=0.99) to be used for
# unsupervised training.
train_x_full, train_y_full = timit.readTIMIT(
    'timit-mono-mfcc-train.pfile.gz',
    shared=False,
    listify=True,
    mapping=48,
    randomise=True,
    percent_data=0.99,
)

# Validation and test sets are read whole (listify=False).
valid_x, valid_y = timit.readTIMIT(
    'timit-mono-mfcc-valid.pfile.gz',
    shared=False,
    listify=False,
    mapping=48,
)
test_x, test_y = timit.readTIMIT(
    'timit-mono-mfcc-test.pfile.gz',
    shared=False,
    listify=False,
    mapping=48,
)

# Stack each list of arrays into one single matrix (features) / vector
# (labels) of training data and hand the result to timit.shared_dataset.
train_x_all, train_y_all = timit.shared_dataset(
    (np.vstack(train_x), np.hstack(train_y))
)
train_x_unsup, train_y_unsup = timit.shared_dataset(
    (np.vstack(train_x_full), np.hstack(train_y_full))
)

# Apply map_y_48 to every label array; the stacked copies above were built
# from the labels as they were before this step.
train_y = [map_y_48(labels) for labels in train_y]
valid_y = map_y_48(valid_y)
test_y = map_y_48(test_y)

# NOTE(review): presumably these helpers wrap the arrays in Theano shared
# variables (get_value() is called on the results below) -- confirm in timit.
train_x, train_y = timit.make_shared_partitions(train_x, train_y)
valid_x, valid_y = timit.shared_dataset((valid_x, valid_y))
test_x, test_y = timit.shared_dataset((test_x, test_y))

# The unsupervised (near-full) set is the one exposed as train_set_x.
train_set_x = train_x_unsup

# Report the number of rows in the supervised and unsupervised sets.
print(train_x_all.get_value().shape[0])
print(train_set_x.get_value().shape[0])

# nn_ae = DNN(numpy_rng, [5096, 5096], 429, 144)
# nn_ae = DNN(numpy_rng, [6000, 6000], 429, 39)
# nn = DNN(numpy_rng, [6096, 6096], 429, 144)
# Network built with hidden-layer spec [hu,] and the arguments 429 and 48
# (NOTE(review): presumably input and output dimensions -- confirm in DNN).
nn = DNN(numpy_rng, [hu,], 429, 48)
#nn = DNN(numpy_rng, [10096], 1320, 48)

MODE = 'usevalid'

# Read `percent` of the (randomised) training data as a list, plus the
# complete validation and test sets.
train_x, train_y = timit.readTIMIT('timit-mono-mfcc-train.pfile.gz',
                                   shared=False, listify=True, mapping=48,
                                   percent_data=percent, randomise=True)
valid_x, valid_y = timit.readTIMIT('timit-mono-mfcc-valid.pfile.gz',
                                   shared=False, listify=False, mapping=48)
test_x, test_y = timit.readTIMIT('timit-mono-mfcc-test.pfile.gz',
                                 shared=False, listify=False, mapping=48)

#train_y = map(lambda x: map_y_48(x), train_y)
#valid_y, test_y = map_y_48(valid_y), map_y_48(test_y)

# this mode uses the standard validation set
if MODE == 'usevalid':
    train_x, train_y = timit.make_shared_partitions(train_x, train_y)
    valid_x, valid_y = timit.shared_dataset((valid_x, valid_y))
    test_x, test_y = timit.shared_dataset((test_x, test_y))

    num_partitions = len(train_x)
    print(num_partitions)

    # Run bsgd once per training partition, always against the same
    # validation and test sets.
    for i in xrange(num_partitions):
        train_set_x = train_x[i]
        train_set_y = train_y[i]
        train_set_xy = (train_set_x, train_set_y)
        # Bound to `datasets` rather than `timit`: reusing the name `timit`
        # here would replace the imported module with a list and break any
        # later timit.<function> call.
        datasets = [train_set_xy, (valid_x, valid_y), (test_x, test_y)]
        bsgd(nn, datasets, epochs=80, lr=0.008)
else:
    print(len(train_x))
    num_partitions = len(train_x)