Example #1
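# NOTE: the imports are omitted from this snippet; a plausible header, assuming
# Theano's shared random streams and a project-local `timit` loader module:
import numpy as np
from theano.tensor.shared_randomstreams import RandomStreams
import timit  # assumed project-local helper for reading TIMIT pfile data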
	# Tail of a label-mapping helper: fold several classes of the 48-phone
	# label set (y_48) into a single class of the reduced 39-phone set (y_39).
	y_39[y_48 == 43] = 37
	y_39[y_48 == 16] = 37
	y_39[y_48 == 9] = 37
	return y_39


BATCH_SIZE = 300
NUM_EPOCHS = 61
# mnist = mnist.load_mnist_theano('mnist.pkl.gz')
numpy_rng = np.random.RandomState(11111)
theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

# configuration for timit

# read some percentage of the data to be used for supervised training ...
train_x, train_y = timit.readTIMIT('timit-mono-mfcc-train.pfile.gz', shared=False, listify=True, mapping=48, randomise=True, percent_data=0.10)

# read almost the full data set to be used for unsupervised training ...
train_x_full, train_y_full = timit.readTIMIT('timit-mono-mfcc-train.pfile.gz', shared=False, listify=True, mapping=48, randomise=True, percent_data=0.99)
valid_x, valid_y = timit.readTIMIT('timit-mono-mfcc-valid.pfile.gz', shared=False, listify=False, mapping=48)
test_x, test_y = timit.readTIMIT('timit-mono-mfcc-test.pfile.gz', shared=False, listify=False, mapping=48)

# stack the lists of data into one single matrix of training data ...
train_x_all = np.vstack(train_x)
train_y_all = np.hstack(train_y)
train_x_unsup = np.vstack(train_x_full)
train_y_unsup = np.hstack(train_y_full)

train_x_all, train_y_all = timit.shared_dataset((train_x_all, train_y_all))
train_x_unsup, train_y_unsup = timit.shared_dataset((train_x_unsup, train_y_unsup))
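
# `timit.shared_dataset` is not shown above; a minimal sketch of what such a
# helper usually does, following the standard Theano tutorial pattern (the
# project's actual implementation may differ):
import theano
import theano.tensor as T

def shared_dataset(data_xy, borrow=True):
    # Wrap a (features, labels) pair in Theano shared variables so minibatches
    # can be sliced out on the GPU without repeated host-to-device transfers.
    data_x, data_y = data_xy
    shared_x = theano.shared(np.asarray(data_x, dtype=theano.config.floatX),
                             borrow=borrow)
    shared_y = theano.shared(np.asarray(data_y, dtype=theano.config.floatX),
                             borrow=borrow)
    # Labels are stored as floatX for shared storage but cast back to int for use.
    return shared_x, T.cast(shared_y, 'int32')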
Example #2
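# NOTE: as in Example #1, the imports are omitted. This snippet additionally
# needs `argparse` and the `SSDAE` class (its module name is assumed here):
import argparse
import numpy as np
from theano.tensor.shared_randomstreams import RandomStreams
import timit             # assumed project-local TIMIT pfile loader
from ssdae import SSDAE  # assumed location of the semi-supervised SDAE class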
BATCH_SIZE = 400

numpy_rng = np.random.RandomState(11111)
theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

parser = argparse.ArgumentParser(description='setting up hyperparams by command line.')
parser.add_argument('--percent', '-p', type=float, default=0.999)
parser.add_argument('--beta', '-b', type=int, default=200)
parser.add_argument('--alpha', '-a', type=int, default=3)

args = parser.parse_args()
alpha = args.alpha
beta = args.beta
percent = args.percent
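
# With these flags the script (file name assumed here) can be invoked, e.g.:
#   python train_ssdae.py --percent 0.10 --alpha 3 --beta 200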

# parser.add_argument('')

print "fraction of training data used is:", percent
train_x_lab, train_y_lab, train_x_unlab = timit.readTIMITSSL('timit-mono-mfcc-train.pfile.gz', shared=False, listify=True, mapping=48, randomise=True, percent_data=percent)
# train_x_lab, train_y_lab, train_x_unlab = timit.readTIMITSSL('timit-mono-mfcc-train.pfile.gz', shared=False, listify=True, mapping=48, randomise=True, percent_data=0.9999)

valid_x, valid_y = timit.readTIMIT('timit-mono-mfcc-valid.pfile.gz', shared=False, listify=False, mapping=48, percent_data=0.20, randomise=True)
# test_x, test_y = timit.readTIMIT('timit-mono-mfcc-test.pfile.gz', shared=False, listify=False, mapping=48)

# train_x, train_y  = timit.make_shared_partitions(train_x, train_y)
# print valid_x.shape

network = SSDAE(numpy_rng, [10000, 10000], train_x_lab, train_y_lab, train_x_unlab, alpha=alpha, beta=beta)
network.trainSGD(epochs=[75, 2])
# network.trainSGDSupervised(train_x_lab, train_y_lab, valid_x, valid_y, test_x, test_y)
network.trainSGDSupervised(train_x_lab, train_y_lab, valid_x, valid_y)