# Emotion dimension to model; the visible code only handles 'valence'.
EMO = 'valence'
# EMO='arousal'
do_regularize = False
# fold_id = 2

# Accumulators intended to collect results across folds.
all_fold_pred = list()
all_fold_y_test = list()
all_fold_id_test = list()

for fold_id in range(1):
    print('... loading FOLD %d' % fold_id)
    # Close the pickle file deterministically instead of leaking the handle.
    with open(DATADIR + '/pkl/fold%d_normed.pkl' % (fold_id), "rb") as fold_file:
        fold = pickle.load(fold_file)
    X_train, y_train, id_train = load_X_from_fold(fold, 'train')
    X_test, y_test, id_test = load_X_from_fold(fold, 'test')

    print(id_test.shape)

    # Disabled feature-subset experiments:
    # X_train = X_train[:,[10,12,13,17,19,82,83,84,85,89,90,91,103,140,142,146,148,212,214,218,220]]
    # X_test = X_test[:,[10,12,13,17,19,82,83,84,85,89,90,91,103,140,142,146,148,212,214,218,220]]
    # X_train = X_train[:,[13,85,103,142,214]]
    # X_test = X_test[:,[13,85,103,142,214]]

    # one dimension at a time
    # 0: arousal, 1: valence
    # NOTE(review): the mapping above says column 0 is arousal, yet the
    # valence branch below selects column 0 -- confirm which one is right.
    if EMO == 'valence':
        print('... emotion: valence')
        y_train = y_train[:, 0]
        y_test = y_test[:, 0]
# Emotion dimension to model; the visible code only handles 'valence'.
EMO = 'valence'
# EMO='arousal'
do_regularize = False
# fold_id = 2

# Accumulators intended to collect results across folds.
all_fold_pred = list()
all_fold_y_test = list()
all_fold_id_test = list()

for fold_id in range(10):
    print('... loading FOLD %d' % fold_id)
    # Close the pickle file deterministically instead of leaking the handle.
    with open(DATADIR + '/pkl/fold%d_normed.pkl' % (fold_id), "rb") as fold_file:
        fold = pickle.load(fold_file)
    X_train, y_train, id_train = load_X_from_fold(fold, 'train')
    X_test, y_test, id_test = load_X_from_fold(fold, 'test')

    print(id_test.shape)

    # Disabled feature-subset experiments:
    # X_train = X_train[:,[10,12,13,17,19,82,83,84,85,89,90,91,103,140,142,146,148,212,214,218,220]]
    # X_test = X_test[:,[10,12,13,17,19,82,83,84,85,89,90,91,103,140,142,146,148,212,214,218,220]]
    # X_train = X_train[:,[13,85,103,142,214]]
    # X_test = X_test[:,[13,85,103,142,214]]

    # one dimension at a time
    # 0: arousal, 1: valence
    # NOTE(review): the mapping above says column 0 is arousal, yet the
    # valence branch below selects column 0 -- confirm which one is right.
    if EMO == 'valence':
        print('... emotion: valence')
        y_train = y_train[:, 0]
        y_test = y_test[:, 0]
def test_dA(learning_rate=0.1, training_epochs=100, batch_size=60, output_folder="dA_plots/"):
    """Train a denoising autoencoder on each of 10 folds and save its activations.

    For every fold, the pickled data is read from
    ``./train/pkl/fold<N>_normed.pkl``, min-max scaled (the scaler is fitted
    on the training split and re-used on the test split), and used to train
    a ``dA`` with minibatch updates. The hidden-layer values computed from
    corrupted train and test inputs are then pickled into
    ``AE/activations/`` together with the second and third values returned
    by ``load_X_from_fold`` (stored under ``"y"`` and ``"song_id"``).

    :type learning_rate: float
    :param learning_rate: learning rate used for training the denoising
                          autoencoder

    :type training_epochs: int
    :param training_epochs: number of epochs used for training

    :type batch_size: int
    :param batch_size: number of rows per training minibatch; trailing rows
                       that do not fill a whole batch are not trained on

    :type output_folder: string
    :param output_folder: directory that is created if missing (nothing is
                          written to it by this function)
    """
    cost_type = "MSE"
    noise_type = "gaussian"
    corruption_level = 0.3
    n_hidden = 500

    # DATADIR = '/baie/corpus/emoMusic/train/'
    DATADIR = "./train/"
    act_dir = "AE/activations/"

    # Create the directories once, up front, so a missing activation
    # directory cannot make the final dump fail after training has run.
    for folder in (output_folder, act_dir):
        if not os.path.isdir(folder):
            os.makedirs(folder)

    for fold_id in range(0, 10):
        print("... loading FOLD %d" % fold_id)
        fold_path = os.path.join(DATADIR, "pkl", "fold%d_normed.pkl" % fold_id)
        # NOTE: pickle must only be used on trusted, locally produced files.
        with open(fold_path, "rb") as fold_file:
            fold = pickle.load(fold_file)

        X_train, y_train, id_train = load_X_from_fold(fold, "train")
        # scale data to [0, 1]
        min_max_scaler = preprocessing.MinMaxScaler()
        X_train = min_max_scaler.fit_transform(X_train)
        # X_train = X_train[:, 0:256]
        train_set_x, _ = shared_dataset(X_train, y_train)

        X_test, y_test, id_test = load_X_from_fold(fold, "test")
        # Re-use the scaler fitted on the training split.
        X_test = min_max_scaler.transform(X_test)
        # X_test = X_test[:, 0:256]
        print(X_test.shape)
        test_set_x, _ = shared_dataset(X_test, y_test)

        # Number of full minibatches; floor division keeps this an integer
        # regardless of the interpreter's division semantics.
        n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
        print("batch_size: %d, n_train_batches: %d" % (batch_size, n_train_batches))

        # allocate symbolic variables for the data
        index = T.lscalar()  # index to a [mini]batch
        x = T.matrix("x")

        ######################
        # BUILDING THE MODEL #
        ######################
        rng = numpy.random.RandomState(123)
        theano_rng = RandomStreams(rng.randint(2 ** 30))

        da = dA(
            numpy_rng=rng,
            theano_rng=theano_rng,
            input=x,
            n_visible=260,
            # n_visible=256,
            n_hidden=n_hidden,
            act_enc="sigmoid",
            act_dec="sigmoid",
        )

        cost, updates = da.get_cost_updates(
            corruption_level=corruption_level,
            learning_rate=learning_rate,
            cost=cost_type,
            noise=noise_type,
            # noise = 'binomial'
        )

        train_da = theano.function(
            [index],
            cost,
            updates=updates,
            givens={x: train_set_x[index * batch_size : (index + 1) * batch_size]},
        )

        ############
        # TRAINING #
        ############
        start_time = timeit.default_timer()
        for epoch in range(training_epochs):
            batch_costs = [train_da(batch_index) for batch_index in range(n_train_batches)]
            print("Training epoch %d, cost  %s" % (epoch, numpy.mean(batch_costs)))
        training_time = timeit.default_timer() - start_time

        sys.stderr.write(
            "The no corruption code for file "
            + os.path.split(__file__)[1]
            + " ran for %.2fm" % (training_time / 60.0)
            + "\n"
        )

        denoising_error = da.get_denoising_error(test_set_x, cost_type, noise_type, corruption_level)
        print(
            "Training complete in %f (min) with final denoising error in test: %f"
            % (training_time / 60.0, denoising_error)
        )

        # Theano functions to get the representations learned by the model.
        # A symbolic matrix (rather than the shared dataset) is fed to
        # get_hidden_values and substituted with the corrupted input.
        rep_input = theano.tensor.matrix("input")

        # activations from the test dataset
        tilde_x = da.get_corrupted_input(test_set_x, corruption_level)
        get_rep_test = theano.function(
            [], da.get_hidden_values(rep_input), updates={}, givens={rep_input: tilde_x}, name="get_rep_test"
        )
        test_act = get_rep_test()

        # activations from the training dataset
        tilde_x = da.get_corrupted_input(train_set_x, corruption_level)
        get_rep_train = theano.function(
            [], da.get_hidden_values(rep_input), updates={}, givens={rep_input: tilde_x}, name="get_rep_train"
        )
        train_act = get_rep_train()

        output = {
            "train": {"X": train_act, "y": y_train, "song_id": id_train},
            "test": {"X": test_act, "y": y_test, "song_id": id_test},
        }

        nom = os.path.join(
            act_dir,
            "fold%d_cost%s_noise%s_level%.1f_nh%d_it%d.pkl"
            % (fold_id, cost_type, noise_type, corruption_level, n_hidden, training_epochs),
        )
        with open(nom, "wb") as act_file:
            pickle.dump(output, act_file)
        print("activation (dict) saved in " + nom)
def test_dA(learning_rate=0.1,
            training_epochs=100,
            batch_size=60,
            output_folder='dA_plots/'):
    """Train a denoising autoencoder on each of 10 folds and save its activations.

    For every fold, the pickled data is read from
    ``./train/pkl/fold<N>_normed.pkl``, min-max scaled (the scaler is fitted
    on the training split and re-used on the test split), and used to train
    a ``dA`` with minibatch updates. The hidden-layer values computed from
    corrupted train and test inputs are then pickled into
    ``AE/activations/`` together with the second and third values returned
    by ``load_X_from_fold`` (stored under ``'y'`` and ``'song_id'``).

    :type learning_rate: float
    :param learning_rate: learning rate used for training the denoising
                          autoencoder

    :type training_epochs: int
    :param training_epochs: number of epochs used for training

    :type batch_size: int
    :param batch_size: number of rows per training minibatch; trailing rows
                       that do not fill a whole batch are not trained on

    :type output_folder: string
    :param output_folder: directory that is created if missing (nothing is
                          written to it by this function)
    """
    cost_type = 'MSE'
    noise_type = 'gaussian'
    corruption_level = 0.3
    n_hidden = 500

    # DATADIR = '/baie/corpus/emoMusic/train/'
    DATADIR = './train/'
    act_dir = 'AE/activations/'

    # Create the directories once, up front, so a missing activation
    # directory cannot make the final dump fail after training has run.
    for folder in (output_folder, act_dir):
        if not os.path.isdir(folder):
            os.makedirs(folder)

    for fold_id in range(0, 10):
        print('... loading FOLD %d' % fold_id)
        fold_path = os.path.join(DATADIR, 'pkl',
                                 'fold%d_normed.pkl' % fold_id)
        # NOTE: pickle must only be used on trusted, locally produced files.
        with open(fold_path, "rb") as fold_file:
            fold = pickle.load(fold_file)

        X_train, y_train, id_train = load_X_from_fold(fold, 'train')
        # scale data to [0, 1]
        min_max_scaler = preprocessing.MinMaxScaler()
        X_train = min_max_scaler.fit_transform(X_train)
        # X_train = X_train[:, 0:256]
        train_set_x, _ = shared_dataset(X_train, y_train)

        X_test, y_test, id_test = load_X_from_fold(fold, 'test')
        # Re-use the scaler fitted on the training split.
        X_test = min_max_scaler.transform(X_test)
        # X_test = X_test[:, 0:256]
        print(X_test.shape)
        test_set_x, _ = shared_dataset(X_test, y_test)

        # Number of full minibatches; floor division keeps this an integer
        # regardless of the interpreter's division semantics.
        n_train_batches = (
            train_set_x.get_value(borrow=True).shape[0] // batch_size)
        print('batch_size: %d, n_train_batches: %d' % (batch_size,
                                                       n_train_batches))

        # allocate symbolic variables for the data
        index = T.lscalar()  # index to a [mini]batch
        x = T.matrix('x')

        ######################
        # BUILDING THE MODEL #
        ######################
        rng = numpy.random.RandomState(123)
        theano_rng = RandomStreams(rng.randint(2**30))

        da = dA(
            numpy_rng=rng,
            theano_rng=theano_rng,
            input=x,
            n_visible=260,
            # n_visible=256,
            n_hidden=n_hidden,
            act_enc='sigmoid',
            act_dec='sigmoid')

        cost, updates = da.get_cost_updates(
            corruption_level=corruption_level,
            learning_rate=learning_rate,
            cost=cost_type,
            noise=noise_type,
            # noise = 'binomial'
        )

        train_da = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size]
            })

        ############
        # TRAINING #
        ############
        start_time = timeit.default_timer()
        for epoch in range(training_epochs):
            batch_costs = [
                train_da(batch_index)
                for batch_index in range(n_train_batches)
            ]
            print('Training epoch %d, cost  %s' % (epoch,
                                                   numpy.mean(batch_costs)))
        training_time = timeit.default_timer() - start_time

        sys.stderr.write('The no corruption code for file ' +
                         os.path.split(__file__)[1] +
                         ' ran for %.2fm' % (training_time / 60.) + '\n')

        denoising_error = da.get_denoising_error(test_set_x, cost_type,
                                                 noise_type, corruption_level)
        print('Training complete in %f (min) with final denoising error in test: %f'
              % (training_time / 60., denoising_error))

        # Theano functions to get the representations learned by the model.
        # A symbolic matrix (rather than the shared dataset) is fed to
        # get_hidden_values and substituted with the corrupted input.
        rep_input = theano.tensor.matrix('input')

        # activations from the test dataset
        tilde_x = da.get_corrupted_input(test_set_x, corruption_level)
        get_rep_test = theano.function([],
                                       da.get_hidden_values(rep_input),
                                       updates={},
                                       givens={rep_input: tilde_x},
                                       name='get_rep_test')
        test_act = get_rep_test()

        # activations from the training dataset
        tilde_x = da.get_corrupted_input(train_set_x, corruption_level)
        get_rep_train = theano.function([],
                                        da.get_hidden_values(rep_input),
                                        updates={},
                                        givens={rep_input: tilde_x},
                                        name='get_rep_train')
        train_act = get_rep_train()

        output = {
            'train': {
                'X': train_act,
                'y': y_train,
                'song_id': id_train,
            },
            'test': {
                'X': test_act,
                'y': y_test,
                'song_id': id_test,
            },
        }

        nom = os.path.join(
            act_dir, 'fold%d_cost%s_noise%s_level%.1f_nh%d_it%d.pkl' %
            (fold_id, cost_type, noise_type, corruption_level, n_hidden,
             training_epochs))
        with open(nom, "wb") as act_file:
            pickle.dump(output, act_file)
        print('activation (dict) saved in ' + nom)