Python load_class_data_batch Examples, loading_functions.load_class_data_batch Python Examples

Example #1

0

Show file

File: 2_layer_ahead_analysis.py Project: sl3368/DeepBirdBrain

# ---------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------
song_size = 1000
num_songs_in_batch = 3000

# Results prefix and the saved-parameter file for the 2-layer/400 model.
savefilename_pre = "/vega/stats/users/sl3368/rnn_code/results/lstm/2_layer/400/generated/"
params_file = "/vega/stats/users/sl3368/rnn_code/saves/params/lstm/2_layer/400/recent_2nd.save"

test_cost = 0.05  # general validation number

# ---------------------------------------------------------------
# Loading data
# ---------------------------------------------------------------
# The first entry of the loader's result is kept as `stim` and wrapped in a
# Theano shared variable.
dataset_info = load_class_data_batch(
    "/vega/stats/users/sl3368/Data_LC/NopadNormData/LC_stim_5.mat"
)
stim = dataset_info[0]
data_set_x = theano.shared(value=stim, borrow=True)

# ---------------------------------------------------------------
# (Re-Define) Architecture: input --> LSTM --> predict one-ahead
# ---------------------------------------------------------------
# The data is presented as a vector of inputs with many exchangeable examples
# of this vector; the symbolic matrix is passed straight through
# clip_gradient with a bound of 1.0.
x = clip_gradient(T.matrix("x"), 1.0)

# Pseudo boolean for switching between training and prediction.
is_train = T.iscalar("is_train")

rng = numpy.random.RandomState(1234)

# The poisson regression layer gets as input the hidden units

Example #2

0

Show file

File: regenerate_image.py Project: sl3368/DeepBirdBrain

# ---------------------------------------------------------------
# Paths and switches
# ---------------------------------------------------------------
savefilename = "/vega/stats/users/sl3368/encoder/results/reconstructed/reconstructed_convae_temporal_nopad_3rd_img.out"
datapathpre = "/vega/stats/users/sl3368/Data_LC/NopadNormData/"

load_params = True
load_params_filename = "/vega/stats/users/sl3368/encoder/saves/params/temporal/convae_5layer_nopad_10_3rd.save"

# ---------------------------------------------------------------
# Loading training and testing data
# ---------------------------------------------------------------
dataset_info = load_class_data_batch("".join([datapathpre, "LC_stim_10.mat"]))
stim = dataset_info[0]
data_set_x = theano.shared(value=stim, borrow=True)

# Leading dimension of the shared data divided by (song_size * minibatch_size).
# Both names are expected to be defined elsewhere in the script; they are not
# set in this excerpt.
n_batches = data_set_x.shape[0].eval() / (song_size * minibatch_size)

# ---------------------------------------------------------------
# Constructing model architecture
# ---------------------------------------------------------------
print('Building model...')

# allocate symbolic variables for the data

Example #3

0

Show file

File: script.py Project: sl3368/DeepBirdBrain

# Whether to load previously saved parameters, and the file to load from.
load_params = True
load_params_filename = "/vega/stats/users/sl3368/rnn_code/saves/params/lc_1_10_lowpad_LSTM_triple_300.save"

n_epochs = 10

# Earlier loading path, left disabled:
#dataset_info = load_all_data()
#data_set_x = dataset_info[0]
#maxBatchSize = numpy.int_(dataset_info[1])
#batch_size = maxBatchSize
#n_train_batches = 28
#n_valid_batches = 1
#n_test_batches = 1

# Training chunk: first entry of the loader result, wrapped as a shared variable.
datapathpre = "/vega/stats/users/sl3368/Data_LC/LowNormData/"
dataset_info = load_class_data_batch("".join([datapathpre, "LC_stim_5.mat"]))
stim = dataset_info[0]
data_set_x = theano.shared(value=stim, borrow=True)

# Validation and testing - for now, use the last chunk file.
dataset_info_vt = load_class_data_vt("".join([datapathpre, "LC_stim_15.mat"]))
data_set_x_vt = dataset_info_vt[0]

# Batch bookkeeping.
batch_size = 2000
n_val_batches = 10
n_test_batches = 10

n_batches = data_set_x.shape[0].eval() / batch_size
print('n_batches: ' + str(n_batches))

# Every batch is used for training; the alternative that held out the
# validation/test batches was:
#   data_set_x.shape[0].eval()/batch_size - n_val_batches - n_test_batches
n_train_batches = n_batches
print('Number of batches for training: ' + str(n_train_batches))

Example #4

0

Show file

# Parameter loading switch and source file.
load_params = True
load_params_filename = "/vega/stats/users/sl3368/rnn_code/saves/params/lstm/3_layer/1000_1000_1000/4th_5_6.save"

minibatch_size = 1
song_size = 1000

n_val_batches = 200
n_test_batches = 10

# Filepath for saving parameters.
savefilename = "/vega/stats/users/sl3368/rnn_code/saves/params/lstm/3_layer/1000_1000_1000/5th_5_6.save"

# ---------------------------------------------------------------
# Load data
# ---------------------------------------------------------------
# `datapathpre` is expected to be defined elsewhere in the script; it is not
# set in this excerpt.
dataset_info = load_class_data_batch("".join([datapathpre, "LC_stim_5.mat"]))
stim = dataset_info[0]
data_set_x = theano.shared(value=stim, borrow=True)

# Validation and testing - for now, use the last chunk file.
dataset_info_vt = load_class_data_vt("".join([datapathpre, "LC_stim_15.mat"]))
data_set_x_vt = dataset_info_vt[0]

# One batch per song_size rows of the shared data; all of them are used for
# training.
n_batches = n_train_batches = data_set_x.shape[0].eval() / song_size
print('Number of songs in single matlab chunk: ' + str(n_train_batches))

# Shuffled batch indices for training; validation uses the first
# n_val_batches indices in order.
all_inds = numpy.arange(n_batches)
numpy.random.shuffle(all_inds)
train_inds = all_inds[:n_train_batches]
val_inds = numpy.arange(n_val_batches)

Example #5

0

Show file

File: regenerate_image.py Project: sl3368/DeepBirdBrain

# Dependent on padding (regular, low, none).
song_size = 1000

# ---------------------------------------------------------------
# Paths and switches
# ---------------------------------------------------------------
savefilename = "/vega/stats/users/sl3368/encoder/results/reconstructed/reconstructed_convae_temporal_nopad_3rd_img.out"
datapathpre = "/vega/stats/users/sl3368/Data_LC/NopadNormData/"

load_params = True
load_params_filename = "/vega/stats/users/sl3368/encoder/saves/params/temporal/convae_5layer_nopad_10_3rd.save"

# ---------------------------------------------------------------
# Loading training and testing data
# ---------------------------------------------------------------
dataset_info = load_class_data_batch("".join([datapathpre, "LC_stim_10.mat"]))
stim = dataset_info[0]
data_set_x = theano.shared(value=stim, borrow=True)

# Leading dimension of the shared data divided by (song_size * minibatch_size).
# `minibatch_size` is expected to be defined elsewhere in the script; it is
# not set in this excerpt.
n_batches = data_set_x.shape[0].eval() / (song_size * minibatch_size)

# ---------------------------------------------------------------
# Constructing model architecture
# ---------------------------------------------------------------
print('Building model...')

# allocate symbolic variables for the data

# Index to a [mini]batch.
index = T.lscalar()

Example #6

0

Show file

File: dual_encoder.py Project: sl3368/DeepBirdBrain

# Should be 1: a single song is trained at a time.
minibatch_size = 1
# Dependent on padding (regular, low, none).
song_size = 2000

# ---------------------------------------------------------------
# Output files and data location
# ---------------------------------------------------------------
savefilename = '/vega/stats/users/sl3368/encoder/saves/params/gpu_conv_ae_5_layer_10.save'
results_filename = '/vega/stats/users/sl3368/encoder/results/gpu_conv_ae_5_layer_10.out'
datapathpre = '/vega/stats/users/sl3368/Data_LC/LowNormData/'

# ---------------------------------------------------------------
# Loading training and testing data
# ---------------------------------------------------------------
dataset_info = load_class_data_batch(''.join([datapathpre, 'LC_stim_1.mat']))
stim = dataset_info[0]
data_set_x = theano.shared(value=stim, borrow=True)

# Validation and testing - for now, use the last chunk file.
dataset_info_vt = load_class_data_vt(''.join([datapathpre, 'LC_stim_15.mat']))
data_set_x_vt = dataset_info_vt[0]

# One batch per song_size rows of the shared data; all are used for training.
n_batches = n_train_batches = data_set_x.shape[0].eval() / song_size

print('Number of song for training in single chunk file: ' + str(n_train_batches))

# ---------------------------------------------------------------
# ---------------------------------------------------------------

Example #7

0

Show file

File: 3_layer_load_and_generate.py Project: sl3368/DeepBirdBrain

# ---------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------
song_size = 2000
num_songs_in_batch = 3000

# Results prefix and the saved-parameter file for the 3-layer/300 model.
savefilename_pre = '/vega/stats/users/sl3368/rnn_code/results/lstm/3_layer/300/generated/'
params_file = '/vega/stats/users/sl3368/rnn_code/saves/params/lstm/3_layer/300/recent_2nd.save'

# General validation number.
test_cost = 0.05

# ---------------------------------------------------------------
# Loading data
# ---------------------------------------------------------------
# The first entry of the loader's result is kept as `stim` and wrapped in a
# Theano shared variable.
dataset_info = load_class_data_batch(
    '/vega/stats/users/sl3368/Data_LC/LowNormData/LC_stim_5.mat'
)
stim = dataset_info[0]
data_set_x = theano.shared(value=stim, borrow=True)

# ---------------------------------------------------------------
# (Re-Define) Architecture: input --> LSTM --> predict one-ahead
# ---------------------------------------------------------------
# The data is presented as a vector of inputs with many exchangeable examples
# of this vector; the symbolic matrix is passed straight through
# clip_gradient with a bound of 1.0.
x = clip_gradient(T.matrix('x'), 1.0)

# Pseudo boolean for switching between training and prediction.
is_train = T.iscalar('is_train')

rng = numpy.random.RandomState(1234)

# The poisson regression layer gets as input the hidden units

Example #8

0

Show file

File: 3_layer_ahead_analysis.py Project: sl3368/DeepBirdBrain

# ---------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------
song_size = 1000
num_songs_in_batch = 4000

# Results prefix and the saved-parameter file for the 3-layer/1000 model.
savefilename_pre = "/vega/stats/users/sl3368/rnn_code/results/lstm/3_layer/1000_1000_1000/generated/"
params_file = "/vega/stats/users/sl3368/rnn_code/saves/params/lstm/3_layer/1000_1000_1000/4th_5_6.save"

test_cost = 0.05  # general validation number

# ---------------------------------------------------------------
# Loading data
# ---------------------------------------------------------------
# The first entry of the loader's result is kept as `stim` and wrapped in a
# Theano shared variable.
dataset_info = load_class_data_batch(
    "/vega/stats/users/sl3368/Data_LC/NopadNormData/LC_stim_5.mat"
)
stim = dataset_info[0]
data_set_x = theano.shared(value=stim, borrow=True)

# ---------------------------------------------------------------
# (Re-Define) Architecture: input --> LSTM --> predict one-ahead
# ---------------------------------------------------------------
# The data is presented as a vector of inputs with many exchangeable examples
# of this vector; the symbolic matrix is passed straight through
# clip_gradient with a bound of 1.0.
x = clip_gradient(T.matrix("x"), 1.0)

# Pseudo boolean for switching between training and prediction.
is_train = T.iscalar("is_train")

rng = numpy.random.RandomState(1234)

# The poisson regression layer gets as input the hidden units

Example #9

0

Show file

File: mlp_encoder.py Project: sl3368/DeepBirdBrain

def test_dA(learning_rate=0.1, training_epochs=1000,
            dataset='mnist.pkl.gz',
            batch_size=1, output_folder='/vega/stats/users/sl3368/encoder/results/dA_plots'):

    """
    Train two denoising autoencoders on song stimuli and save their filters.

    Loads the ``LC_stim_10.mat`` stimulus chunk, flattens the first 1000 rows
    of each 2000-row song slot into one training vector, then trains a dA
    with no corruption and a second one with 30% corruption. After each run
    the learned weights are tiled into an image and written to disk.

    :type learning_rate: float
    :param learning_rate: learning rate used for training the denoising
                          autoencoder

    :type training_epochs: int
    :param training_epochs: number of epochs used for training

    :type dataset: string
    :param dataset: unused; the stimulus path is hard-coded below. Kept so
                    existing callers that pass it keep working.

    :type batch_size: int
    :param batch_size: number of flattened songs per minibatch

    :type output_folder: string
    :param output_folder: directory created if missing and made the working
                          directory; the 30%-corruption image is saved there

    """
    # Layout constants for the flattened training matrix.
    n_songs = 3000          # rows of the training matrix
    rows_per_song = 2000    # stride between consecutive songs in `stim`
    rows_used = 1000        # leading rows of each song that are kept
    n_visible = 60 * rows_used
    # NOTE(review): each training row is the ravel of a (1000, 60) slice;
    # confirm that (60, 1000) is the intended display layout for the filters.
    filter_img_shape = (60, rows_used)

    datapathpre = '/vega/stats/users/sl3368/Data_LC/LowNormData/'
    dataset_info = load_class_data_batch(datapathpre + 'LC_stim_10.mat')
    stim = dataset_info[0]

    # adding whole songs as single input
    new_stim = numpy.zeros((n_songs, n_visible), dtype=theano.config.floatX)
    for i in range(n_songs):
        first_row = i * rows_per_song
        new_stim[i] = stim[first_row:first_row + rows_used].ravel()

    train_set_x = theano.shared(new_stim, borrow=True)

    # Number of minibatches; floor division keeps this an integer so it can
    # drive range() on every Python version.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    print('n_train_batches: ' + str(n_train_batches))

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')      # one flattened song per row

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    def _train_and_save_filters(corruption_level, label, image_path):
        # Build, train and plot one dA; `label` only appears in the timing
        # message and `image_path` is where the tiled filters are written.
        rng = numpy.random.RandomState(123)
        theano_rng = RandomStreams(rng.randint(2 ** 30))

        da = dA(
            numpy_rng=rng,
            theano_rng=theano_rng,
            input=x,
            n_visible=n_visible,
            n_hidden=2000
        )

        cost, updates = da.get_cost_updates(
            corruption_level=corruption_level,
            learning_rate=learning_rate
        )

        train_da = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size: (index + 1) * batch_size]
            }
        )

        start_time = timeit.default_timer()

        # go through training epochs
        for epoch in range(training_epochs):
            # go through training set
            c = [train_da(batch_index)
                 for batch_index in range(n_train_batches)]
            print('Training epoch %d, cost  %s' % (epoch, numpy.mean(c)))

        training_time = timeit.default_timer() - start_time

        # Only the last literal is %-formatted, so the '%' inside `label`
        # (e.g. '30% corruption') is emitted verbatim.
        sys.stderr.write('The ' + label + ' code for file ' +
                         os.path.split(__file__)[1] +
                         ' ran for %.2fm' % (training_time / 60.) + '\n')

        image = Image.fromarray(
            tile_raster_images(X=da.W.get_value(borrow=True).T,
                               img_shape=filter_img_shape,
                               tile_shape=(10, 10),
                               tile_spacing=(1, 1)))
        image.save(image_path)

    ####################################
    # BUILDING THE MODEL NO CORRUPTION #
    ####################################
    _train_and_save_filters(
        0., 'no corruption',
        '/vega/stats/users/sl3368/encoder/saves/filters_corruption_0.png')

    #####################################
    # BUILDING THE MODEL CORRUPTION 30% #
    #####################################
    # Saved relative to output_folder (the current working directory).
    # The filter shape here used to be the MNIST (28, 28), which cannot hold
    # the 60 * 1000 values of one filter.
    _train_and_save_filters(0.3, '30% corruption', 'filters_corruption_30.png')

    # NOTE(review): this moves to the parent of the current directory rather
    # than restoring the caller's original working directory; kept as-is.
    os.chdir('../')

Example #10

0

Show file

File: mlp_encoder.py Project: sl3368/DeepBirdBrain

def test_dA(learning_rate=0.1,
            training_epochs=1000,
            dataset='mnist.pkl.gz',
            batch_size=1,
            output_folder='/vega/stats/users/sl3368/encoder/results/dA_plots'):
    """
    Train two denoising autoencoders on song stimuli and save their filters.

    Loads the ``LC_stim_10.mat`` stimulus chunk, flattens the first 1000 rows
    of each 2000-row song slot into one training vector, then trains a dA
    with no corruption and a second one with 30% corruption. After each run
    the learned weights are tiled into an image and written to disk.

    :type learning_rate: float
    :param learning_rate: learning rate used for training the denoising
                          autoencoder

    :type training_epochs: int
    :param training_epochs: number of epochs used for training

    :type dataset: string
    :param dataset: unused; the stimulus path is hard-coded below. Kept so
                    existing callers that pass it keep working.

    :type batch_size: int
    :param batch_size: number of flattened songs per minibatch

    :type output_folder: string
    :param output_folder: directory created if missing and made the working
                          directory; the 30%-corruption image is saved there

    """
    # Layout constants for the flattened training matrix.
    n_songs = 3000  # rows of the training matrix
    rows_per_song = 2000  # stride between consecutive songs in `stim`
    rows_used = 1000  # leading rows of each song that are kept
    n_visible = 60 * rows_used
    # NOTE(review): each training row is the ravel of a (1000, 60) slice;
    # confirm that (60, 1000) is the intended display layout for the filters.
    filter_img_shape = (60, rows_used)

    datapathpre = '/vega/stats/users/sl3368/Data_LC/LowNormData/'
    dataset_info = load_class_data_batch(datapathpre + 'LC_stim_10.mat')
    stim = dataset_info[0]

    # adding whole songs as single input
    new_stim = numpy.zeros((n_songs, n_visible), dtype=theano.config.floatX)
    for i in range(n_songs):
        first_row = i * rows_per_song
        new_stim[i] = stim[first_row:first_row + rows_used].ravel()

    train_set_x = theano.shared(new_stim, borrow=True)

    # Number of minibatches; floor division keeps this an integer so it can
    # drive range() on every Python version.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    print('n_train_batches: ' + str(n_train_batches))

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # one flattened song per row

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    def _train_and_save_filters(corruption_level, label, image_path):
        # Build, train and plot one dA; `label` only appears in the timing
        # message and `image_path` is where the tiled filters are written.
        rng = numpy.random.RandomState(123)
        theano_rng = RandomStreams(rng.randint(2**30))

        da = dA(numpy_rng=rng,
                theano_rng=theano_rng,
                input=x,
                n_visible=n_visible,
                n_hidden=2000)

        cost, updates = da.get_cost_updates(corruption_level=corruption_level,
                                            learning_rate=learning_rate)

        train_da = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size]
            })

        start_time = timeit.default_timer()

        # go through training epochs
        for epoch in range(training_epochs):
            # go through training set
            c = [
                train_da(batch_index)
                for batch_index in range(n_train_batches)
            ]
            print('Training epoch %d, cost  %s' % (epoch, numpy.mean(c)))

        training_time = timeit.default_timer() - start_time

        # Only the last literal is %-formatted, so the '%' inside `label`
        # (e.g. '30% corruption') is emitted verbatim.
        sys.stderr.write('The ' + label + ' code for file ' +
                         os.path.split(__file__)[1] + ' ran for %.2fm' %
                         (training_time / 60.) + '\n')

        image = Image.fromarray(
            tile_raster_images(X=da.W.get_value(borrow=True).T,
                               img_shape=filter_img_shape,
                               tile_shape=(10, 10),
                               tile_spacing=(1, 1)))
        image.save(image_path)

    ####################################
    # BUILDING THE MODEL NO CORRUPTION #
    ####################################
    _train_and_save_filters(
        0., 'no corruption',
        '/vega/stats/users/sl3368/encoder/saves/filters_corruption_0.png')

    #####################################
    # BUILDING THE MODEL CORRUPTION 30% #
    #####################################
    # Saved relative to output_folder (the current working directory).
    # The filter shape here used to be the MNIST (28, 28), which cannot hold
    # the 60 * 1000 values of one filter.
    _train_and_save_filters(0.3, '30% corruption', 'filters_corruption_30.png')

    # NOTE(review): this moves to the parent of the current directory rather
    # than restoring the caller's original working directory; kept as-is.
    os.chdir('../')