# Interactive viewer: load a pickled convolutional autoencoder and plot MNIST
# digits next to their reconstructions.
# Module-level imports assumed to match the rest of this repo.
import cPickle

import numpy as np
import theano
import theano.tensor as T
import matplotlib.pyplot as plt
import matplotlib.cm as cm

from load_data import load_data_mnist


def gen_wav():
    dataset = 'mnist.pkl.gz'
    datasets = load_data_mnist(dataset)
    train_set_x, train_set_y = datasets[0]
    test_set_x, test_set_y = datasets[2]

    # load the trained model saved by the training script
    mdl = cPickle.load(open('ca_test.pkl'))

    # compile a reconstruction function for a single 28x28 image
    x = T.matrix('x')
    x_reshape = x.reshape((1, 1, 28, 28))
    y = mdl.reconstruction(input=x_reshape)
    predict = theano.function([x], y)

    while True:
        # check for 'q' before converting to int so quitting does not crash
        response = raw_input('input index (q to quit): ')
        if response == 'q':
            break
        index = int(response)
        x_in = train_set_x.get_value()[index, :]
        x_in = np.asmatrix(x_in, dtype=theano.config.floatX)
        recon = predict(x_in)

        # show the input and its reconstruction side by side
        f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
        ax1.imshow(x_in.reshape(28, 28), cmap=cm.Greys_r)
        ax1.set_title('input')
        ax2.imshow(recon.reshape(28, 28), cmap=cm.Greys_r)
        ax2.set_title('reconstructed')
        plt.show()
        plt.close()
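# Minimal entry point so the viewer can be run directly as a script. This is a
# sketch mirroring the __main__ guards used in the other files here; the
# original file may invoke gen_wav differently.
if __name__ == '__main__':
    gen_wav()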
        # --- tail of the autoencoder's cost/updates method ---
        # (the per-example loss L and the scalar cost are built earlier in the
        # method, which is not shown here)

        # weight penalty on the encoder/decoder weights; note that
        # T.dot(W, W) is only defined when W is square -- a conventional
        # L2 penalty would be T.sum(self.W ** 2) + T.sum(self.Wprime ** 2)
        L += self.lamb * (T.mean(T.dot(self.W, self.W))
                          + T.mean(T.dot(self.Wprime, self.Wprime)))

        # plain SGD updates; the loop variable is renamed so it no longer
        # shadows the list of gradients
        gparams = T.grad(cost, self.params)
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - learning_rate * gparam))
        return (cost, updates)


if __name__ == '__main__':
    # MNIST has 70000 examples; we use 50000 for training and
    # set 20000 aside for validation
    train_size = 50000
    train_data, validation_data = ld.load_data_mnist(train_size=train_size)

    # training hyperparameters -- still fiddling around, not sure which
    # values to use
    training_epochs = 100
    training_batches = 100
    patch_size = 10
    batch_size = int(train_data['images'].shape[0] / training_batches)

    batches = ld.make_vector_patches(train_data, training_batches,
                                     batch_size, patch_size)
    validation_images = ld.make_vector_patches(
        validation_data, 1, validation_data['images'].shape[0], patch_size)
    #batches, ys = ld.make_vector_patches(train_data, training_batches,
    #                                     batch_size, patch_size)
    #validation_images, validation_ys = ld.make_vector_batches(
    #    validation_data, 1, validation_data['images'].shape[0])

    index = T.lscalar()
    x = T.matrix('x')
    # ... (the rest of this training script is truncated here)
def evaluate_convA():
    learning_rate = 0.0001
    n_epochs = 10000
    #dataset = 'mnist.pkl.gz'
    dataset = 'timit'
    batch_size = 10
    start = 0
    stop = batch_size
    channel = 1
    image_h = 1
    image_w = 1000
    filter_h = 1
    filter_w = 25
    nkerns = [300]
    corruption = {'Binomial': None, 'Gaussian': 0.1, 'Sequential': None}
    wbiter = 5
    wavtype = 'sin'
    learning_rule = 'mom'
    dechid = 'lin'
    postfix = ''
    savepath = 'result/'
    savename = (savepath + 'ca_test_wb' + str(wbiter) + '_toy' + wavtype
                + '_g' + str(corruption['Gaussian']) + '_w' + str(filter_w)
                + '_' + learning_rule + '_' + dechid + postfix)

    if os.path.exists(savename + '.pkl'):
        ans = raw_input('Same exp. exists, continue? ([Y]/N) ')
        if ans.upper() == 'N':
            return

    nrng = np.random.RandomState(23455)
    trng = RandomStreams(nrng.randint(2 ** 30))

    if dataset == 'mnist.pkl.gz':
        from load_data import load_data_mnist
        datasets = load_data_mnist(dataset)
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        test_set_x, test_set_y = datasets[2]
    elif dataset == 'timit':
        from load_data import load_data_timit_seq
        train_set_x = load_data_timit_seq('train', start, stop, image_w, wavtype)
        valid_set_x = load_data_timit_seq('valid', start, stop, image_w, wavtype)
        test_set_x = load_data_timit_seq('test', start, stop, image_w, wavtype)

    # compute number of minibatches for training, validation and testing
    n_train_batches0 = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches0 = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches0 = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches = n_train_batches0 / batch_size
    n_valid_batches = n_valid_batches0 / batch_size
    n_test_batches = n_test_batches0 / batch_size
    assert min(n_train_batches, n_valid_batches, n_test_batches) > 0, \
        'Maximum batch size is %d' % min(n_train_batches0, n_valid_batches0,
                                         n_test_batches0)

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    layer0_input = x.reshape((batch_size, channel, image_h, image_w))
    layer0 = convA(nrng=nrng, trng=trng,
                   filter_shape=(nkerns[0], channel, filter_h, filter_w),
                   poolsize=(2, 2),
                   corruption=corruption)

    #cost = layer0.cost(layer0_input)
    cost = layer0.wbcost(layer0_input, wbiter)
    #cost = layer0.Melcost(layer0_input)

    params = layer0.params
    grads = T.grad(cost, params)
    gradsdic = dict(zip(params, grads))

    if learning_rule == 'ada':
        ad = AdaDelta()
        updates = ad.get_updates(learning_rate, gradsdic)
    elif learning_rule == 'con':
        updates = []
        for param_i, grad_i in zip(params, grads):
            updates.append((param_i, param_i - learning_rate * grad_i))
    elif learning_rule == 'mom':
        momentum = 0.96
        mm = Momentum(momentum)
        updates = mm.get_updates(learning_rate, gradsdic)

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={x: train_set_x[index * batch_size: (index + 1) * batch_size]})
    validate_model = theano.function(
        inputs=[index],
        outputs=cost,
        givens={x: valid_set_x[index * batch_size: (index + 1) * batch_size]})
    test_model = theano.function(
        inputs=[index],
        outputs=cost,
        givens={x: test_set_x[index * batch_size: (index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 10000               # look at this many examples regardless
    patience_increase = 2          # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)

    first_lr = learning_rate
    st_an = 800    # start annealing the learning rate at this epoch
    en_an = 2000   # used only by the commented-out linear schedule below

    best_params = None
    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()
    score_cum = []

    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        # anneal the learning rate as 1/t after epoch st_an
        if epoch > st_an and learning_rule in ['con', 'mom']:
            learning_rate = first_lr / (epoch - st_an)
        #if epoch >= st_an and epoch < en_an:
        #    learning_rate -= first_lr / (en_an - st_an)
        #elif epoch >= en_an:
        #    learning_rate = 0.

        for minibatch_index in xrange(n_train_batches):
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if iter % 1000 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:
                # compute the reconstruction cost on the validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print(' %3i, validation error %f, %s ' %
                      (epoch, this_validation_loss, savename))
                score_cum.append(this_validation_loss)
                plt.plot(xrange(len(score_cum)), score_cum)
                plt.savefig(savename + '.png')
                plt.close()

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number,
                    # and pickle the best model so far
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    layer0.set_cost(best_validation_loss)
                    with open(savename + '.pkl', 'wb') as f:
                        pickle.dump(layer0, f)

                    # test it on the test set
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = np.mean(test_losses)
                    print((' test error %f') % (test_score))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f obtained at iteration %i, '
          'with test performance %f' %
          (best_validation_loss, best_iter + 1, test_score))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    print savename
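# Sketch of how a model saved by evaluate_convA could be inspected afterwards,
# reusing the reconstruction(input=...) interface that gen_wav already relies
# on. The function name, the 'rb' pickle load, and the assumption that the
# reconstruction has batch_size * image_w elements are illustrative, not part
# of the original code.
def inspect_saved_model(pkl_path, valid_set_x, batch_size=10, image_w=1000):
    mdl = pickle.load(open(pkl_path, 'rb'))
    # compile a reconstruction function for one minibatch of waveforms
    x = T.matrix('x')
    x4 = x.reshape((batch_size, 1, 1, image_w))
    recon_fn = theano.function([x], mdl.reconstruction(input=x4))
    batch = np.asarray(valid_set_x.get_value(borrow=True)[:batch_size],
                       dtype=theano.config.floatX)
    recon = recon_fn(batch).reshape(batch_size, image_w)
    # plot the first waveform against its reconstruction
    plt.plot(batch[0], label='input')
    plt.plot(recon[0], label='reconstructed')
    plt.legend()
    plt.savefig(pkl_path.replace('.pkl', '_recon.png'))
    plt.close()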
def evaluate_lenet5():
    learning_rate = 0.1
    n_epochs = 200
    #dataset = 'mnist.pkl.gz'
    dataset = 'timit'
    frame_length = 100
    start = 0
    stop = 10
    nkerns = [20]
    batch_size = 1
    channel = 1
    image_h = 1
    image_w = 1098
    filter_h = 1
    filter_w = 5

    rng = numpy.random.RandomState(23455)

    if dataset == 'mnist.pkl.gz':
        from load_data import load_data_mnist
        datasets = load_data_mnist(dataset)
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        test_set_x, test_set_y = datasets[2]
    if dataset == 'timit':
        from load_data import load_data_timit
        train_set_x = load_data_timit('train', frame_length, start, stop, True)
        valid_set_x = load_data_timit('valid', frame_length, start, stop, True)
        test_set_x = load_data_timit('test', frame_length, start, stop, True)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    layer0_input = x.reshape((batch_size, channel, image_h, image_w))
    layer0 = convA(rng,
                   filter_shape=(nkerns[0], channel, filter_h, filter_w),
                   poolsize=(2, 2))
    cost = layer0.cost(layer0_input)

    params = layer0.params
    grads = T.grad(cost, params)
    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={x: train_set_x[index * batch_size: (index + 1) * batch_size]})
    validate_model = theano.function(
        inputs=[index],
        outputs=cost,
        givens={x: valid_set_x[index * batch_size: (index + 1) * batch_size]})
    test_model = theano.function(
        inputs=[index],
        outputs=cost,
        givens={x: test_set_x[index * batch_size: (index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 10000               # look at this many examples regardless
    patience_increase = 2          # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()
    score_cum = []

    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        #plt.plot(range(len(score_cum)), score_cum)
        #plt.savefig('ca_test.png')
        #plt.close()
        for minibatch_index in xrange(n_train_batches):
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:
                # compute the reconstruction cost on the validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print(' epoch %i, minibatch %i/%i, validation error %f ' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number,
                    # and pickle the best model so far
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    f = open('ca_test.pkl', 'wb')
                    cPickle.dump(layer0, f)
                    f.close()

                    # test it on the test set
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of best model %f') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
        # --- tail of the KMeans training method ---
        # (data, dim and nexamples are defined earlier in the method, not shown)
        dists = np.zeros((self.nfeatures, nexamples))
        for i in xrange(self.training_epochs):
            print '---->\n.....training epoch %d' % i
            # squared-distance terms that depend on the prototypes;
            # the ||x||^2 term is constant per example, so it is dropped
            dists = (np.diag(np.dot(self.prototypes, self.prototypes.T))
                     - 2 * np.dot(data, self.prototypes.T))
            # assign each example to its nearest prototype
            assignments = np.argmin(dists.T, axis=0)
            # move each prototype to the mean of its assigned examples
            # (a prototype that attracts no examples yields NaN here)
            for j in xrange(self.nfeatures):
                self.prototypes[j, :] = np.mean(data[assignments == j, :], axis=0)

        if show_results:
            for i in xrange(self.nfeatures):
                plt.imshow(self.prototypes[i, :].reshape(
                    (int(np.sqrt(dim)), int(np.sqrt(dim)))),
                    interpolation='nearest')
                plt.show()


if __name__ == '__main__':
    data, _ = ld.load_data_mnist(50000)
    batches = ld.make_vector_patches(data, 1, 50000, 10)
    #batches = ld.make_vector_batches(data, 1)
    nfeatures = 30
    #km = KMeans(batches[0][0, :, :], nfeatures)
    km = KMeans(batches[0, :, :], nfeatures)
    for i in xrange(nfeatures):
        #plt.imshow(km.prototypes[i, :].reshape((28, 28)), interpolation='nearest')
        plt.imshow(km.prototypes[i, :].reshape((10, 10)), interpolation='nearest')
        plt.show()
        #if raw_input('continue?') != 'y':
        #    break
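# A small helper, sketched here as a standalone function, that reuses the same
# distance shortcut as the training loop above to encode new data as
# nearest-prototype indices. The function name and signature are illustrative,
# not part of the original KMeans class.
def assign_to_prototypes(data, prototypes):
    """Return, for each row of `data`, the index of its nearest prototype."""
    # ||p||^2 - 2 x.p ; the ||x||^2 term is constant per example and is dropped
    dists = (np.diag(np.dot(prototypes, prototypes.T))
             - 2 * np.dot(data, prototypes.T))
    return np.argmin(dists, axis=1)

# e.g. cluster labels for the training patches used above:
#   labels = assign_to_prototypes(batches[0, :, :], km.prototypes)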