def __init__(self, np_rng, theano_rng=None, n_ins=784,
             hidden_layers_sizes=[500, 500], n_outs=10,
             corruption_levels=[0.1, 0.1]):
    self.sigmoid_layers = []
    self.dA_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)
    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(np_rng.randint(2 ** 30))

    # allocate symbolic variables for the data
    self.x = T.matrix('x')

    for i in range(self.n_layers):
        # the first layer reads the raw input; every later layer reads
        # the output of the layer below
        if i == 0:
            input_size = n_ins
            layer_input = self.x
        else:
            input_size = hidden_layers_sizes[i - 1]
            layer_input = self.sigmoid_layers[-1].output

        sigmoid_layer = HiddenLayer(rng=np_rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layers_sizes[i],
                                    activation=T.nnet.sigmoid)
        # add the layer to the list of layers
        self.sigmoid_layers.append(sigmoid_layer)
        self.params.extend(sigmoid_layer.params)

        # denoising autoencoder that shares W and the hidden bias with
        # this layer
        dA_layer = dA(np_rng=np_rng,
                      theano_rng=theano_rng,
                      input=layer_input,
                      n_visible=input_size,
                      n_hidden=hidden_layers_sizes[i],
                      W=sigmoid_layer.W,
                      bhid=sigmoid_layer.b)
        self.dA_layers.append(dA_layer)

    self.out = self.sigmoid_layers[-1].output
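# A minimal usage sketch (not from the source): this variant has no
# logistic layer and exposes self.out, so the stack can be compiled
# directly into a feature extractor. The enclosing class name 'SdA'
# is an assumption.
sda = SdA(np_rng=np.random.RandomState(123), n_ins=784,
          hidden_layers_sizes=[500, 500])
encode = theano.function([sda.x], sda.out)
# features = encode(data)  # data: (n_samples, 784) float matrix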
def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
             hidden_layers_sizes=[500, 500], n_outs=10,
             corruption_levels=[0.1, 0.1]):
    self.sigmoid_layers = []
    self.dA_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)
    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # allocate symbolic variables for the data
    self.x = T.matrix('x')   # the data is presented as rasterized images
    self.y = T.ivector('y')  # the labels are presented as a 1D vector
                             # of [int] labels

    # The SdA is an MLP in which the weights of the intermediate layers
    # are shared with a set of denoising autoencoders. We first
    # construct the SdA as a deep multilayer perceptron, and while
    # constructing each sigmoidal layer we also construct a denoising
    # autoencoder that shares weights with that layer. During
    # pretraining we train these autoencoders (which also changes the
    # weights of the MLP); during fine-tuning we finish training the
    # SdA by doing stochastic gradient descent on the MLP.
    for i in xrange(self.n_layers):
        # the size of the input is the number of hidden units of the
        # layer below, or the input size if we are on the first layer;
        # likewise, the input to this layer is the activation of the
        # hidden layer below, or the input of the SdA on the first layer
        if i == 0:
            input_size = n_ins
            layer_input = self.x
        else:
            input_size = hidden_layers_sizes[i - 1]
            layer_input = self.sigmoid_layers[-1].output

        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layers_sizes[i],
                                    activation=T.nnet.sigmoid)
        # add the layer to our list of layers
        self.sigmoid_layers.append(sigmoid_layer)

        # it's arguably a philosophical question, but we declare only
        # the parameters of the sigmoid_layers to be parameters of the
        # SdA; the visible biases in each dA are parameters of that dA,
        # but not of the SdA
        self.params.extend(sigmoid_layer.params)

        # construct a denoising autoencoder that shares weights with
        # this layer
        dA_layer = dA(numpy_rng=numpy_rng,
                      theano_rng=theano_rng,
                      input=layer_input,
                      n_visible=input_size,
                      n_hidden=hidden_layers_sizes[i],
                      W=sigmoid_layer.W,
                      bhid=sigmoid_layer.b)
        self.dA_layers.append(dA_layer)

    # we now add a logistic regression layer on top of the MLP
    self.logLayer = LogisticRegression(
        input=self.sigmoid_layers[-1].output,
        n_in=hidden_layers_sizes[-1],
        n_out=n_outs)
    self.params.extend(self.logLayer.params)

    # cost for the second phase of training (fine-tuning), defined as
    # the negative log likelihood of the logistic layer
    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

    # symbolic variable for the number of errors made on the minibatch
    # given by self.x and self.y
    self.errors = self.logLayer.errors(self.y)
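# A minimal sketch (not from the source) of compiling one pretraining
# function per dA layer, as described in the comments above; assumes
# each dA exposes get_cost_updates(corruption_level, learning_rate) as
# in the Theano SdA tutorial, and uses the old theano.Param API for
# default arguments.
def pretraining_functions(self, train_set_x, batch_size):
    index = T.lscalar('index')              # minibatch index
    corruption_level = T.scalar('corruption')
    learning_rate = T.scalar('lr')
    batch_begin = index * batch_size
    batch_end = batch_begin + batch_size

    pretrain_fns = []
    for dA_layer in self.dA_layers:
        # one SGD step on this layer's reconstruction cost
        cost, updates = dA_layer.get_cost_updates(corruption_level,
                                                  learning_rate)
        fn = theano.function(
            inputs=[index,
                    theano.Param(corruption_level, default=0.2),
                    theano.Param(learning_rate, default=0.1)],
            outputs=cost,
            updates=updates,
            givens={self.x: train_set_x[batch_begin:batch_end]})
        pretrain_fns.append(fn)
    return pretrain_fns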
        print 'unrecognized option'
except:
    pass

batch_size = 100
n_batches = int(train_features.shape[0] / batch_size)
assert n_batches * batch_size == train_features.shape[0]
batch_train_features = train_features.reshape(
    (n_batches, batch_size, train_features.shape[1]))

aE = ae.dA(batch_train_features.shape[2], int(sys.argv[3]), regL=1e-3)
aE.fit(batch_train_features, l_rate=5, tol=1e-5, training_epochs=2000)

out_file = 'pickle_ae_' + str(sys.argv[3]) + '.pkl'
with open(out_file, 'wb') as fi:
    print 'dumping aE parameters to %s' % out_file
    W = aE.W.get_value()
    bias_h = aE.b_h.get_value()  # hidden bias
    bias_v = aE.b_v.get_value()  # visible bias
    pic.dump([W, bias_h, bias_v], fi)

train_features = aE.transform(train_features)
test_features = aE.transform(test_features)
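# A minimal sketch (not from the source) of reading the dump back;
# assumes the same [W, bias_h, bias_v] ordering pickled above, with a
# hypothetical '400' standing in for sys.argv[3].
import cPickle as pic
with open('pickle_ae_400.pkl', 'rb') as fi:
    W, bias_h, bias_v = pic.load(fi)
print W.shape, bias_h.shape, bias_v.shape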
def parallel(x):
    # smoke test run on each engine: construct a small dA and check
    # that the project source dir is on the engine's sys.path
    # (sys and autoencoder are pulled in via dview.sync_imports())
    da = autoencoder.dA(10, 10)
    return ('/Users/alex/mnist/src' in sys.path or
            '/home/susemihl/mnist/src' in sys.path)
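# Hypothetical usage (not from the source): run the check once per
# engine and confirm every engine sees the project sources.
# ok = dview.map(parallel, range(len(dview.targets)))
# assert all(ok)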
valid_set_x, valid_set_y = datasets[1]
test_set_x, test_set_y = datasets[2]

print "Loading learned model"

# Prepare symbolic variables
index = T.lscalar()  # index to a [mini]batch
x = T.matrix('x')    # the data is presented as rasterized images
y = T.ivector('y')   # the labels are presented as a 1D vector of
                     # [int] labels

# Load denoising autoencoder
rng = np.random.RandomState(123)
theano_rng = RandomStreams(rng.randint(2 ** 30))
da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
        n_visible=28 * 28, n_hidden=n_hidden)

import cPickle
da_dumped, logreg_dumped = cPickle.load(open("demo_model.pkl", "rb"))
print logreg_dumped
da.load(da_dumped)
input_logreg = da.get_hidden_values(x)

# Load logistic regression
classifier = LogisticRegression(input=input_logreg, n_in=n_hidden,
                                n_out=10)
classifier.load(logreg_dumped)
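# A minimal sketch (not from the source) of evaluating the restored
# model on the test set; assumes classifier.errors(y) returns the mean
# misclassification rate as in the Theano tutorial, and a hypothetical
# batch_size.
batch_size = 500
n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
test_model = theano.function(
    [index],
    classifier.errors(y),
    givens={x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]})
test_losses = [test_model(i) for i in xrange(n_test_batches)]
print 'test error: %f %%' % (np.mean(test_losses) * 100.)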
with dview.sync_imports():
    import sys
    import autoencoder
    import theano
    import theano.tensor
print 'we iz gotz parallelz!!'

labels, train, test = pp.load_from_csv(sys.argv[2], sys.argv[3])
labels = np.array(labels)

if ae_pretrain:
    # pretrain on the union of train and test features (no labels used)
    unsuper = np.append(train, test, axis=0)
    print unsuper.shape
    print 'ae pretraining...'
    da = ae.dA(784, 400)
    da.fit(unsuper.reshape((unsuper.shape[0] / 100, 100, 784)),
           training_epochs=50)
    W = da.W.get_value()
    bh = da.b_h.get_value()
    print 'pretrained ae'
    with open(sys.argv[1], 'wb') as f:
        pic.dump([W, bh], f)
    print 'dumped ae stuffz to %s' % sys.argv[1]

# note: W and bh must already be defined if ae_pretrain is False
cross_val = cv.KFold(train.shape[0], k=8)
sets = [(train[i], labels[i], train[j], labels[j], W, bh)
        for i, j in cross_val]
print 'trying parallel evaluation...'
scores = dview.map(train_mlp_and_score, sets)  # one task per CV fold
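# A sketch (not from the source) of obtaining the dview used above;
# assumes a running ipcluster and the IPython.parallel API of that era.
from IPython.parallel import Client
rc = Client()
dview = rc[:]       # DirectView over all engines
dview.block = True  # synchronous map: results come back in order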
def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
             hidden_layers_sizes=[500, 500], n_outs=10,
             corruption_levels=[0.1, 0.1]):
    # define the layer lists we will need
    self.sigmoid_layers = []
    self.dA_layers = []
    self.params = []
    # number of hidden layers
    self.n_layers = len(hidden_layers_sizes)
    # there must be at least one hidden layer
    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # image data
    self.x = T.matrix('x')
    # integer class labels
    self.y = T.ivector('y')

    for i in xrange(self.n_layers):
        if i == 0:
            # the first hidden layer takes as many inputs as there are
            # input-layer units
            input_size = n_ins
        else:
            # later hidden layers take as many inputs as there are
            # units in the previous hidden layer
            input_size = hidden_layers_sizes[i - 1]

        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.sigmoid_layers[-1].output

        # hidden layer
        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layers_sizes[i],
                                    activation=T.nnet.sigmoid)
        # append to the list of hidden layers
        self.sigmoid_layers.append(sigmoid_layer)
        # W and b of the hidden layer
        self.params.extend(sigmoid_layer.params)

        # autoencoder
        dA_layer = dA(numpy_rng=numpy_rng,
                      theano_rng=theano_rng,
                      input=layer_input,
                      n_visible=input_size,
                      n_hidden=hidden_layers_sizes[i],
                      W=sigmoid_layer.W,
                      bhid=sigmoid_layer.b)
        # append to the list of autoencoders
        self.dA_layers.append(dA_layer)

    # the input is the output of the last sigmoid layer, n_in is the
    # size of the last hidden layer, and there are n_outs output units
    self.logLayer = LogisticRegression(
        input=self.sigmoid_layers[-1].output,
        n_in=hidden_layers_sizes[-1],
        n_out=n_outs)
    # W and b of the LogisticRegression layer
    self.params.extend(self.logLayer.params)

    # no regularization term is needed here
    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
    # use the errors of the LogisticRegression layer
    self.errors = self.logLayer.errors(self.y)
output_file = ('ae_' + str(patch_size) + 'x' + str(patch_size) + '_' +
               str(n_filters) + '_filters_backup.pickle')

# if the autoencoder hasn't been trained and pickled beforehand,
# train it and back it up now
if output_file not in os.listdir('.'):
    ''' AUTOENCODER TRAINING STARTS HERE '''
    patches = pp.make_patches(train, patch_size=patch_size)
    print "generated patches"

    # creates a denoising autoencoder with n_filters hidden nodes,
    # which could be changed as well
    a = autoencoder.dA(patch_size ** 2, n_filters)
    a.fit(patches[:10000], training_epochs=1000, verbose=True)

    W_ae = np.reshape(a.W.get_value(),
                      (n_filters, 1, patch_size, patch_size))
    b_ae = np.reshape(a.b_h.get_value(), (n_filters,))
    fi = open(output_file, 'wb')
    pic.dump([W_ae, b_ae], fi)
    fi.close()
    ''' AUTOENCODER TRAINING ENDS HERE '''
else:
    # if the autoencoder has been trained and backed up in the file named,
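    # A minimal sketch (not from the source) of the load branch, whose
    # comment is cut off above; assumes the same [W_ae, b_ae] layout
    # pickled in the if-branch.
    fi = open(output_file, 'rb')
    W_ae, b_ae = pic.load(fi)
    fi.close()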
''' AUTOENCODER TRAINING STARTS HERE '''
batches = ld.make_vector_patches(train_data, training_batches,
                                 batch_size, patch_size)
validation_images = ld.make_vector_patches(validation_data, 1,
                                           validation_data['images'].shape[0],
                                           patch_size)
#batches, ys = ld.make_vector_patches(train_data,
#                                     training_batches, batch_size, patch_size)
#validation_images, validation_ys = ld.make_vector_batches(
#    validation_data, 1, validation_data['images'].shape[0])

index = T.lscalar('index')  # index to a batch
x = T.dmatrix('x')

# creates a denoising autoencoder with n_filters hidden nodes,
# which could be changed as well
a = dA(patch_size * patch_size, n_filters, data=x, regL=0.05)

# set theano shared variables for the train and validation data
data_x = theano.shared(value=np.asarray(batches,
                                        dtype=theano.config.floatX),
                       name='data_x')
validation_x = theano.shared(
    value=np.asarray(validation_images[0, :, :],
                     dtype=theano.config.floatX),
    name='validation_x')

# get cost and update functions for the autoencoder
cost, updates = a.get_cost_and_updates(0.4, 0.02)

# train_da returns the current cost and updates the dA parameters;
# index gives the batch index
train_da = theano.function([index], cost, updates=updates,
                           givens=[(x, data_x[index])],
                           on_unused_input='ignore')
# validation_error just returns the cost on the validation set
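# A minimal sketch (not from the source) of the validation function
# named by the comment above; assumes the same cost graph can be
# evaluated on the validation set by substituting validation_x for x.
validation_error = theano.function([], cost,
                                   givens=[(x, validation_x)],
                                   on_unused_input='ignore')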