def should_success_calculate_for_multiple_neurons():
    network = MultipleLayersModel([
        Layer(input_dimension=1, output_dimension=3,
              activation_function=LinearFunction(),
              weights_initializer=ConstShapeInitializer(
                  np.asarray([[1., 2., 3.]])),
              biases_initializer=ConstShapeInitializer(np.asarray([1., 2., 3.]))),
        Layer(input_dimension=3, output_dimension=1,
              activation_function=LinearFunction(2.),
              weights_initializer=ConstShapeInitializer(
                  np.asarray([[1.], [2.], [3.]])),
              biases_initializer=ConstShapeInitializer(np.asarray([1.])))
    ])
    X = np.asarray([[0.], [1.]])
    Y = np.asarray([[0.], [2.]])
    gradient = ApproximateGradient()
    square_error = SquareError()
    network_gradient = gradient(network, X, Y, square_error)
    expected = np.asarray([
        [
            np.asarray([[224.00000444, 448.0000166, 672.00003605]]),
            np.asarray([344.00000857, 688.0000326, 1032.00007197])
        ],
        [
            np.asarray([[568.00002073], [1136.00008012], [1704.00017987]]),
            np.asarray([344.00000834])
        ]
    ])
    equals(expected, network_gradient)
def test_next_layer(self):
    with self.assertRaises(AssertionError):
        Layer(15).next_layer('')
    x = Layer(1)
    y = Layer(2)
    x.next_layer(y)
    self.assertIs(x.next, y)
def test_prev_layer(self):
    with self.assertRaises(AssertionError):
        Layer(15).prev_layer('')
    x = Layer(1)
    y = Layer(2)
    y.prev_layer(x)
    self.assertIs(y.prev, x)
def main():
    xor = MLP()
    xor.add_layer(Layer(2))
    xor.add_layer(Layer(2))
    xor.add_layer(Layer(1))
    xor.init_network()
    xor.patterns = [
        ([0, 0], [0]),
        ([0, 1], [1]),
        ([1, 0], [1]),
        ([1, 1], [0]),
    ]
    print xor.train(xor.patterns)
    for inp, target in xor.patterns:
        tolerance = 0.1
        computed = xor.forward(inp)
        error = abs(computed[0] - target[0])
        print 'input: %s target: %s, output: %s, error: %.4f' % (inp, target, computed, error)
def should_success_calculate_for_multiple_examples():
    network = MultipleLayersModel([
        Layer(input_dimension=1, output_dimension=1,
              activation_function=LinearFunction(),
              weights_initializer=ConstShapeInitializer(np.asarray([[1.]])),
              biases_initializer=ConstShapeInitializer(np.asarray([2.]))),
        Layer(input_dimension=1, output_dimension=1,
              activation_function=LinearFunction(2.),
              weights_initializer=ConstShapeInitializer(np.asarray([[3.]])),
              biases_initializer=ConstShapeInitializer(np.asarray([0.])))
    ])
    X = np.asarray([[0.], [1.]])
    Y = np.asarray([[0.], [2.]])
    gradient = ApproximateGradient()
    square_error = SquareError()
    network_gradient = gradient(network, X, Y, square_error)
    expected = np.asarray(
        [[np.asarray([[192.0000359518781]]), np.asarray([336.0000719681011])],
         [np.asarray([[288.0000519667192]]), np.asarray([112.00000793110121])]])
    equals(expected, network_gradient)
def should_be_success_calculate_output():
    layer = Layer(
        input_dimension=2,
        output_dimension=3,
        activation_function=LinearFunction(),
        weights_initializer=ConstShapeInitializer(
            np.asarray([
                [1., 2., 3.],
                [1., 2., 3.]
            ])
        ),
        biases_initializer=ConstShapeInitializer(
            np.asarray([1., 2., 3.])
        )
    )
    # [1, 2] . W + b = [3, 6, 9] + [1, 2, 3] = [4, 8, 12]
    expected = np.asarray([4., 8., 12.])
    equals(expected, layer([1, 2]))
from mlp import MultipleLayersModel, Layer
from initializers import UniformInitializer, ConstShapeInitializer
import numpy as np

# NOTE: LinearFunction is used below; it must also be imported from the same
# package as Layer (its module is not shown in this snippet).

__all__ = ['gradient_teacher_test']


def function(x):
    return 2 * x


network = MultipleLayersModel([
    Layer(input_dimension=1, output_dimension=1,
          activation_function=LinearFunction(),
          weights_initializer=ConstShapeInitializer(np.asarray([[1.]])),
          biases_initializer=ConstShapeInitializer(np.asarray([2.]))),
    Layer(input_dimension=1, output_dimension=1,
          activation_function=LinearFunction(2.),
          weights_initializer=ConstShapeInitializer(np.asarray([[3.]])),
          biases_initializer=ConstShapeInitializer(np.asarray([0.])))
])


def gradient_teacher_test():
    uniform = UniformInitializer(seed=2019)
    inputs = uniform((5, 1))
    outputInitializer = ConstShapeInitializer(
        [function(value) for value in inputs])
def test_init_weights(self):
    x = Layer(1)
    x.init_weights()
    self.assertIsNone(x.weights)

    x = Layer(1)
    y = Layer(1)
    x.next_layer(y)
    x.init_weights()
    self.assertEqual(x.weights, [[0]])

    x = Layer(1)
    y = Layer(2)
    x.next_layer(y)
    x.init_weights()
    self.assertEqual(x.weights, [[0, 0]])

    x = Layer(2)
    y = Layer(1)
    x.next_layer(y)
    x.init_weights()
    self.assertEqual(x.weights, [[0], [0]])

    x = Layer(2)
    y = Layer(2)
    x.next_layer(y)
    x.init_weights()
    self.assertEqual(x.weights, [[0, 0], [0, 0]])
    expected_value = np.zeros(len(files))
    expected_value[random_index] = 1
    return input_data, expected_value


def update_ema(current, average):
    """Update the exponential moving average."""
    return ERROR_RATE * abs(current) + (1 - ERROR_RATE) * average


if __name__ == '__main__':
    input_files = sorted(os.listdir(INPUT_DIR))
    input_data, expected_value = get_input_data(input_files)
    hidden = Layer(NUM_HIDDEN_NODES, input_data.shape[0], expit)
    output = Layer(len(input_files), NUM_HIDDEN_NODES, softmax)
    average_errors = np.ones(len(input_files))
    accepted_errors = np.full(len(input_files), ACCEPTED_ERROR)
    while not np.all(np.less(average_errors, accepted_errors)):
        # get a random data point
        input_data, expected_value = get_input_data(input_files)
        # process inputs
        outputs = output.process(hidden.process(input_data))
        # calculate errors
        output.errors = expected_value - outputs
        hidden.errors = expit_prime(hidden.h) * np.dot(output.errors, output.weights)
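Since update_ema above simply blends the latest absolute error into a running average, here is a small self-contained illustration; the ERROR_RATE value is assumed for the sake of the numbers and is not taken from this module.

# Illustrative only: assumes ERROR_RATE = 0.1; the module defines its own value.
ERROR_RATE = 0.1

def update_ema(current, average):
    return ERROR_RATE * abs(current) + (1 - ERROR_RATE) * average

avg = 1.0
for err in (0.5, 0.4, 0.3):
    avg = update_ema(err, avg)
# avg: 0.95 -> 0.895 -> 0.8355, drifting toward the recent |error| values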
# Extract configuration
if path.isdir(options.configure) is True:
    raise Exception(options.configure + ': Is a directory.')
if path.exists(options.configure) is False:
    raise Exception(options.configure + ': No such file or directory.')
with open(options.configure, 'r') as yfile:
    cfg = yaml.load(yfile, Loader=yaml.BaseLoader)

# Load data set
X_train, y_train, X_test, y_test = preprocessing(cfg, csv2data(dataset_path))

# Build the network
nn = NeuralNetwork(error=options.error)
w_seed = int(cfg['weights_seed'])
b_seed = int(cfg['bias_seed'])
nn.add_layer(Layer(n_input=X_train.shape[1]), weights_seed=w_seed, bias_seed=b_seed)
nn.add_layer(Layer(n_input=14, activation='tanh'), weights_seed=w_seed, bias_seed=b_seed)
nn.add_layer(Layer(n_input=14, activation='tanh'), weights_seed=w_seed, bias_seed=b_seed)
nn.add_layer(Layer(n_input=14, activation='tanh'), weights_seed=w_seed, bias_seed=b_seed)
nn.add_layer(Layer(n_input=14, activation='tanh'), weights_seed=w_seed, bias_seed=b_seed)
nn.add_layer(Layer(n_input=14, activation='tanh'), weights_seed=w_seed, bias_seed=b_seed)
nn.add_layer(Layer(n_input=y_train.shape[1], activation='softmax'), weights_seed=w_seed, bias_seed=b_seed)

# Train
mses, cees = nn.train(X_train, y_train, X_test, y_test,
                      learning_rate=float(cfg['learning_rate']),
                      max_epochs=int(cfg['epoch']),
                      mini_batch_size=float(cfg['mini_batch_size']))

if options.plot is True:
    nn.plot(mses, cees,
            learning_rate=float(cfg['learning_rate']),
            mini_batch_size=int(cfg['mini_batch_size']))

nn.save()
def main():
    imres = MLP()
    num_points = 784
    imres.add_layer(Layer(num_points))
    imres.add_layer(Layer(20))
    imres.add_layer(Layer(10))
    imres.add_bias()
    imres.init_network()
    imres.step = 0.001
    imres.moment = imres.step / 10
    imres.verbose = True
    target_error = 0.01
    imres.patterns = []
    imres._patterns = []
    imres.test_patterns = []
    imres._test_patterns = []

    def norm(inp):
        def fn(x):
            return x / 255
        return map(fn, inp)

    mn = MNIST('./mnist/data/')
    samples, labels = mn.load_testing()
    for i in range(100):
        outvect = [0] * 10
        outvect[labels[i]] = 1
        imres.patterns.append((samples[i], outvect))
        imres._patterns.append((samples[i], labels[i], outvect))
    for i in range(100, 200):
        outvect = [0] * 10
        outvect[labels[i]] = 1
        imres.test_patterns.append((samples[i], outvect))
        imres._test_patterns.append((samples[i], labels[i], outvect))

    print 'Training samples: %d' % len(imres.patterns)
    print 'Testing samples: %d' % len(imres.test_patterns)
    print 'Target error: %.4f' % target_error
    final_err, steps = imres.train_target(imres.patterns, target_error=target_error)
    print 'Training done in %d steps with final error of %.6f' % (steps, final_err)
    print '----- Detailed test output -----'

    total_tests = len(imres._test_patterns)
    total_fails = 0
    for inp, num, target in imres._test_patterns:
        computed = imres.run(inp)
        error = abs(computed[0] - target[0])
        computed = map(lambda x: round(x, 4), computed)
        maxn = computed[0]
        pos = 0
        for i in range(len(computed)):
            if computed[i] > maxn:
                maxn = computed[i]
                pos = i
        if num != pos:
            total_fails += 1
            print 'in: %d, out: %d' % (num, pos)
            print 'target: %s \noutput: %s' % (target, computed)
            print '-----'
    print 'Testing done - %d of %d samples classified incorrectly' % (
        total_fails, total_tests)
class M2_VAE(Base_VAE):
    def __init__(self, hyper_params=None, optimize_params=None, model_params=None):
        super(M2_VAE, self).__init__(hyper_params, optimize_params, model_params,
                                     model_name='M2')

    def init_model_params(self, dim_x, dim_y):
        print 'M2 model params initialize'
        dim_z = self.hyper_params['dim_z']
        n_hidden = self.hyper_params['n_hidden']  # [500, 500, 500]
        n_hidden_recognize = n_hidden
        n_hidden_generate = n_hidden[::-1]

        self.type_px = self.hyper_params['type_px']
        activation = {
            'tanh': T.tanh,
            'relu': self.relu,
            'softplus': self.softplus,
            'sigmoid': T.nnet.sigmoid,
            'none': self.identify,
        }
        self.nonlinear_q = activation[self.hyper_params['nonlinear_q']]
        self.nonlinear_p = activation[self.hyper_params['nonlinear_p']]
        if self.type_px == 'bernoulli':
            output_f = activation['sigmoid']
        elif self.type_px == 'gaussian':
            output_f = activation['none']

        # Recognize model
        self.recognize_layers = [
            Layer(param_shape=(dim_x, n_hidden_recognize[0]),
                  function=self.identify, nonbias=True),
            Layer(param_shape=(dim_y, n_hidden_recognize[0]),
                  function=self.identify)
        ]
        if len(n_hidden_recognize) > 1:
            self.recognize_layers += [
                Layer(param_shape=shape, function=self.nonlinear_q)
                for shape in zip(n_hidden_recognize[:-1], n_hidden_recognize[1:])
            ]
        self.recognize_mean_layer = Layer(
            param_shape=(n_hidden_recognize[-1], dim_z), function=self.identify)
        self.recognize_log_var_layer = Layer(
            param_shape=(n_hidden_recognize[-1], dim_z), function=self.identify,
            w_zero=True, b_zero=True)

        # Generate Model
        self.generate_layers = [
            Layer((dim_z, n_hidden_generate[0]), function=self.identify, nonbias=True),
            Layer((dim_y, n_hidden_generate[0]), function=self.identify),
        ]
        if len(n_hidden) > 1:
            self.generate_layers += [
                Layer(param_shape=shape, function=self.nonlinear_p)
                for shape in zip(n_hidden_generate[:-1], n_hidden_generate[1:])
            ]
        self.generate_mean_layer = Layer(
            param_shape=(n_hidden_generate[-1], dim_x), function=output_f)
        self.generate_log_var_layer = Layer(
            param_shape=(n_hidden_generate[-1], dim_x), function=self.identify,
            b_zero=True)

        # Add all parameters
        self.model_params_ = (
            [param for layer in self.recognize_layers for param in layer.params] +
            self.recognize_mean_layer.params +
            self.recognize_log_var_layer.params +
            [param for layer in self.generate_layers for param in layer.params] +
            self.generate_mean_layer.params
        )
        if self.type_px == 'gaussian':
            self.model_params_ += self.generate_log_var_layer.params

    def recognize_model(self, X, Y):
        for i, layer in enumerate(self.recognize_layers):
            if i == 0:
                layer_out = layer.fprop(X)
            elif i == 1:
                layer_out += layer.fprop(Y)
                layer_out = self.nonlinear_q(layer_out)
            else:
                layer_out = layer.fprop(layer_out)

        q_mean = self.recognize_mean_layer.fprop(layer_out)
        q_log_var = self.recognize_log_var_layer.fprop(layer_out)

        return {
            'q_mean': q_mean,
            'q_log_var': q_log_var,
        }

    def generate_model(self, Z, Y):
        for i, layer in enumerate(self.generate_layers):
            if i == 0:
                layer_out = layer.fprop(Z)
            elif i == 1:
                layer_out += layer.fprop(Y)
                layer_out = self.nonlinear_p(layer_out)
            else:
                layer_out = layer.fprop(layer_out)

        p_mean = self.generate_mean_layer.fprop(layer_out)
        p_log_var = self.generate_log_var_layer.fprop(layer_out)

        return {'p_mean': p_mean, 'p_log_var': p_log_var}

    def encode(self, x, y):
        if self.encode_main is None:
            X = T.matrix()
            Y = T.matrix()
            self.encode_main = theano.function(
                inputs=[X, Y],
                outputs=self.recognize_model(X, Y)['q_mean'])
        return self.encode_main(x, y)

    def decode(self, z, y):
        if self.decode_main is None:
            Z = T.matrix()
            Y = T.matrix()
            self.decode_main = theano.function(
                inputs=[Z, Y],
                outputs=self.generate_model(Z, Y)['p_mean'])
        return self.decode_main(z, y)

    def get_expr_lbound(self, X, Y):
        n_samples = X.shape[0]

        recognized_zs = self.recognize_model(X, Y)
        q_mean = recognized_zs['q_mean']
        q_log_var = recognized_zs['q_log_var']

        eps = self.rng_noise.normal(avg=0., std=1., size=q_mean.shape).astype(
            theano.config.floatX)
        # T.exp(0.5 * q_log_var) = std
        # z = mean_z + std * epsilon
        z_tilda = q_mean + T.exp(0.5 * q_log_var) * eps

        generated_x = self.generate_model(z_tilda, Y)
        p_mean = generated_x['p_mean']
        p_log_var = generated_x['p_log_var']

        if self.type_px == 'gaussian':
            log_p_x_given_z = (-0.5 * np.log(2 * np.pi) - 0.5 * p_log_var -
                               0.5 * (X - p_mean) ** 2 / (2 * T.exp(p_log_var)))
        elif self.type_px == 'bernoulli':
            # log_p_x_given_z = X * T.log(p_mean) + (1 - X) * T.log(1 - p_mean)
            log_p_x_given_z = -T.nnet.binary_crossentropy(p_mean, X)

        logqz = -0.5 * (np.log(2 * np.pi) + 1 + q_log_var)
        logpz = -0.5 * (np.log(2 * np.pi) + q_mean ** 2 + T.exp(q_log_var))
        # logqz = - 0.5 * T.sum(np.log(2 * np.pi) + 1 + q_log_var, axis=1)
        # logpz = - 0.5 * T.sum(np.log(2 * np.pi) + q_mean ** 2 + T.exp(q_log_var), axis=1)
        D_KL = T.sum(logpz - logqz)
        recon_error = T.sum(log_p_x_given_z)

        return D_KL, recon_error
        # return log_p_x_given_z, logpz, logqz

    def fit(self, x_datas, y_labels):
        X = T.matrix()
        Y = T.matrix()

        self.rng_noise = RandomStreams(self.hyper_params['rng_seed'])
        self.init_model_params(dim_x=x_datas.shape[1], dim_y=y_labels.shape[1])

        D_KL, recon_error = self.get_expr_lbound(X, Y)
        L = D_KL + recon_error

        print 'start fitting'
        gparams = T.grad(cost=L, wrt=self.model_params_)

        optimizer = {
            'sgd': self.sgd,
            'adagrad': self.adagrad,
            'adadelta': self.adaDelta,
            'rmsprop': self.rmsProp,
            'adam': self.adam
        }
        updates = optimizer[self.hyper_params['optimizer']](
            self.model_params_, gparams, self.optimize_params)

        self.hist = self.early_stopping(
            # self.hist = self.optimize(
            X, Y,
            x_datas, y_labels,
            self.optimize_params,
            L,
            updates,
            self.rng,
            D_KL,
            recon_error,
        )

    def optimize(self, X, Y, x_datas, y_labels, hyper_params, cost, updates, rng,
                 D_KL, recon_error):
        n_iters = hyper_params['n_iters']
        minibatch_size = hyper_params['minibatch_size']
        n_mod_history = hyper_params['n_mod_history']

        train_x = x_datas[:50000]
        valid_x = x_datas[50000:]
        train_y = y_labels[:50000]
        valid_y = y_labels[50000:]

        train = theano.function(inputs=[X, Y],
                                outputs=[cost, D_KL, recon_error],
                                updates=updates)
        validate = theano.function(inputs=[X, Y],
                                   outputs=[cost, D_KL, recon_error])

        n_samples = train_x.shape[0]
        cost_history = []
        total_cost = 0
        total_dkl = 0
        total_recon_error = 0
        for i in xrange(n_iters):
            ixs = rng.permutation(n_samples)
            for j in xrange(0, n_samples, minibatch_size):
                cost, D_KL, recon_error = train(
                    train_x[ixs[j:j + minibatch_size]],
                    train_y[ixs[j:j + minibatch_size]])
                # print np.sum(hoge(train_x[:1])[0])
                total_cost += cost
                total_dkl += D_KL
                total_recon_error += recon_error

            if np.mod(i, n_mod_history) == 0:
                num = n_samples / minibatch_size
                print('%d epoch train D_KL error: %.3f, Reconstruction error: %.3f, total error: %.3f' %
                      (i, total_dkl / num, total_recon_error / num, total_cost / num))
                total_cost = 0
                total_dkl = 0
                total_recon_error = 0
                valid_error, valid_dkl, valid_recon_error = validate(valid_x, valid_y)
                print '\tvalid D_KL error: %.3f, Reconstruction error: %.3f, total error: %.3f' % (
                    valid_dkl, valid_recon_error, valid_error)
                cost_history.append((i, valid_error))
        return cost_history

    def early_stopping(self, X, Y, x_datas, y_labels, hyper_params, cost, updates,
                       rng, D_KL, recon_error):
        minibatch_size = hyper_params['minibatch_size']

        train_x = x_datas[:50000]
        valid_x = x_datas[50000:]
        train_y = y_labels[:50000]
        valid_y = y_labels[50000:]

        train = theano.function(inputs=[X, Y],
                                outputs=[cost, D_KL, recon_error],
                                updates=updates)
        validate = theano.function(
            inputs=[X, Y],
            outputs=cost,
        )

        n_samples = train_x.shape[0]
        cost_history = []
        best_params = None
        valid_best_error = -np.inf
        best_epoch = 0
        patience = 5000
        patience_increase = 2
        improvement_threshold = 1.005

        done_looping = False
        for i in xrange(1000000):
            if done_looping:
                break
            ixs = rng.permutation(n_samples)
            for j in xrange(0, n_samples, minibatch_size):
                cost, D_KL, recon_error = train(
                    train_x[ixs[j:j + minibatch_size]],
                    train_y[ixs[j:j + minibatch_size]])

                iter = i * (n_samples / minibatch_size) + j / minibatch_size
                if (iter + 1) % 50 == 0:
                    valid_error = 0.
                    for _ in xrange(3):
                        valid_error += validate(valid_x, valid_y)
                    valid_error /= 3
                    if i % 100 == 0:
                        print 'epoch %d, minibatch %d/%d, valid total error: %.3f' % (
                            i, j / minibatch_size + 1, n_samples / minibatch_size,
                            valid_error)
                    cost_history.append((i * j, valid_error))
                    if valid_error > valid_best_error:
                        if valid_error > valid_best_error * improvement_threshold:
                            patience = max(patience, iter * patience_increase)
                        best_params = self.model_params_
                        valid_best_error = valid_error
                        best_epoch = i
                if patience <= iter:
                    done_looping = True
                    break

        self.model_params_ = best_params
        print 'epoch %d, minibatch %d/%d, valid total error: %.3f' % (
            best_epoch, j / minibatch_size + 1, n_samples / minibatch_size,
            valid_best_error)
        return cost_history
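A note on get_expr_lbound above: with logpz and logqz defined as they are, T.sum(logpz - logqz) is the negative of the closed-form KL divergence between the diagonal Gaussian posterior and a standard normal prior, so L = D_KL + recon_error is the usual evidence lower bound. A minimal NumPy sketch of that identity follows; kl_check is an illustrative helper, not part of the original code.

import numpy as np

def kl_check(q_mean, q_log_var):
    """Verify sum(logpz - logqz) == -KL(N(mu, exp(log_var)) || N(0, I))."""
    logqz = -0.5 * (np.log(2 * np.pi) + 1 + q_log_var)
    logpz = -0.5 * (np.log(2 * np.pi) + q_mean ** 2 + np.exp(q_log_var))
    kl = 0.5 * np.sum(q_mean ** 2 + np.exp(q_log_var) - 1 - q_log_var)
    return np.allclose(np.sum(logpz - logqz), -kl)

assert kl_check(np.random.randn(4, 3), np.random.randn(4, 3))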
class M1_GVAE(object):
    def __init__(self, hyper_params=None, sgd_params=None, adagrad_params=None,
                 model_params=None):
        if (sgd_params is not None) and (adagrad_params is not None):
            raise ValueError('Error: select only one algorithm')
        self.hyper_params = hyper_params
        self.sgd_params = sgd_params
        self.adagrad_params = adagrad_params
        self.model_params = model_params

        self.rng = np.random.RandomState(hyper_params['rng_seed'])

        self.model_params_ = None
        self.decode_main = None
        self.encode_main = None

    def init_model_params(self, dim_x):
        print 'M1 model params initialize'
        dim_z = self.hyper_params['dim_z']
        n_hidden = self.hyper_params['n_hidden']  # [500, 500, 500]
        self.type_px = self.hyper_params['type_px']

        def relu(x):
            return x * (x > 0) + 0.01 * x

        def softplus(x):
            return T.log(T.exp(x) + 1)

        activation = {
            'tanh': T.tanh,
            'relu': relu,
            'softplus': softplus,
            'sigmoid': T.nnet.sigmoid,
            'none': None
        }
        nonlinear_q = activation[self.hyper_params['nonlinear_q']]
        nonlinear_p = activation[self.hyper_params['nonlinear_p']]
        if self.type_px == 'bernoulli':
            output_f = activation['sigmoid']
        elif self.type_px == 'gaussian':
            output_f = activation['none']

        # Recognize model
        self.recognize_layers = [
            Layer((dim_x, n_hidden[0]), function=nonlinear_q)
        ]
        if len(n_hidden) > 1:
            self.recognize_layers += [
                Layer(shape, function=nonlinear_q)
                for shape in zip(n_hidden[:-1], n_hidden[1:])
            ]
        self.recognize_mean_layer = Layer((n_hidden[-1], dim_z), function=None)
        self.recognize_log_sigma_layer = Layer((n_hidden[-1], dim_z),
                                               function=None,
                                               w_zero=True, b_zero=True)

        # Generate Model
        self.generate_layers = [
            Layer((dim_z, n_hidden[0]), function=nonlinear_p)
        ]
        if len(n_hidden) > 1:
            self.generate_layers += [
                Layer(shape, function=nonlinear_p)
                for shape in zip(n_hidden[:-1], n_hidden[1:])
            ]
        self.generate_mean_layer = Layer((n_hidden[-1], dim_x), function=output_f)
        self.generate_log_sigma_layer = Layer((n_hidden[-1], dim_x),
                                              function=None, b_zero=True)

        self.model_params_ = (
            [param for layer in self.generate_layers for param in layer.params] +
            self.recognize_mean_layer.params +
            self.recognize_log_sigma_layer.params +
            [param for layer in self.recognize_layers for param in layer.params] +
            self.generate_mean_layer.params
        )
        if self.type_px == 'gaussian':
            self.model_params_ += self.generate_log_sigma_layer.params

    def generate_model(self, Z):
        for i, layer in enumerate(self.generate_layers):
            if i == 0:
                layer_out = layer.fprop(Z)
            else:
                layer_out = layer.fprop(layer_out)

        p_mean = self.generate_mean_layer.fprop(layer_out)
        p_log_var = self.generate_log_sigma_layer.fprop(layer_out)

        return {
            # 'mu': 0.5 * (T.tanh(p_mean) + 1),  # 0 <= mu <= 1
            # 'log_sigma': 3 * T.tanh(p_log_var) - 1,  # -4 <= log sigma ** 2 <= 2
            # 'mu': T.clip(p_mean, 0., 1.),
            # 'log_sigma': T.clip(p_log_var, -4., 2.)
            'mu': p_mean,
            'log_sigma': p_log_var
        }

    def recognize_model(self, X):
        for i, layer in enumerate(self.recognize_layers):
            if i == 0:
                layer_out = layer.fprop(X)
            else:
                layer_out = layer.fprop(layer_out)

        q_mean = self.recognize_mean_layer.fprop(layer_out)
        q_log_var = self.recognize_log_sigma_layer.fprop(layer_out)

        return {
            'mu': q_mean,
            # 'log_sigma': 3 * T.tanh(q_log_var) - 1,
            # 'log_sigma': T.clip(q_log_var, -4., 2.)
            'log_sigma': q_log_var
        }

    def decode(self, z):
        if self.decode_main is None:
            Z = T.matrix()
            self.decode_main = theano.function(
                inputs=[Z], outputs=self.generate_model(Z)['mu'])
        return self.decode_main(z)

    def encode(self, x):
        if self.encode_main is None:
            X = T.matrix()
            self.encode_main = theano.function(
                inputs=[X], outputs=self.recognize_model(X)['mu'])
        return self.encode_main(x)

    def get_expr_lbound(self, X):
        n_mc_sampling = self.hyper_params['n_mc_sampling']
        n_samples = X.shape[0]
        dim_z = self.hyper_params['dim_z']

        stats_z = self.recognize_model(X)
        q_mean = stats_z['mu']
        q_log_var = stats_z['log_sigma']

        eps = self.rng_noise.normal(size=(n_mc_sampling, n_samples, dim_z))
        z_tilda = q_mean + T.exp(0.5 * q_log_var) * eps

        stats_x = self.generate_model(z_tilda)
        p_mean = stats_x['mu']
        p_log_var = stats_x['log_sigma']

        if self.type_px == 'gaussian':
            log_p_x_given_z = (-0.5 * np.log(2 * np.pi) - 0.5 * p_log_var -
                               0.5 * (X - p_mean) ** 2 / (2 * T.exp(p_log_var)))
        elif self.type_px == 'bernoulli':
            log_p_x_given_z = X * T.log(p_mean) + (1 - X) * T.log(1 - p_mean)

        logqz = -0.5 * T.sum(np.log(2 * np.pi) + 1 + q_log_var)
        logpz = -0.5 * T.sum(np.log(2 * np.pi) + q_mean ** 2 + T.exp(q_log_var))

        consts = []

        return (T.sum(log_p_x_given_z) / n_mc_sampling +
                (logpz - logqz)) / n_samples, consts

    def fit(self, x_datas):
        X = T.matrix()
        self.rng_noise = RandomStreams(self.hyper_params['rng_seed'])
        self.init_model_params(dim_x=x_datas.shape[1])

        lbound, consts = self.get_expr_lbound(X)
        cost = -lbound

        print 'start fitting'
        self.hist = self.adam_calc(x_datas, cost, consts, X,
                                   self.model_params_, self.adagrad_params,
                                   self.rng)

    def adagrad_calc(self, x_datas, cost, consts, X, model_params, hyper_params, rng):
        n_iters = hyper_params['n_iters']
        learning_rate = hyper_params['learning_rate']
        minibatch_size = hyper_params['minibatch_size']
        n_mod_history = hyper_params['n_mod_history']
        calc_history = hyper_params['calc_history']

        hs = [
            theano.shared(
                np.ones(param.get_value(borrow=True).shape).astype(
                    theano.config.floatX))
            for param in model_params
        ]

        gparams = T.grad(cost=cost, wrt=model_params, consider_constant=consts)
        updates = [(param, param - learning_rate / T.sqrt(h) * gparam)
                   for param, gparam, h in zip(model_params, gparams, hs)]
        updates += [(h, h + gparam ** 2) for gparam, h in zip(gparams, hs)]

        train = theano.function(inputs=[X], outputs=cost, updates=updates)
        validate = theano.function(inputs=[X], outputs=cost)

        n_samples = x_datas.shape[0]
        cost_history = []
        for i in xrange(n_iters):
            ixs = rng.permutation(n_samples)[:minibatch_size]
            minibatch_cost = train(x_datas[ixs])
            # print minibatch_cost
            if np.mod(i, n_mod_history) == 0:
                print '%d epoch error: %f' % (i, minibatch_cost)
                if calc_history == 'minibatch':
                    cost_history.append((i, minibatch_cost))
                else:
                    cost_history.append((i, validate(x_datas[ixs])))
        return cost_history

    def adam_calc(self, x_datas, cost, consts, X, model_params, hyper_params, rng):
        n_iters = hyper_params['n_iters']
        learning_rate = hyper_params['learning_rate']
        minibatch_size = hyper_params['minibatch_size']
        n_mod_history = hyper_params['n_mod_history']
        calc_history = hyper_params['calc_history']

        rs = [
            theano.shared(
                np.ones(param.get_value(borrow=True).shape).astype(
                    theano.config.floatX))
            for param in model_params
        ]
        vs = [
            theano.shared(
                np.ones(param.get_value(borrow=True).shape).astype(
                    theano.config.floatX))
            for param in model_params
        ]
        ts = [
            theano.shared(
                np.ones(param.get_value(borrow=True).shape).astype(
                    theano.config.floatX))
            for param in model_params
        ]
        gnma = 0.999
        beta = 0.9
        weight_decay = 1000 / 50000.

        gparams = T.grad(cost=cost, wrt=model_params, consider_constant=consts)
        updates = [(param,
                    param - learning_rate / T.sqrt(r / (1 - gnma ** t)) *
                    v / (1 - beta ** t))
                   for param, r, v, t in zip(model_params, rs, vs, ts)]
        updates += [(r, gnma * r + (1 - gnma) * (gparam - weight_decay * param) ** 2)
                    for param, gparam, r in zip(model_params, gparams, rs)]
        updates += [(v, beta * v + (1 - beta) * (gparam - weight_decay * param))
                    for param, gparam, v in zip(model_params, gparams, vs)]
        updates += [(t, t + 1) for t in ts]

        train = theano.function(inputs=[X], outputs=cost, updates=updates)
        validate = theano.function(inputs=[X], outputs=cost)

        n_samples = x_datas.shape[0]
        cost_history = []
        for i in xrange(n_iters):
            ixs = rng.permutation(n_samples)[:minibatch_size]
            minibatch_cost = train(x_datas[ixs])
            # print minibatch_cost
            if np.mod(i, n_mod_history) == 0:
                print '%d epoch error: %f' % (i, minibatch_cost)
                if calc_history == 'minibatch':
                    cost_history.append((i, minibatch_cost))
                else:
                    cost_history.append((i, validate(x_datas[ixs])))
        return cost_history
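For orientation, adam_calc above applies an Adam-style rule with bias correction and a weight-decay term folded into the gradient. The scalar sketch below mirrors one step of that rule with made-up numbers; the variable names follow the code and nothing here is taken from the original module.

# One adam_calc-style step on a single scalar parameter (illustrative values).
learning_rate, gnma, beta, weight_decay = 0.001, 0.999, 0.9, 1000 / 50000.
param, r, v, t = 0.5, 1.0, 1.0, 1.0
gparam = 0.3                            # gradient of the cost w.r.t. param
g = gparam - weight_decay * param       # gradient with weight decay folded in
# Theano applies all updates simultaneously, so each right-hand side uses the
# old values of param, r, v and t.
new_param = param - learning_rate / (r / (1 - gnma ** t)) ** 0.5 * v / (1 - beta ** t)
new_r = gnma * r + (1 - gnma) * g ** 2  # second-moment estimate
new_v = beta * v + (1 - beta) * g       # first-moment estimate
new_t = t + 1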
def test_init_weights(self):
    x = Layer(1)
    x.init_weights()
    self.assertIsNone(x.weights)

    cnf = lambda: 0

    x = Layer(1, cnf)
    y = Layer(1, cnf)
    x.next_layer(y)
    x.init_weights()
    self.assertEqual(x.weights, [[0]])

    x = Layer(1, cnf)
    y = Layer(2, cnf)
    x.next_layer(y)
    x.init_weights()
    self.assertEqual(x.weights, [[0, 0]])

    x = Layer(2, cnf)
    y = Layer(1, cnf)
    x.next_layer(y)
    x.init_weights()
    self.assertEqual(x.weights, [[0], [0]])

    x = Layer(2, cnf)
    y = Layer(2, cnf)
    x.next_layer(y)
    x.init_weights()
    self.assertEqual(x.weights, [[0, 0], [0, 0]])
nn_path = options.model
if path.isdir(nn_path) is True:
    raise Exception(nn_path + ': Is a directory.')
if path.exists(nn_path) is False:
    raise Exception(nn_path + ': No such file or directory.')

# The format of the save is as follows:
#   [layer: [activation, n_input, neurons, weights, biases]]
nn_load = np.load(nn_path, allow_pickle=True)

nn = NeuralNetwork()
# Use all provided dataset
cfg['batch_size'] = 1

# Load data set
_, _, X_test, y_test = preprocessing(cfg, csv2data(dataset_path))

for x in nn_load:
    activation = x[0]
    weights = x[3]
    bias = x[4]
    nn.add_layer(Layer(activation=activation, weights=weights, bias=bias))

y_predict = nn.feed_forward(X_test)
print('MSE: %f' % (nn.mean_squarred_error(y_predict, y_test)))
print('CEE: %f' % (nn.cross_entropy_error(y_predict, y_test)))
print('ACCURACY: %f' % (nn.accuracy(y_predict, y_test)))
def create_tab(function_text, model, learning_rate=1e-3):
    return {
        'function': function_text,
        'model': model,
        'gradient': Gradient(),
        'error': SquareError(),
        'teacher': GradientTeacher(),
        'learning_rate': learning_rate
    }


tabs = [
    create_tab(function_text="2 * x",
               model=MultipleLayersModel([
                   Layer(input_dimension=1, output_dimension=1,
                         activation_function=LinearFunction()),
                   Layer(input_dimension=1, output_dimension=1,
                         activation_function=LinearFunction())
               ])),
    create_tab(function_text="50 * x",
               learning_rate=1e-4,
               model=MultipleLayersModel([
                   Layer(input_dimension=1, output_dimension=1,
                         activation_function=LinearFunction()),
                   Layer(input_dimension=1, output_dimension=1,
                         activation_function=LinearFunction())
               ])),