def Discriminator(real, fake):
    in_len = 784
    m_disc = 512
    batch_size = 64

    # Score real and generated samples with shared weights by stacking them.
    pair = T.concatenate([real, fake], axis=0)

    h1 = HiddenLayer(in_len, m_disc)
    h2 = HiddenLayer(m_disc, m_disc)
    h3 = HiddenLayer(m_disc, 1, activation='sigmoid')

    pc1 = h1.output(pair)
    pc2 = h2.output(pc1)
    pc3 = h3.output(pc2)

    p_real = pc3[:real.shape[0], :].flatten()
    p_gen = pc3[-real.shape[0]:, :].flatten()

    # Discriminator targets: 1 for real samples, 0 for generated ones.
    d_cost_real = binary_crossentropy(p_real, T.ones(p_real.shape)).mean()
    d_cost_real = (d_cost_real * (d_cost_real < 0.9)).mean()
    d_cost_gen = binary_crossentropy(p_gen, T.zeros(p_gen.shape)).mean()
    d_cost_gen = (d_cost_gen * (d_cost_gen > 0.1)).mean()

    # Generator is trained to make the discriminator output 1 on fakes.
    g_cost_d = binary_crossentropy(p_gen, T.ones(p_gen.shape)).mean()

    d_cost = (d_cost_real + d_cost_gen) / 2.0
    g_cost = g_cost_d

    layers = [h1, h2, h3]
    params = layers2params(layers)

    return d_cost, g_cost, params
def get_reconstruction_cost(self, pre_sigmoid_nv):
    """Approximation to the reconstruction error.

    Note that this function requires the pre-sigmoid activation as input. To
    understand why, you need to know a bit about how Theano works. Whenever a
    Theano function is compiled, the computational graph passed as input is
    optimized for speed and stability by replacing several subgraphs with
    others. One such optimization rewrites terms of the form log(sigmoid(x))
    in terms of softplus. We need this optimization for the cross-entropy,
    since the sigmoid of numbers larger than 30 (or even less than that)
    turns to 1, and the sigmoid of numbers smaller than -30 turns to 0, which
    in turn forces Theano to compute log(0), so the cost becomes either -inf
    or NaN. Expressed in terms of softplus, the value does not show this
    undesirable behaviour.

    This optimization usually works fine, but here we have a special case:
    the sigmoid is applied inside the scan op, while the log is outside.
    Theano therefore only sees log(scan(..)) instead of log(sigmoid(..)) and
    cannot apply the wanted optimization. We also cannot simply replace the
    sigmoid inside scan with something else, because it only needs to be
    changed on the last step. The easiest and most efficient way is therefore
    to also return the pre-sigmoid activation as an output of scan, and to
    apply both the sigmoid and the log outside scan, so that Theano can catch
    and optimize the expression.
    """
    cross_entropy = nnet.binary_crossentropy(
        nnet.sigmoid(pre_sigmoid_nv), self.input).sum(axis=1).mean()
    return cross_entropy
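# A minimal NumPy sketch (an illustration, not part of the tutorial code above)
# of the numerical issue the docstring describes: for a large pre-sigmoid
# activation, sigmoid(x) rounds to exactly 1.0, so the log(1 - sigmoid(x)) term
# of the cross-entropy becomes log(0). The softplus identity
# log(1 - sigmoid(x)) = -softplus(x) = -log(1 + exp(x)) avoids this.
import numpy as np

x = np.array([40.0])                 # large pre-sigmoid activation
p = 1.0 / (1.0 + np.exp(-x))         # rounds to exactly 1.0 in float64

naive = np.log(1.0 - p)              # log(0) -> -inf (and NaN once mixed into the cost)
stable = -np.logaddexp(0.0, x)       # -softplus(x), stays finite

print(naive, stable)                 # [-inf] [-40.]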
def get_loss(self):
    """
    The mean of the binary cross-entropy tensor, where binary cross-entropy
    is applied element-wise:

        crossentropy(target, input) =
            -(target * log(input) + (1 - target) * log(1 - input))

    Returns
    -------
    theano expression
        The loss function.
    """
    input = self.inputs[0]
    target = self.targets[0]
    return mean(nnet.binary_crossentropy(input, target))
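# A small NumPy check (an illustration, not part of the class above) of the
# element-wise formula quoted in the docstring; note that Theano's argument
# order is binary_crossentropy(output, target).
import numpy as np

output = np.array([0.9, 0.2, 0.6])   # predicted probabilities ("input" above)
target = np.array([1.0, 0.0, 1.0])   # binary labels

bce = -(target * np.log(output) + (1.0 - target) * np.log(1.0 - output))
print(bce.mean())                    # ~0.28, the value get_loss() would return for these arrays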
def binary_cross_entropy(obj):
    # Clip predictions away from exactly 0 and 1 so the log stays finite.
    obj.out = T.clip(obj.out, _EPSILON, 1.0 - _EPSILON)
    obj.loss = nnet.binary_crossentropy(obj.out, obj.y).mean()

    # "Accuracy" here is an R^2-style score (1 - SS_res / SS_tot) rather than a
    # thresholded classification accuracy; the commented lines show the earlier
    # classification-accuracy version.
    diff = obj.y - obj.out
    #obj.train_acc = (T.eq(obj.out, obj.y).sum().astype(theano.config.floatX) / obj.n_batch)
    #obj.valid_acc = (T.eq(obj.out, obj.y).sum().astype(theano.config.floatX) / obj.x_test_arr.shape[0])
    obj.train_acc = 1 - ((diff ** 2).sum() / ((obj.y.flatten() - obj.y.mean()) ** 2).sum())
    obj.valid_acc = 1 - ((diff ** 2).sum() / ((obj.y.flatten() - obj.y.mean()) ** 2).sum())
    return obj
def __step(img, prev_bbox, state, timestep):
    conv1 = conv2d(img, conv1_filters, subsample=(conv1_stride, conv1_stride), border_mode='half')
    act1 = NN.relu(conv1)
    flat1 = TT.reshape(act1, (-1, conv1_output_dim))
    gru_in = TT.concatenate([flat1, prev_bbox], axis=1)

    # Standard GRU update on the concatenated features and previous bounding box.
    gru_z = NN.sigmoid(TT.dot(gru_in, Wz) + TT.dot(state, Uz) + bz)
    gru_r = NN.sigmoid(TT.dot(gru_in, Wr) + TT.dot(state, Ur) + br)
    gru_h_ = TT.tanh(TT.dot(gru_in, Wg) + TT.dot(gru_r * state, Ug) + bg)
    gru_h = (1 - gru_z) * state + gru_z * gru_h_

    # Predict a bounding box in [-1, 1] coordinates and convert it to
    # center/size in pixel units.
    bbox = TT.tanh(TT.dot(gru_h, W_fc2) + b_fc2)
    bbox_cx = ((bbox[:, 2] + bbox[:, 0]) / 2 + 1) / 2 * img_row
    bbox_cy = ((bbox[:, 3] + bbox[:, 1]) / 2 + 1) / 2 * img_col
    bbox_w = TT.abs_(bbox[:, 2] - bbox[:, 0]) / 2 * img_row
    bbox_h = TT.abs_(bbox[:, 3] - bbox[:, 1]) / 2 * img_col

    # Build a soft rectangular mask around the predicted box.
    x = TT.arange(img_row, dtype=T.config.floatX)
    y = TT.arange(img_col, dtype=T.config.floatX)
    mx = TT.maximum(TT.minimum(-TT.abs_(x.dimshuffle('x', 0) - bbox_cx.dimshuffle(0, 'x')) + bbox_w.dimshuffle(0, 'x') / 2., 1), 1e-4)
    my = TT.maximum(TT.minimum(-TT.abs_(y.dimshuffle('x', 0) - bbox_cy.dimshuffle(0, 'x')) + bbox_h.dimshuffle(0, 'x') / 2., 1), 1e-4)
    bbox_mask = mx.dimshuffle(0, 1, 'x') * my.dimshuffle(0, 'x', 1)

    new_cls1_f = cls_f
    new_cls1_b = cls_b

    mask = act1 * bbox_mask.dimshuffle(0, 'x', 1, 2)

    # Append the current feature and probability maps to the per-timestep
    # buffers; disconnected_grad keeps these buffers out of the outer gradient.
    new_featmaps = TG.disconnected_grad(TT.set_subtensor(featmaps[:, timestep], mask))
    new_featmaps.name = 'new_featmaps'
    new_probmaps = TG.disconnected_grad(TT.set_subtensor(probmaps[:, timestep], bbox_mask))
    new_probmaps.name = 'new_probmaps'

    train_featmaps = TG.disconnected_grad(new_featmaps[:, :timestep + 1].reshape(((timestep + 1) * batch_size, conv1_nr_filters, img_row, img_col)))
    train_featmaps.name = 'train_featmaps'
    train_probmaps = TG.disconnected_grad(new_probmaps[:, :timestep + 1])
    train_probmaps.name = 'train_probmaps'

    # Inner loop: five gradient steps on the per-pixel classifier, trained with
    # binary cross-entropy against the box masks collected so far.
    for _ in range(0, 5):
        train_convmaps = conv2d(train_featmaps, new_cls1_f, subsample=(cls1_stride, cls1_stride), border_mode='half').reshape((batch_size, timestep + 1, batch_size, img_row, img_col))
        train_convmaps.name = 'train_convmaps'
        train_convmaps_selected = train_convmaps[TT.arange(batch_size).repeat(timestep + 1), TT.tile(TT.arange(timestep + 1), batch_size), TT.arange(batch_size).repeat(timestep + 1)].reshape((batch_size, timestep + 1, img_row, img_col))
        train_convmaps_selected.name = 'train_convmaps_selected'
        train_predmaps = NN.sigmoid(train_convmaps_selected + new_cls1_b.dimshuffle(0, 'x', 'x', 'x'))
        train_loss = NN.binary_crossentropy(train_predmaps, train_probmaps).mean()
        train_grad_cls1_f, train_grad_cls1_b = T.grad(train_loss, [new_cls1_f, new_cls1_b])
        new_cls1_f -= train_grad_cls1_f * 0.1
        new_cls1_b -= train_grad_cls1_b * 0.1

    return (bbox, gru_h, timestep + 1, mask, bbox_mask), \
           {cls_f: TG.disconnected_grad(new_cls1_f),
            cls_b: TG.disconnected_grad(new_cls1_b),
            featmaps: TG.disconnected_grad(new_featmaps),
            probmaps: TG.disconnected_grad(new_probmaps)}
def build_artificial_neural_network(self, no_of_input_nodes, hidden_nodes_topology, output_nodes_no, act_func, error_func):
    network_topology = [no_of_input_nodes] + hidden_nodes_topology + [output_nodes_no]

    weights = []
    biases = []
    for i in range(len(network_topology) - 1):
        weights.append(theano.shared(np.random.uniform(-.1, .1, size=(network_topology[i], network_topology[i + 1]))))

    input_vector = T.dvector('input')
    expected_output_vector = T.dvector('expected_output')

    for i in range(1, len(network_topology)):
        biases.append(theano.shared(np.random.uniform(-.1, .1, size=network_topology[i])))

    if act_func == 'sigmoid':
        activation_function = Tann.sigmoid
    elif act_func == 'tanh':
        activation_function = T.tanh
    else:
        raise ValueError('Activation function must be "sigmoid" or "tanh"')

    outputs = [activation_function(T.dot(input_vector, weights[0]) + biases[0])]
    for i in range(1, len(network_topology) - 1):
        outputs.append(activation_function(T.dot(outputs[i - 1], weights[i]) + biases[i]))

    if error_func == 'squared':
        error = T.sum((expected_output_vector - outputs[-1]) ** 2)
    elif error_func == 'binary_cross':
        error = Tann.binary_crossentropy(outputs[-1], expected_output_vector).mean()
    else:
        raise ValueError('Error function must be "squared" or "binary_cross"')

    params = []
    for i in range(len(network_topology) - 1):
        params.append(weights[i])
        params.append(biases[i])

    gradients = T.grad(error, params)
    back_propagation_activations = [(p, p - self.learning_rate * g) for p, g in zip(params, gradients)]

    self.trainer = theano.function([input_vector, expected_output_vector], error, updates=back_propagation_activations)
    self.predictor = theano.function([input_vector], outputs[-1])
    self.error_for_input = theano.function([input_vector, expected_output_vector], error)
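# Hypothetical usage sketch for the builder above. The ANN wrapper class, the
# learning rate, and the XOR data are assumptions made for illustration, not
# part of the original code; the builder's own imports (np, theano, T, Tann)
# are taken to be in scope.
class ANN(object):
    def __init__(self, learning_rate=0.5):
        self.learning_rate = learning_rate

ANN.build_artificial_neural_network = build_artificial_neural_network

net = ANN()
net.build_artificial_neural_network(2, [4], 1, 'sigmoid', 'binary_cross')

xor_pairs = [([0., 0.], [0.]), ([0., 1.], [1.]), ([1., 0.], [1.]), ([1., 1.], [0.])]
for _ in range(5000):                 # simple online training loop
    for x, y in xor_pairs:
        net.trainer(x, y)
print(net.predictor([1., 0.]))        # should drift toward 1.0 (depends on the random init)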
from theano.tensor.nnet import sigmoid, binary_crossentropy

# Read the CSV data; each row has the format "1,0.697,0.46,是\n"
file = open('data/西瓜数据集3.0.csv'.decode('utf-8'))
data = [raw.strip('\n').split(',') for raw in file]
X = [[float(raw[-3]), float(raw[-2])] for raw in data[1:]]
Y = [1 if raw[-1] == '是' else 0 for raw in data[1:]]

feats = len(X[0])
lrate = 1
maxturn = 10000

x = T.dmatrix('x')
y = T.vector('y')
w = theano.shared(rng.normal(size=feats), name='w')
b = theano.shared(rng.randn(), name='b')

z = T.dot(x, w) + b
p = sigmoid(z)
cost = binary_crossentropy(p, y).mean()
gw, gb = theano.grad(cost, [w, b])
pred_res = p > 0.5

fit = theano.function(inputs=[x, y],
                      outputs=[cost, gw, gb],
                      updates=((w, w - lrate * gw), (b, b - lrate * gb)))
predict = theano.function(inputs=[x], outputs=[pred_res])

for i in range(maxturn):
    print fit(X, Y)

train_res = predict(X)[0]
print 'predict result:'
print train_res
print 'accuracy:'
print float(sum([Y[i] == train_res[i] for i in range(len(Y))])) / len(Y)
def cross_entropy_binary(self, y):
    output = T.clip(self.p_y_given_x, 1e-7, 1 - (1e-7))
    return T.sum(binary_crossentropy(output, y), axis=1)
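# Small NumPy illustration (not from the class above) of why the clip matters:
# a predicted probability of exactly 0 or 1 makes the cross-entropy infinite,
# while clipping to [1e-7, 1 - 1e-7] keeps every term finite.
import numpy as np

p = np.array([0.0, 1.0, 0.5])        # raw predicted probabilities
y = np.array([1.0, 0.0, 1.0])        # targets

eps = 1e-7
p_clipped = np.clip(p, eps, 1 - eps)
bce = -(y * np.log(p_clipped) + (1 - y) * np.log(1 - p_clipped))
print(bce)                           # finite everywhere; without the clip the first two are inf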
# b0 = theano.shared(asarray([-1.5, 1.0]), 'b0')
# w1 = theano.shared(asarray([-1.5, -1.5]), 'w1')
# b1 = theano.shared(1.0, 'b1')

# Random weights
w0 = theano.shared(initial_scale * rng.randn(nhidden, 2), 'w0')
b0 = theano.shared(initial_scale * rng.randn(nhidden), 'b0')
w1 = theano.shared(initial_scale * rng.randn(nhidden), 'w1')
b1 = theano.shared(initial_scale * rng.randn(), 'b1')

# Net
x0 = T.fvector('x0')
yt = T.fvector('yt')
x1 = nnet.sigmoid(T.dot(w0, x0) + b0)
x2 = nnet.sigmoid(T.dot(w1, x1) + b1)
error = nnet.binary_crossentropy(x2, yt).mean()
grad_w0, grad_b0, grad_w1, grad_b1 = T.grad(error, [w0, b0, w1, b1])

predict = theano.function(inputs=[x0], outputs=x2)
train = theano.function(inputs=[x0, yt], outputs=[x2, error],
                        updates=((w0, w0 - learning_rate * grad_w0),
                                 (b0, b0 - learning_rate * grad_b0),
                                 (w1, w1 - learning_rate * grad_w1),
                                 (b1, b1 - learning_rate * grad_b1)))

### Training loop ###
print("Predict:")
for (xd, yd) in training_data:
    print("\t%r -> %f" % (xd, predict(xd)))
print("Train:")
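# The excerpt above uses several names that are not shown (rng, nhidden,
# initial_scale, learning_rate, training_data). A plausible self-contained set
# of definitions, offered only as an assumption about what would precede it:
import numpy as np
import theano
import theano.tensor as T
import theano.tensor.nnet as nnet

rng = np.random.RandomState(0)
nhidden = 2
initial_scale = 0.1
learning_rate = 1.0

# XOR-style training pairs: (float32 input vector, float32 target vector)
training_data = [
    (np.array([0., 0.], dtype='float32'), np.array([0.], dtype='float32')),
    (np.array([0., 1.], dtype='float32'), np.array([1.], dtype='float32')),
    (np.array([1., 0.], dtype='float32'), np.array([1.], dtype='float32')),
    (np.array([1., 1.], dtype='float32'), np.array([0.], dtype='float32')),
]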
def create(self, **params):
    """
    Create the neural network training functions.

    Create a cost function to minimize, compute the gradients of the cost
    with respect to all the parameters, and compile functions for training
    the network, measuring the cost for a given data set, and predicting
    output values for a given set of inputs.

    This function must be called prior to train(), cost(), output(), or
    predict().

    Available parameters are:

    L1 - L1 regularization parameter. This parameter can be overridden for
         each layer by calling layer.regularize().
    L2 - L2 regularization parameter. This parameter can be overridden for
         each layer by calling layer.regularize().
    learning_rate - minibatch gradient descent learning rate. This parameter
         can be overridden for each layer by calling layer.regularize().
    """
    logging.info("Creating neural network %s" % self.name)
    if not self.initialized:
        self.initialize()
    for line in str(self).split("\n"):
        logging.info(line)

    # Use default values for parameters if none provided
    default_values = {'L1': 0.0, 'L2': 0.0, 'learning_rate': 0.1}
    for key in default_values:
        if not key in params:
            params[key] = default_values[key]

    # Set regularization if it isn't set for each layer
    for layer in self.layers:
        if not hasattr(layer, 'regularization'):
            layer.regularization = {'L1': params['L1'], 'L2': params['L2']}
        if not hasattr(layer, 'learning_rate'):
            layer.learning_rate = params['learning_rate']

    # Input and output layers have symbolic variables
    input_data = self.layers[0].layer_output()
    output_data = self.layers[-1].layer_output()
    output_data_test = self.layers[-1].layer_output(test=True)

    # Create the cost function to minimize
    labels = tensor.dmatrix("labels")
    cost = binary_crossentropy(output_data, labels).mean()

    # Regularization penalty for large weights
    for layer in self.layers:
        for param in layer.params:
            param = param.flatten()
            cost += layer.regularization['L1'] * tensor.sum(abs(param))
            cost += layer.regularization['L2'] * tensor.sum(param ** 2)

    # Compute update rule
    updates = {}
    for layer in self.layers:
        gradients = grad(cost, layer.params)
        for (variable, gradient) in zip(layer.params, gradients):
            updates[variable] = variable - layer.learning_rate * gradient

    # Get outputs of all layers
    layer_outputs = [layer.layer_output(test=True) for layer in self.layers]

    # Compile all Theano functions
    mode = ProfileMode(optimizer='fast_run', linker=gof.OpWiseCLinker())
    self.mode = mode
    self.theano_train = function(inputs=[input_data, labels], outputs=cost, updates=updates)
    self.theano_get_cost = function(inputs=[input_data, labels], outputs=cost)
    self.theano_get_output = function(inputs=[input_data], outputs=output_data_test)
    self.theano_get_layers = function(inputs=[input_data], outputs=layer_outputs)

    # Ready to train
    self.initialized = True