Example #1
def Discriminator(real, fake):
    in_len = 784
    m_disc = 512

    batch_size = 64

    # Run real and generated samples through the discriminator in a single pass
    pair = T.concatenate([real, fake], axis=0)

    h1 = HiddenLayer(in_len, m_disc)
    h2 = HiddenLayer(m_disc, m_disc)
    h3 = HiddenLayer(m_disc, 1, activation='sigmoid')

    pc1 = h1.output(pair)
    pc2 = h2.output(pc1)
    pc3 = h3.output(pc2)

    # Split the sigmoid outputs back into scores for the real and generated halves
    p_real = pc3[:real.shape[0], :].flatten()
    p_gen = pc3[-real.shape[0]:, :].flatten()

    # Standard GAN losses, gated by a hard threshold on their own value
    # (the comparisons yield 0/1 masks; the trailing .mean() on the already
    # scalar terms is a no-op)
    d_cost_real = binary_crossentropy(p_real, T.ones(p_real.shape)).mean()
    d_cost_real = (d_cost_real * (d_cost_real < 0.9)).mean()
    d_cost_gen = binary_crossentropy(p_gen, T.zeros(p_gen.shape)).mean()
    d_cost_gen = (d_cost_gen * (d_cost_gen > 0.1)).mean()
    g_cost_d = binary_crossentropy(p_gen, T.ones(p_gen.shape)).mean()

    d_cost = (d_cost_real + d_cost_gen) / 2.0
    g_cost = g_cost_d

    layers = [h1, h2, h3]
    params = layers2params(layers)

    return d_cost, g_cost, params
Example #2
    def get_reconstruction_cost(self, pre_sigmoid_nv):
        """Approximation to the reconstruction error

        Note that this function requires the pre-sigmoid activation as
        input. To understand why, you need to know a bit about how
        Theano works. Whenever you compile a Theano function, the
        computational graph that you pass in gets optimized for speed
        and stability. This is done by replacing parts of the graph
        with equivalent subgraphs. One such optimization expresses
        terms of the form log(sigmoid(x)) in terms of softplus. We
        need this optimization for the cross-entropy, since the
        sigmoid of numbers larger than 30. (or even less than that)
        turns into 1. and the sigmoid of numbers smaller than -30.
        turns into 0., which in turn forces Theano to compute log(0)
        and therefore yields -inf or NaN as the cost. If the value is
        expressed in terms of softplus we do not get this undesirable
        behaviour. The optimization usually works fine, but here we
        have a special case: the sigmoid is applied inside the scan
        op, while the log is outside. Theano therefore only sees
        log(scan(..)) instead of log(sigmoid(..)) and does not apply
        the desired optimization. We also cannot simply replace the
        sigmoid inside scan with something else, because it only
        needs to be changed on the last step. The easiest and most
        efficient approach is therefore to also return the
        pre-sigmoid activation as an output of scan, and to apply
        both the sigmoid and the log outside scan, so that Theano can
        catch and optimize the expression.

        """

        cross_entropy = nnet.binary_crossentropy(
            nnet.sigmoid(pre_sigmoid_nv), self.input).sum(axis=1).mean()

        return cross_entropy
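A standalone NumPy sketch (an illustration, not part of the RBM class above) of the numerical issue the docstring describes: in float32, sigmoid(30.) already rounds to exactly 1., so the log(1 - sigmoid(x)) term of the cross-entropy becomes log(0), whereas the equivalent softplus form log(1 - sigmoid(x)) = -softplus(x) that Theano's rewrite produces stays finite.

import numpy as np

x = np.float32(30.0)
p = np.float32(1.0) / (np.float32(1.0) + np.exp(-x))  # sigmoid(30.) rounds to exactly 1.0 in float32

naive = np.log1p(-p)                        # log(1 - sigmoid(x)) -> log(0) = -inf
stable = -np.logaddexp(np.float32(0.0), x)  # -softplus(x), the rewritten form -> -30.0
print(naive, stable)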
    def get_loss(self):
        """
        The mean of the binary cross-entropy tensor, where binary cross-entropy is applied element-wise:
        crossentropy(target,input) = -(target*log(input) + (1 - target)*(log(1 - input))).

        Returns
        -------
        theano expression
            The loss function.
        """
        input = self.inputs[0]
        target = self.targets[0]
        return mean(nnet.binary_crossentropy(input, target))
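A quick standalone check of the element-wise formula in the docstring (plain NumPy, not part of this class): for a prediction of 0.8 against a target of 1 the loss is -log(0.8) ≈ 0.223, and get_loss() returns the mean over all elements.

import numpy as np

def bce(pred, target):
    # -(target*log(pred) + (1 - target)*log(1 - pred)), as in the docstring
    return -(target * np.log(pred) + (1.0 - target) * np.log(1.0 - pred))

pred = np.array([0.8, 0.1])
target = np.array([1.0, 0.0])
print(bce(pred, target))         # approximately [0.2231, 0.1054]
print(bce(pred, target).mean())  # approximately 0.1642, the mean that get_loss() computes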
Example #4
def binary_cross_entropy(obj):
    obj.out = T.clip(obj.out, _EPSILON, 1.0 - _EPSILON)
    obj.loss = nnet.binary_crossentropy(obj.out, obj.y).mean()

    # Goodness-of-fit score: 1 - SS_res / SS_tot (an R^2-style measure), not a
    # strict classification accuracy. Note that train_acc and valid_acc are
    # built from the same symbolic expression here.
    diff = obj.y - obj.out
    #obj.train_acc = (T.eq(obj.out,obj.y).sum().astype(theano.config.floatX) / obj.n_batch)
    #obj.valid_acc = (T.eq(obj.out,obj.y).sum().astype(theano.config.floatX) / obj.x_test_arr.shape[0])
    obj.train_acc = 1 - ((diff**2).sum() /
                         ((obj.y.flatten() - obj.y.mean())**2).sum())
    obj.valid_acc = 1 - ((diff**2).sum() /
                         ((obj.y.flatten() - obj.y.mean())**2).sum())

    return obj
def __step(img, prev_bbox, state, timestep):
    # Convolutional feature extraction
    conv1 = conv2d(img, conv1_filters, subsample=(conv1_stride, conv1_stride), border_mode='half')
    act1 = NN.relu(conv1)
    flat1 = TT.reshape(act1, (-1, conv1_output_dim))

    # GRU update over the concatenated features and previous bounding box
    gru_in = TT.concatenate([flat1, prev_bbox], axis=1)
    gru_z = NN.sigmoid(TT.dot(gru_in, Wz) + TT.dot(state, Uz) + bz)
    gru_r = NN.sigmoid(TT.dot(gru_in, Wr) + TT.dot(state, Ur) + br)
    gru_h_ = TT.tanh(TT.dot(gru_in, Wg) + TT.dot(gru_r * state, Ug) + bg)
    gru_h = (1 - gru_z) * state + gru_z * gru_h_
    bbox = TT.tanh(TT.dot(gru_h, W_fc2) + b_fc2)

    # Map the normalized bbox (coordinates in [-1, 1]) to pixel-space centre and size
    bbox_cx = ((bbox[:, 2] + bbox[:, 0]) / 2 + 1) / 2 * img_row
    bbox_cy = ((bbox[:, 3] + bbox[:, 1]) / 2 + 1) / 2 * img_col
    bbox_w = TT.abs_(bbox[:, 2] - bbox[:, 0]) / 2 * img_row
    bbox_h = TT.abs_(bbox[:, 3] - bbox[:, 1]) / 2 * img_col

    # Build a soft, differentiable mask for the predicted bbox
    x = TT.arange(img_row, dtype=T.config.floatX)
    y = TT.arange(img_col, dtype=T.config.floatX)
    mx = TT.maximum(TT.minimum(-TT.abs_(x.dimshuffle('x', 0) - bbox_cx.dimshuffle(0, 'x')) + bbox_w.dimshuffle(0, 'x') / 2., 1), 1e-4)
    my = TT.maximum(TT.minimum(-TT.abs_(y.dimshuffle('x', 0) - bbox_cy.dimshuffle(0, 'x')) + bbox_h.dimshuffle(0, 'x') / 2., 1), 1e-4)
    bbox_mask = mx.dimshuffle(0, 1, 'x') * my.dimshuffle(0, 'x', 1)

    new_cls1_f = cls_f
    new_cls1_b = cls_b

    mask = act1 * bbox_mask.dimshuffle(0, 'x', 1, 2)

    # Store the masked feature maps and probability maps for this timestep;
    # disconnected_grad blocks gradients from flowing back through the stored maps
    new_featmaps = TG.disconnected_grad(TT.set_subtensor(featmaps[:, timestep], mask))
    new_featmaps.name = 'new_featmaps'
    new_probmaps = TG.disconnected_grad(TT.set_subtensor(probmaps[:, timestep], bbox_mask))
    new_probmaps.name = 'new_probmaps'

    train_featmaps = TG.disconnected_grad(new_featmaps[:, :timestep+1].reshape(((timestep + 1) * batch_size, conv1_nr_filters, img_row, img_col)))
    train_featmaps.name = 'train_featmaps'
    train_probmaps = TG.disconnected_grad(new_probmaps[:, :timestep+1])
    train_probmaps.name = 'train_probmaps'

    # A few gradient-descent steps on the online classifier filters, using the
    # frames seen so far as training data
    for _ in range(0, 5):
        train_convmaps = conv2d(train_featmaps, new_cls1_f, subsample=(cls1_stride, cls1_stride), border_mode='half').reshape((batch_size, timestep + 1, batch_size, img_row, img_col))
        train_convmaps.name = 'train_convmaps'
        train_convmaps_selected = train_convmaps[TT.arange(batch_size).repeat(timestep+1), TT.tile(TT.arange(timestep+1), batch_size), TT.arange(batch_size).repeat(timestep+1)].reshape((batch_size, timestep+1, img_row, img_col))
        train_convmaps_selected.name = 'train_convmaps_selected'
        train_predmaps = NN.sigmoid(train_convmaps_selected + new_cls1_b.dimshuffle(0, 'x', 'x', 'x'))
        train_loss = NN.binary_crossentropy(train_predmaps, train_probmaps).mean()
        train_grad_cls1_f, train_grad_cls1_b = T.grad(train_loss, [new_cls1_f, new_cls1_b])
        new_cls1_f -= train_grad_cls1_f * 0.1
        new_cls1_b -= train_grad_cls1_b * 0.1

    return (bbox, gru_h, timestep + 1, mask, bbox_mask), {cls_f: TG.disconnected_grad(new_cls1_f), cls_b: TG.disconnected_grad(new_cls1_b), featmaps: TG.disconnected_grad(new_featmaps), probmaps: TG.disconnected_grad(new_probmaps)}
    def build_artificial_neural_network(self, no_of_input_nodes, hidden_nodes_topology, output_nodes_no, act_func, error_func):
        network_topology = [no_of_input_nodes] + hidden_nodes_topology + [output_nodes_no]

        weights = []
        biases = []

        for i in range(len(network_topology) - 1):
            weights.append(theano.shared(np.random.uniform(-.1, .1, size=(network_topology[i], network_topology[i+1]))))

        input_vector = T.dvector('input')
        expected_output_vector = T.dvector('expected_output')

        for i in range(1, len(network_topology)):
            biases.append(theano.shared(np.random.uniform(-.1, .1, size=network_topology[i])))

        if act_func == 'sigmoid':
            activation_function = Tann.sigmoid
        elif act_func == 'tanh':
            activation_function = T.tanh
        else:
            raise ValueError('Activation function must be "sigmoid" or "tanh"')

        outputs = [activation_function(T.dot(input_vector, weights[0]) + biases[0])]

        for i in range(1, len(network_topology)-1):
            outputs.append(activation_function(T.dot(outputs[i-1], weights[i]) + biases[i]))

        if error_func == 'squared':
            error = T.sum((expected_output_vector - outputs[-1])**2)
        elif error_func == 'binary_cross':
            error = Tann.binary_crossentropy(outputs[-1], expected_output_vector).mean()
        else:
            raise ValueError('Error function must be "squared" or "binary_cross"')

        params = []

        for i in range(len(network_topology) - 1):
            params.append(weights[i])
            params.append(biases[i])

        gradients = T.grad(error, params)
        back_propagation_activations = [(p, p-self.learning_rate*g) for p, g in zip(params, gradients)]

        self.trainer = theano.function([input_vector, expected_output_vector], error, updates=back_propagation_activations)
        self.predictor = theano.function([input_vector], outputs[-1])
        self.error_for_input = theano.function([input_vector, expected_output_vector], error)
from theano.tensor.nnet import sigmoid, binary_crossentropy

# Read the CSV data (watermelon dataset 3.0); each row looks like "1,0.697,0.46,是\n"
file = open('data/西瓜数据集3.0.csv', encoding='utf-8')
data = [raw.strip('\n').split(',') for raw in file]
X = [[float(raw[-3]), float(raw[-2])] for raw in data[1:]]
Y = [1 if raw[-1] == '是' else 0 for raw in data[1:]]  # '是' (yes) marks a positive example

feats = len(X[0])
lrate = 1
maxturn = 10000
x = T.dmatrix('x')
y = T.vector('y')
w = theano.shared(rng.normal(size=feats), name='w')
b = theano.shared(rng.randn(), name='b')

z = T.dot(x, w) + b
p = sigmoid(z)
cost = binary_crossentropy(p, y).mean()
gw, gb = theano.grad(cost, [w, b])
pred_res = p > 0.5
fit = theano.function(inputs=[x, y], outputs=[cost, gw, gb], updates=((w, w - lrate * gw), (b, b - lrate * gb)))
predict = theano.function(inputs=[x], outputs=[pred_res])

for i in range(maxturn):
    print(fit(X, Y))
train_res = predict(X)[0]
print('predict result:')
print(train_res)
print('accuracy:')
print(float(sum([Y[i] == train_res[i] for i in range(len(Y))])) / len(Y))
Example #8
    def cross_entropy_binary(self, y):
        output = T.clip(self.p_y_given_x, 1e-7, 1 - 1e-7)
        return T.sum(binary_crossentropy(output, y), axis=1)
    def build_artificial_neural_network(self, no_of_input_nodes,
                                        hidden_nodes_topology, output_nodes_no,
                                        act_func, error_func):
        network_topology = [no_of_input_nodes
                            ] + hidden_nodes_topology + [output_nodes_no]

        weights = []
        biases = []

        for i in range(len(network_topology) - 1):
            weights.append(
                theano.shared(
                    np.random.uniform(-.1,
                                      .1,
                                      size=(network_topology[i],
                                            network_topology[i + 1]))))

        input_vector = T.dvector('input')
        expected_output_vector = T.dvector('expected_output')

        for i in range(1, len(network_topology)):
            biases.append(
                theano.shared(
                    np.random.uniform(-.1, .1, size=network_topology[i])))

        if act_func == 'sigmoid':
            activation_function = Tann.sigmoid
        elif act_func == 'tanh':
            activation_function = T.tanh
        else:
            raise ValueError('Activation function must be "sigmoid" or "tanh"')

        outputs = [
            activation_function(T.dot(input_vector, weights[0]) + biases[0])
        ]

        for i in range(1, len(network_topology) - 1):
            outputs.append(
                activation_function(
                    T.dot(outputs[i - 1], weights[i]) + biases[i]))

        if error_func == 'squared':
            error = T.sum((expected_output_vector - outputs[-1])**2)
        elif error_func == 'binary_cross':
            error = Tann.binary_crossentropy(outputs[-1],
                                             expected_output_vector).mean()
        else:
            raise ValueError(
                'Error function must be "squared" or "binary_cross"')

        params = []

        for i in range(len(network_topology) - 1):
            params.append(weights[i])
            params.append(biases[i])

        gradients = T.grad(error, params)
        back_propagation_activations = [(p, p - self.learning_rate * g)
                                        for p, g in zip(params, gradients)]

        self.trainer = theano.function([input_vector, expected_output_vector],
                                       error,
                                       updates=back_propagation_activations)
        self.predictor = theano.function([input_vector], outputs[-1])
        self.error_for_input = theano.function(
            [input_vector, expected_output_vector], error)
Example #10
# b0 = theano.shared(asarray([-1.5, 1.0]), 'b0')
# w1 = theano.shared(asarray([-1.5, -1.5]), 'w1')
# b1 = theano.shared(1.0, 'b1')

# Random weights
w0 = theano.shared(initial_scale * rng.randn(nhidden, 2), 'w0')
b0 = theano.shared(initial_scale * rng.randn(nhidden), 'b0')
w1 = theano.shared(initial_scale * rng.randn(nhidden), 'w1')
b1 = theano.shared(initial_scale * rng.randn(), 'b1')

# Net
x0 = T.fvector('x0')
yt = T.fvector('yt')
x1 = nnet.sigmoid(T.dot(w0, x0) + b0)
x2 = nnet.sigmoid(T.dot(w1, x1) + b1)
error = nnet.binary_crossentropy(x2, yt).mean()
grad_w0, grad_b0, grad_w1, grad_b1 = T.grad(error, [w0, b0, w1, b1])

predict = theano.function(inputs=[x0], outputs=x2)
train = theano.function(inputs=[x0, yt],
                        outputs=[x2, error],
                        updates=((w0, w0 - learning_rate * grad_w0), (b0, b0 - learning_rate * grad_b0),
                                 (w1, w1 - learning_rate * grad_w1), (b1, b1 - learning_rate * grad_b1)))

### Training loop ###

print("Predict:")
for (xd,yd) in training_data:
    print("\t%r -> %f" % (xd, predict(xd)))

print("Train:")
    def create(self, **params):
        """
        Create the neural network training functions. Create a cost 
        function to minimize, compute the gradients of the cost with respect
        to all the parameters, and compile functions for training the network,
        measuring the cost for a given data set, and predicting output values
        for a given set of inputs.

        This function must be called prior to train(), cost(), output(), or predict().

        Available parameters are:
            L1 - L1 regularization parameter. 
                 This parameter can be overridden for each layer by calling layer.regularize().
            L2 - L2 regularization parameter
                 This parameter can be overridden for each layer by calling layer.regularize().
            learning_rate - minibatch gradient descent learning rate
                 This parameter can be overridden for each layer by calling layer.regularize().

        """
        logging.info("Creating neural network %s" % self.name)

        if not self.initialized:
            self.initialize()

        for line in str(self).split("\n"):
            logging.info(line)

        # Use default values for parameters if none provided
        default_values = {
            'L1' : 0.0,
            'L2' : 0.0,
            'learning_rate' : 0.1
        }
        for key in default_values:
            if not key in params:
                params[key] = default_values[key]

        # Set regularization if it isn't set for each layer
        for layer in self.layers:
            if not hasattr(layer, 'regularization'):
                layer.regularization = {'L1': params['L1'], 'L2': params['L2']}
            if not hasattr(layer, 'learning_rate'):
                layer.learning_rate = params['learning_rate']

        # Input and output layers have symbolic variables
        input_data = self.layers[0].layer_output()
        output_data = self.layers[-1].layer_output()
        output_data_test = self.layers[-1].layer_output(test = True)

        # Create the cost function to minimize
        labels = tensor.dmatrix("labels")
        cost = binary_crossentropy(output_data, labels).mean()

        # Regularization penalty for large weights
        for layer in self.layers:
            for param in layer.params:
                param = param.flatten()
                cost += layer.regularization['L1'] * tensor.sum(abs(param))
                cost += layer.regularization['L2'] * tensor.sum(param ** 2)

        # Compute update rule
        updates = {}
        for layer in self.layers:
            gradients = grad(cost, layer.params)
            for (variable, gradient) in zip(layer.params, gradients):
                updates[variable] = variable - layer.learning_rate * gradient

        # Get outputs of all layers
        layer_outputs = [layer.layer_output(test=True) for layer in self.layers]

        # Compile all Theano functions
        mode = ProfileMode(optimizer='fast_run', linker=gof.OpWiseCLinker())
        self.mode = mode
        self.theano_train        = function(inputs=[input_data, labels],  outputs=cost, updates=updates)
        self.theano_get_cost     = function(inputs=[input_data, labels],  outputs=cost)
        self.theano_get_output   = function(inputs=[input_data],          outputs=output_data_test)
        self.theano_get_layers   = function(inputs=[input_data],          outputs=layer_outputs)

        # Ready to train
        self.initialized = True
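The cost, L1/L2 penalty, and update-rule pattern used in create() shown in isolation, as a self-contained sketch (the shapes, names, and parameter values below are made up for illustration and are not part of the class above):

import numpy as np
import theano
import theano.tensor as tensor
from theano.tensor.nnet import sigmoid, binary_crossentropy

X = tensor.dmatrix('X')
labels = tensor.dmatrix('labels')
W = theano.shared(np.random.uniform(-0.1, 0.1, size=(3, 1)), name='W')
b = theano.shared(np.zeros(1), name='b')

output = sigmoid(tensor.dot(X, W) + b)
cost = binary_crossentropy(output, labels).mean()

# L1/L2 penalties on the flattened parameters, as in the regularization loop above
L1, L2, learning_rate = 0.0, 1e-4, 0.1
for param in [W, b]:
    flat = param.flatten()
    cost = cost + L1 * tensor.sum(abs(flat)) + L2 * tensor.sum(flat ** 2)

# Plain gradient-descent updates, mirroring the update rule above
updates = {}
for param, gradient in zip([W, b], tensor.grad(cost, [W, b])):
    updates[param] = param - learning_rate * gradient

train = theano.function([X, labels], cost, updates=updates)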