Example #1
def get_network(model):

    input_data = tensor.dmatrix('x')
    targets_var = tensor.dmatrix('y')

    network = layers.InputLayer((model['batch_size'], model['input_vars']), input_data)

    nonlin = nonlinearities.rectify
    if model['hidden_nonlinearity'] != 'ReLu':
        nonlin = nonlinearities.tanh

    prev_layer = network

    for l in range(model['nlayers']):
        fc = layers.DenseLayer(prev_layer, model['units'], nonlinearity=nonlin)
        if model['dropout']:
            fc = layers.DropoutLayer(fc, 0.5)
        prev_layer = fc

    output_lin = None
    if model['output_mode'] == OUTPUT_LOG:
        output_lin = nonlinearities.tanh
    output_layer = layers.DenseLayer(prev_layer, 1, nonlinearity=output_lin)

    predictions = layers.get_output(output_layer)

    if model['output_mode'] == OUTPUT_BOUNDED:
        (minth, maxth) = model['maxmin'][model['control']]
        maxt = theano.shared(np.ones((model['batch_size'], 1)) * maxth)
        mint = theano.shared(np.ones((model['batch_size'], 1)) * minth)
        predictions = tensor.min(tensor.concatenate([maxt, predictions], axis=1), axis=1)
        predictions = tensor.reshape(predictions, (model['batch_size'], 1))
        predictions = tensor.max(tensor.concatenate([mint, predictions], axis=1), axis=1)
        predictions = tensor.reshape(predictions, (model['batch_size'], 1))

    loss = objectives.squared_error(predictions, targets_var)
    loss = objectives.aggregate(loss, mode='mean')

    params = layers.get_all_params(output_layer)

    test_prediction = layers.get_output(output_layer, deterministic=True)
    test_loss = objectives.squared_error(test_prediction, targets_var)
    test_loss = test_loss.mean()

    updates_sgd = updates.sgd(loss, params, learning_rate=model['lr'])
    ups = updates.apply_momentum(updates_sgd, params, momentum=0.9)

    train_fn = theano.function([input_data, targets_var], loss, updates=ups)
    pred_fn = theano.function([input_data], predictions)
    val_fn = theano.function([input_data, targets_var], test_loss)

    return {'train': train_fn, 'eval': val_fn, 'pred': pred_fn, 'layers': output_layer}
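A minimal usage sketch for the dictionary returned above. Every `model` key below is inferred from how `get_network` reads it, and `OUTPUT_LOG` is assumed to be a module-level constant alongside `OUTPUT_BOUNDED`:

import numpy as np

# Hypothetical configuration; all keys are assumptions based on get_network's code.
model = {'batch_size': 32, 'input_vars': 10, 'hidden_nonlinearity': 'ReLu',
         'nlayers': 2, 'units': 64, 'dropout': True,
         'output_mode': OUTPUT_LOG, 'maxmin': None, 'control': None, 'lr': 0.01}
fns = get_network(model)

X = np.random.rand(32, 10)   # dmatrix inputs are float64
y = np.random.rand(32, 1)
print(fns['train'](X, y))    # one SGD + momentum step; returns the batch loss
print(fns['eval'](X, y))     # deterministic validation loss
print(fns['pred'](X).shape)  # (32, 1)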
Example #2
def get_model(input_var, target_var, multiply_var):

    # input layer with unspecified batch size
    layer_both_0         = InputLayer(shape=(None, 30, 64, 64), input_var=input_var)

    # Z-score?

    # Two convolution + batch-normalisation + activation blocks, then a (padded) max-pooling layer followed by a dropout layer
    layer_both_1         = batch_norm(Conv2DLayer(layer_both_0, 64, (3, 3), pad='same', nonlinearity=leaky_rectify))
    layer_both_2         = batch_norm(Conv2DLayer(layer_both_1, 64, (3, 3), pad='same', nonlinearity=leaky_rectify))
    layer_both_3         = MaxPool2DLayer(layer_both_2, pool_size=(2, 2), stride=(2, 2), pad=(1, 1))
    layer_both_4         = DropoutLayer(layer_both_3, p=0.25)

    # Two convolution + batch-normalisation + activation blocks, then a (padded) max-pooling layer followed by a dropout layer
    layer_both_5         = batch_norm(Conv2DLayer(layer_both_4, 128, (3, 3), pad='same', nonlinearity=leaky_rectify))
    layer_both_6         = batch_norm(Conv2DLayer(layer_both_5, 128, (3, 3), pad='same', nonlinearity=leaky_rectify))
    layer_both_7         = MaxPool2DLayer(layer_both_6, pool_size=(2, 2), stride=(2, 2), pad=(1, 1))
    layer_both_8         = DropoutLayer(layer_both_7, p=0.25)

    # Three convolution + batch-normalisation + activation blocks, then a (padded) max-pooling layer followed by a dropout layer
    layer_both_9         = batch_norm(Conv2DLayer(layer_both_8, 256, (3, 3), pad='same', nonlinearity=leaky_rectify))
    layer_both_10        = batch_norm(Conv2DLayer(layer_both_9, 256, (3, 3), pad='same', nonlinearity=leaky_rectify))
    layer_both_11        = batch_norm(Conv2DLayer(layer_both_10, 256, (3, 3), pad='same', nonlinearity=leaky_rectify))
    layer_both_12        = MaxPool2DLayer(layer_both_11, pool_size=(2, 2), stride=(2, 2), pad=(1, 1))
    layer_both_13        = DropoutLayer(layer_both_12, p=0.25)

    # Flatten
    layer_flatten        = FlattenLayer(layer_both_13)

    # Prediction
    layer_hidden         = DenseLayer(layer_flatten, 500, nonlinearity=sigmoid)
    layer_prediction     = DenseLayer(layer_hidden, 2, nonlinearity=linear)

    # Loss
    prediction           = get_output(layer_prediction) / multiply_var
    loss                 = squared_error(prediction, target_var)
    loss                 = loss.mean()


    # Trainable parameters; the caller builds the updates (e.g. SGD with Nesterov momentum)
    params               = get_all_params(layer_prediction, trainable=True)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network, disabling dropout layers.
    test_prediction      = get_output(layer_prediction, deterministic=True) / multiply_var
    test_loss            = squared_error(test_prediction, target_var)
    test_loss            = test_loss.mean()

    # crps estimate
    crps                 = T.abs_(test_prediction - target_var).mean()/600

    return test_prediction, crps, loss, params
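The function above only returns symbolic expressions and the parameter list; a hedged sketch of how a caller might compile them into Theano functions (the input shapes and the choice of optimiser are assumptions):

import theano
import theano.tensor as T
from lasagne.updates import nesterov_momentum

input_var = T.tensor4('inputs')       # (batch, 30, 64, 64), matching the InputLayer above
target_var = T.matrix('targets')
multiply_var = T.matrix('multiplier')  # same shape as targets, so the division stays element-wise

test_prediction, crps, loss, params = get_model(input_var, target_var, multiply_var)

updates = nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.9)
train_fn = theano.function([input_var, target_var, multiply_var], loss, updates=updates)
valid_fn = theano.function([input_var, target_var, multiply_var], crps)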
Example #3
def get_model(input_var, target_var, multiply_var):

    # input layer with unspecified batch size
    layer_input     = InputLayer(shape=(None, 12, 64, 64), input_var=input_var) #InputLayer(shape=(None, 1, 30, 64, 64), input_var=input_var)
    layer_0         = DimshuffleLayer(layer_input, (0, 'x', 1, 2, 3))

    # Z-score?

    # Two convolution + batch-normalisation + activation blocks, then a (padded) max-pooling layer followed by a dropout layer
    layer_1         = batch_norm(Conv3DDNNLayer(incoming=layer_0, num_filters=16, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=leaky_rectify))
    layer_2         = batch_norm(Conv3DDNNLayer(incoming=layer_1, num_filters=16, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=leaky_rectify))
    layer_3         = MaxPool3DDNNLayer(layer_2, pool_size=(2, 2, 2), stride=(2, 2, 2), pad=(1, 1, 1))
    layer_4         = DropoutLayer(layer_3, p=0.25)

    # Two convolution + batch-normalisation + activation blocks, then a (padded) max-pooling layer followed by a dropout layer
    layer_5         = batch_norm(Conv3DDNNLayer(incoming=layer_4, num_filters=32, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=leaky_rectify))
    layer_6         = batch_norm(Conv3DDNNLayer(incoming=layer_5, num_filters=32, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=leaky_rectify))
    layer_7         = MaxPool3DDNNLayer(layer_6, pool_size=(2, 2, 2), stride=(2, 2, 2), pad=(1, 1, 1))
    layer_8         = DropoutLayer(layer_7, p=0.25)
    
    # Three convolution + batch-normalisation + activation blocks, then a (padded) max-pooling layer followed by a dropout layer
    layer_9         = batch_norm(Conv3DDNNLayer(incoming=layer_8, num_filters=64, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=leaky_rectify))
    layer_10        = batch_norm(Conv3DDNNLayer(incoming=layer_9, num_filters=64, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=leaky_rectify))
    layer_11        = batch_norm(Conv3DDNNLayer(incoming=layer_10, num_filters=64, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=leaky_rectify))
    layer_12        = MaxPool3DDNNLayer(layer_11, pool_size=(2, 2, 2), stride=(2, 2, 2), pad=(1, 1, 1))
    layer_13        = DropoutLayer(layer_12, p=0.25)

    layer_flatten = FlattenLayer(layer_13)

    # Output Layer
    layer_hidden         = DenseLayer(layer_flatten, 500, nonlinearity=linear)
    layer_prediction     = DenseLayer(layer_hidden, 2, nonlinearity=linear)

    # Loss
    prediction           = get_output(layer_prediction) #/ multiply_var
    loss                 = squared_error(prediction, target_var)
    loss                 = loss.mean()

    # Trainable parameters; the caller builds the updates (e.g. SGD with Nesterov momentum)
    params               = get_all_params(layer_prediction, trainable=True)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network, disabling dropout layers.
    test_prediction      = get_output(layer_prediction, deterministic=True) # / multiply_var
    test_loss            = squared_error(test_prediction, target_var)
    test_loss            = test_loss.mean()

    # crps estimate
    crps                 = T.abs_(test_prediction - target_var).mean()/600

    return test_prediction, crps, loss, params
Example #4
    def build_instrument_model(self, n_vars, **kwargs):

        targets = TT.vector()
        instrument_vars = TT.matrix()

        instruments = layers.InputLayer((None, n_vars), instrument_vars)
        instruments = layers.DropoutLayer(instruments, p=0.2)

        dense_layer = layers.DenseLayer(instruments, kwargs['dense_size'], nonlinearity=nonlinearities.tanh)
        dense_layer = layers.DropoutLayer(dense_layer, p=0.2)

        for _ in xrange(kwargs['n_dense_layers'] - 1):
            dense_layer = layers.DenseLayer(dense_layer, kwargs['dense_size'], nonlinearity=nonlinearities.tanh)
            dense_layer = layers.DropoutLayer(dense_layer, p=0.5)

        self.instrument_output = layers.DenseLayer(dense_layer, 1, nonlinearity=nonlinearities.linear)
        init_params = layers.get_all_param_values(self.instrument_output)
        prediction = layers.get_output(self.instrument_output, deterministic=False)
        test_prediction = layers.get_output(self.instrument_output, deterministic=True)

        # flexible here, endog variable can be categorical, continuous, etc.
        l2_cost = regularization.regularize_network_params(self.instrument_output, regularization.l2)
        loss = objectives.squared_error(prediction.flatten(), targets.flatten()).mean() + 1e-4 * l2_cost  # training loss with L2 penalty
        loss_total = objectives.squared_error(prediction.flatten(), targets.flatten()).mean()  # unpenalised loss, used for monitoring

        params = layers.get_all_params(self.instrument_output, trainable=True)
        param_updates = updates.adadelta(loss, params)

        self._instrument_train_fn = theano.function(
            [
                targets,
                instrument_vars,
            ],
            loss,
            updates=param_updates
        )

        self._instrument_loss_fn = theano.function(
            [
                targets,
                instrument_vars,
            ],
            loss_total
        )

        self._instrument_output_fn = theano.function([instrument_vars], test_prediction)

        return init_params
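Example #5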
def prepare():

    X = T.fmatrix('X')
    y = T.ivector('y')

    assert not ("regression" in args and "logistic" in args)

    if "regression" in args:
        output_layer = squared_error_net_adaptive()
    else:
        output_layer = logistic()

    all_params = lasagne.layers.get_all_params(output_layer)

    if "regression" in args:
        prob_vector = lasagne.layers.get_output(output_layer, X)
        loss = squared_error(prob_vector, y).mean()
        pred = T.maximum(0, T.minimum( T.round(prob_vector), args["num_classes"]-1 ) )
        accuracy = T.mean( T.eq( pred, y ) )
    else:
        a = args["a"]
        b = args["b"]
        loss_fn = get_hybrid_loss(a,b)
        prob_vector = lasagne.layers.get_output(output_layer, X)
        loss = loss_fn(prob_vector, y).mean()
        pred = T.argmax( prob_vector, axis=1 )
        accuracy = T.mean( T.eq(pred,y) )

    return Container(
        { "X": X, "y": y, "output_layer": output_layer, "all_params": all_params,
        "loss": loss, "pred": pred, "accuracy": accuracy,
        "prob_vector": prob_vector
        }
    )
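For the regression branch above, predictions are rounded and then clipped into the valid label range; a small NumPy analogue of that expression (`num_classes` is an assumed value):

import numpy as np

num_classes = 5
prob_vector = np.array([-0.3, 1.2, 6.7])
pred = np.maximum(0, np.minimum(np.round(prob_vector), num_classes - 1))
print(pred)  # [0. 1. 4.]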
Example #6
    def build_validate_fn(self):

        prediction = get_output(self.network, deterministic=True)
        loss = squared_error(prediction, self.target_var)
        loss = loss.mean()

        self.val_fn = theano.function([self.input_var, self.target_var], loss)
Example #7
    def _create_nnet(input_dims, output_dims, learning_rate, num_hidden_units=15, batch_size=32, max_train_epochs=1,
                     hidden_nonlinearity=nonlinearities.rectify, output_nonlinearity=None, update_method=updates.sgd):
        """
        A subclass may override this if a different sort
        of network is desired.
        """
        nnlayers = []
        nnlayers.append(layers.InputLayer(shape=(None, input_dims)))
        nnlayers.append(layers.DenseLayer(nnlayers[-1], num_hidden_units, nonlinearity=hidden_nonlinearity))
        nnlayers.append(layers.DenseLayer(nnlayers[-1], output_dims, nonlinearity=output_nonlinearity))

        prediction = layers.get_output(nnlayers[-1])

        input_var = nnlayers[0].input_var
        target = T.matrix(name="target", dtype=floatX)

        loss = objectives.squared_error(prediction, target).mean()

        params = layers.get_all_params(nnlayers[-1], trainable=True)

        updates = update_method(loss, params, learning_rate)

        fit = theano.function([input_var, target], loss, updates=updates)

        predict = theano.function([input_var], prediction)

        nnet = Mock(
            fit=fit,
            predict=predict,
        )
        return nnet
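A hedged usage sketch for the factory above, treating it as a plain function and assuming floatX == 'float32'; the argument values are arbitrary:

import numpy as np

nnet = _create_nnet(input_dims=4, output_dims=2, learning_rate=0.01)
X = np.random.rand(8, 4).astype('float32')
y = np.random.rand(8, 2).astype('float32')
print(nnet.fit(X, y))     # one SGD step; returns the batch mean squared error
print(nnet.predict(X))    # network outputs, shape (8, 2)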
Example #8
def test_squared_error(colvect):
    # symbolic version
    from lasagne.objectives import squared_error
    if not colvect:
        a, b = theano.tensor.matrices('ab')
        c = squared_error(a, b)
    else:
        a, b = theano.tensor.vectors('ab')
        c = squared_error(a.dimshuffle(0, 'x'), b)[:, 0]
    # numeric version
    floatX = theano.config.floatX
    shape = (10, 20) if not colvect else (10,)
    x = np.random.rand(*shape).astype(floatX)
    y = np.random.rand(*shape).astype(floatX)
    z = (x - y)**2
    # compare
    assert np.allclose(z, c.eval({a: x, b: y}))
Example #9
def create_network(available_actions_num):
    # Creates the input variables
    s1 = tensor.tensor4("States")
    a = tensor.vector("Actions", dtype="int32")
    q2 = tensor.vector("Next State best Q-Value")
    r = tensor.vector("Rewards")
    nonterminal = tensor.vector("Nonterminal", dtype="int8")

    # Creates the input layer of the network.
    dqn = InputLayer(shape=[None, 1, downsampled_y, downsampled_x], input_var=s1)

    # Adds 3 convolutional layers, each followed by a max pooling layer.
    dqn = Conv2DLayer(dqn, num_filters=32, filter_size=[8, 8],
                      nonlinearity=rectify, W=GlorotUniform("relu"),
                      b=Constant(.1))
    dqn = MaxPool2DLayer(dqn, pool_size=[2, 2])
    dqn = Conv2DLayer(dqn, num_filters=64, filter_size=[4, 4],
                      nonlinearity=rectify, W=GlorotUniform("relu"),
                      b=Constant(.1))

    dqn = MaxPool2DLayer(dqn, pool_size=[2, 2])
    dqn = Conv2DLayer(dqn, num_filters=64, filter_size=[3, 3],
                      nonlinearity=rectify, W=GlorotUniform("relu"),
                      b=Constant(.1))
    dqn = MaxPool2DLayer(dqn, pool_size=[2, 2])
    # Adds a single fully connected layer.
    dqn = DenseLayer(dqn, num_units=512, nonlinearity=rectify, W=GlorotUniform("relu"),
                     b=Constant(.1))

    # Adds a single fully connected layer which is the output layer.
    # (no nonlinearity as it is for approximating an arbitrary real function)
    dqn = DenseLayer(dqn, num_units=available_actions_num, nonlinearity=None)

    # Theano stuff
    q = get_output(dqn)
    # Only the Q-values of the chosen actions are updated, roughly according to the following formula:
    # target Q(s,a,t) = r + gamma * max Q(s2,_,t+1)
    target_q = tensor.set_subtensor(q[tensor.arange(q.shape[0]), a], r + discount_factor * nonterminal * q2)
    loss = squared_error(q, target_q).mean()

    # Updates the parameters according to the computed gradient using rmsprop.
    params = get_all_params(dqn, trainable=True)
    updates = rmsprop(loss, params, learning_rate)

    # Compiles theano functions
    print "Compiling the network ..."
    function_learn = theano.function([s1, q2, a, r, nonterminal], loss, updates=updates, name="learn_fn")
    function_get_q_values = theano.function([s1], q, name="eval_fn")
    function_get_best_action = theano.function([s1], tensor.argmax(q), name="test_fn")
    print "Network compiled."

    # Returns Theano objects for the net and functions.
    # We wouldn't need the net anymore but it is nice to save your model.
    return dqn, function_learn, function_get_q_values, function_get_best_action
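The set_subtensor call above rewrites only the Q-value of the action actually taken; a NumPy analogue of that target construction (all numbers are made up):

import numpy as np

discount_factor = 0.99
q = np.array([[1.0, 2.0], [0.5, 0.3]])   # Q(s, a) for 2 states, 2 actions
a = np.array([0, 1])                     # actions taken
r = np.array([1.0, -1.0])                # rewards received
q2 = np.array([3.0, 2.0])                # max_a' Q(s', a')
nonterminal = np.array([1, 0])           # 0 where the episode ended
target_q = q.copy()
target_q[np.arange(2), a] = r + discount_factor * nonterminal * q2
print(target_q)   # [[3.97 2.  ] [ 0.5 -1.  ]]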
Example #10
    def build_train_fn(self):

        prediction = get_output(self.network, deterministic=False)

        loss = squared_error(prediction, self.target_var)
        loss = loss.mean()

        params = get_all_params(self.network, trainable=True)

        updates = nesterov_momentum(loss, params, learning_rate=self.learning_rate, momentum=self.momentum)

        self.train_fn = theano.function([self.input_var, self.target_var], loss, updates=updates)
Example #11
def test_squared_error():
    # symbolic version
    from lasagne.objectives import squared_error
    a, b = theano.tensor.matrices('ab')
    c = squared_error(a, b)
    # numeric version
    floatX = theano.config.floatX
    x = np.random.randn(10, 20).astype(floatX)
    y = np.random.randn(10, 20).astype(floatX)
    z = (x - y)**2
    # compare
    assert np.allclose(z, c.eval({a: x, b: y}))
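Example #12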
def create_network(available_actions_count):
    # Create the input variables
    s1 = tensor.tensor4("States")
    a = tensor.vector("Actions", dtype="int32")
    q2 = tensor.vector("Next State's best Q-Value")
    r = tensor.vector("Rewards")
    isterminal = tensor.vector("IsTerminal", dtype="int8")

    # Create the input layer of the network.
    dqn = InputLayer(shape=[None, 1, resolution[0], resolution[1]], input_var=s1)

    # Add 2 convolutional layers with ReLu activation
    dqn = Conv2DLayer(dqn, num_filters=8, filter_size=[6, 6],
                      nonlinearity=rectify, W=HeUniform("relu"),
                      b=Constant(.1), stride=3)
    dqn = Conv2DLayer(dqn, num_filters=8, filter_size=[3, 3],
                      nonlinearity=rectify, W=HeUniform("relu"),
                      b=Constant(.1), stride=2)

    # Add a single fully-connected layer.
    dqn = DenseLayer(dqn, num_units=128, nonlinearity=rectify, W=HeUniform("relu"),
                     b=Constant(.1))

    # Add the output layer (also fully-connected).
    # (no nonlinearity as it is for approximating an arbitrary real function)
    dqn = DenseLayer(dqn, num_units=available_actions_count, nonlinearity=None)

    # Define the loss function
    q = get_output(dqn)
    # target differs from q only for the selected action. The following means:
    # target_Q(s,a) = r + gamma * max Q(s2,_) if not isterminal else r
    target_q = tensor.set_subtensor(q[tensor.arange(q.shape[0]), a], r + discount_factor * (1 - isterminal) * q2)
    loss = squared_error(q, target_q).mean()

    # Update the parameters according to the computed gradient using RMSProp.
    params = get_all_params(dqn, trainable=True)
    updates = rmsprop(loss, params, learning_rate)

    # Compile the theano functions
    print "Compiling the network ..."
    function_learn = theano.function([s1, q2, a, r, isterminal], loss, updates=updates, name="learn_fn")
    function_get_q_values = theano.function([s1], q, name="eval_fn")
    function_get_best_action = theano.function([s1], tensor.argmax(q), name="test_fn")
    print "Network compiled."

    def simple_get_best_action(state):
        return function_get_best_action(state.reshape([1, 1, resolution[0], resolution[1]]))

    # Returns Theano objects for the net and functions.
    return dqn, function_learn, function_get_q_values, simple_get_best_action
Example #13
def run(get_model, model_name):
	train_stream = ServerDataStream(('cases', 'image_position', 'multiplier', 'sax', 'sax_features', 'targets'), False, hwm=10)
	valid_stream = ServerDataStream(('cases', 'image_position', 'multiplier', 'sax', 'sax_features', 'targets'), False, hwm=10, port=5558)

	ftensor5 = tensor.TensorType('float32', (False,)*5)

	input_var  = ftensor5('sax_features')
	target_var = tensor.matrix('targets')
	multiply_var = tensor.matrix('multiplier')
	multiply_var = T.addbroadcast(multiply_var, 1)

	prediction, test_prediction, test_pred_mid, params_bottom, params_top = get_model(input_var, multiply_var)

	# load parameters
	cg = ComputationGraph(test_pred_mid)
	params_val = numpy.load('sunnybrook/best_weights.npz')
	
	for p, value in zip(cg.shared_variables, params_val['arr_0']):
		p.set_value(value)

	crps = tensor.abs_(test_prediction - target_var).mean()

	loss = squared_error(prediction, target_var).mean()

	loss.name = 'loss'
	crps.name = 'crps'

	algorithm = GradientDescent(
		cost=loss,
		parameters=params_top,
		step_rule=Adam(),
		on_unused_sources='ignore'
	)

	host_plot = 'http://localhost:5006'

	extensions = [
		Timing(),
		TrainingDataMonitoring([loss], after_epoch=True),
		DataStreamMonitoring(variables=[crps, loss], data_stream=valid_stream, prefix="valid"),
		Plot('%s %s %s' % (model_name, datetime.date.today(), time.strftime('%H:%M')), channels=[['loss','valid_loss'], ['valid_crps']], after_epoch=True, server_url=host_plot),
		Printing(),
		Checkpoint('train'),
		FinishAfter(after_n_epochs=20)
	]

	main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
	                     extensions=extensions)
	main_loop.run()
Example #14
File: bidnn.py  Project: v-v/BiDNN
    def __init__(self, conf):
        self.conf = conf

        if self.conf.act == "linear":
            self.conf.act = linear
        elif self.conf.act == "sigmoid":
            self.conf.act = sigmoid
        elif self.conf.act == "relu":
            self.conf.act = rectify
        elif self.conf.act == "tanh":
            self.conf.act = tanh
        else:
            raise ValueError("Unknown activation function", self.conf.act)

        input_var_first   = T.matrix('inputs1')
        input_var_second  = T.matrix('inputs2')
        target_var        = T.matrix('targets')

        # create network        
        self.autoencoder, encoder_first, encoder_second = self.__create_toplogy__(input_var_first, input_var_second)
        
        self.out = get_output(self.autoencoder)
        
        loss = squared_error(self.out, target_var)
        loss = loss.mean()
        
        params = get_all_params(self.autoencoder, trainable=True)
        updates = nesterov_momentum(loss, params, learning_rate=self.conf.lr, momentum=self.conf.momentum)
        
        # training function
        self.train_fn = theano.function([input_var_first, input_var_second, target_var], loss, updates=updates)
        
        # function to reconstruct
        test_reconstruction = get_output(self.autoencoder, deterministic=True)
        self.reconstruction_fn = theano.function([input_var_first, input_var_second], test_reconstruction)
        
        # encoding function
        test_encode = get_output([encoder_first, encoder_second], deterministic=True)
        self.encoding_fn = theano.function([input_var_first, input_var_second], test_encode)

        # utils
        blas = lambda name, ndarray: scipy.linalg.get_blas_funcs((name,), (ndarray,))[0]
        self.blas_nrm2 = blas('nrm2', np.array([], dtype=float))
        self.blas_scal = blas('scal', np.array([], dtype=float))

        # load weights if necessary
        if self.conf.load_model is not None:
            self.load_model()
Example #15
def build_loss(targets, prediction, optimization):
	""" setup loss function with weight decay regularization """

	if optimization["objective"] == 'categorical':
		loss = objectives.categorical_crossentropy(prediction, targets)

	elif optimization["objective"] == 'binary':
		prediction = T.clip(prediction, 1e-7, 1-1e-7)
		loss = -(targets*T.log(prediction) + (1.0-targets)*T.log(1.0-prediction))
		# loss = objectives.binary_crossentropy(prediction[:,loss_index], targets[:,loss_index])

	elif (optimization["objective"] == 'squared_error'):
		loss = objectives.squared_error(prediction, targets)

	loss = objectives.aggregate(loss, mode='mean')

	return loss
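A brief usage sketch for build_loss with the squared-error objective (the symbolic placeholders below are illustrative; the optimization dict only needs an "objective" key):

import theano.tensor as T

targets = T.matrix('targets')
prediction = T.matrix('prediction')
optimization = {"objective": 'squared_error'}
loss = build_loss(targets, prediction, optimization)   # scalar: mean squared error over all elements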
Example #16
def compute_cost_fake(df):
	"""
	Pass a pandas dataframe df and then generate signal.
	Compute the squared error of the network output against the clean signal
	and return the averaged cost
	"""
	import numpy as np
	import cPickle
	from nn import nn_param
	from matplotlib import pyplot as plt
	from theano import tensor as T
	f=file("nnparams.sav")
	update=cPickle.load(f)
	sig=np.asarray(df.l0)
	sig_noise=np.asarray(df.l0+df.noise)
	sig/=np.max(sig)
	sig_noise/=np.max(sig_noise)
	yval=nn_param(update,sig_noise)
	return T.mean(squared_error(yval,sig))
Example #17
def _get_objective(policy, state_values, actions, reference_state_values,
                   is_alive="always", min_log_proba=-1e6):
    """returns a2v loss sum"""
    if is_alive == "always":
        is_alive = T.ones_like(actions,dtype=theano.config.floatX)
        
    
    action_probas =  get_action_Qvalues(policy,actions)
    
    reference_state_values = consider_constant(reference_state_values)
    
    log_probas = T.maximum(T.log(action_probas),min_log_proba)
    
    policy_loss_elwise = - log_probas * (reference_state_values - consider_constant(state_values))
    
    
    V_err_elwise = squared_error(reference_state_values,state_values)
    
    return (policy_loss_elwise + V_err_elwise)*is_alive
Example #18
    def __init__(self,  nnet_x_to_z, nnet_z_to_x,
                        batch_optimizer=None, rng=None,
                        noise_function=None,
                        loss_function=None,
                        loss_function_y=None,
                        loss_function_z=None,
                        nnet_x_to_y=None,
                        X_type=None,
                        walkback=1):
        self.nnet_x_to_z = nnet_x_to_z
        self.nnet_z_to_x = nnet_z_to_x
        self.nnet_x_to_y = nnet_x_to_y

        if batch_optimizer is None:
            batch_optimizer = easy.BatchOptimizer()
        self.batch_optimizer = batch_optimizer
        self.batch_optimizer.model = self
        if rng is None:
            rng = RandomStreams(seed=10001)
        self.rng = rng

        self.encode_function = None  # only available after fit
        self.decode_function = None  # only available after fit
        self.predict_function = None # only available after fit
        self.iter_update_batch = None
        self.iter_update = None

        self.get_loss = None

        if loss_function is None:
            loss_function = lambda x, x_hat : objectives.squared_error(x, x_hat).sum(axis=1)
        self.loss_function = loss_function
        self.loss_function_y = loss_function_y
        self.loss_function_z = loss_function_z
        self.noise_function = noise_function
        self.walkback = walkback

        if X_type is None:
            X_type = T.matrix
        self.X_type = X_type
Example #19
def main():

    s1 = tensor.tensor4("States")
    a = tensor.vector("Actions", dtype="int32")
    q2 = tensor.vector("Next State best Q-Value")
    r = tensor.vector("Rewards")
    nonterminal = tensor.vector("Nonterminal", dtype="int8")
    
    
    dqn = InputLayer(shape=[None, 1, 2000], input_var=s1)  # reduced the two dimensions to one - is that right?
    dqn = DenseLayer(dqn, num_units=2000, nonlinearity=rectify, W=GlorotUniform("relu"),b=Constant(.1))
    dqn = DenseLayer(dqn, num_units=2000, nonlinearity=rectify, W=GlorotUniform("relu"),b=Constant(.1))
    dqn = DenseLayer(dqn, num_units=2000, nonlinearity=rectify, W=GlorotUniform("relu"),b=Constant(.1))
    
    dqn = DenseLayer(dqn, num_units=available_actions_num, nonlinearity=None)
    
    q = get_output(dqn)
    target_q = tensor.set_subtensor(q[tensor.arange(q.shape[0]), a], r + discount_factor * nonterminal * q2)
    loss = squared_error(q, target_q).mean()
    
    params = get_all_params(dqn, trainable=True)
    updates = rmsprop(loss, params, learning_rate)
    
    function_learn = theano.function([s1, q2, a, r, nonterminal], loss, updates=updates, name="learn_fn")
    function_get_q_values = theano.function([s1], q, name="eval_fn")
    function_get_best_action = theano.function([s1], tensor.argmax(q), name="test_fn")
    
    # Creates and initializes the environment.
    print "Initializing doom..."
    game = DoomGame()
    game.load_config("../../examples/config/learning.cfg")
    game.init()
    print "Doom initialized."
    
    # Creates all possible actions.
    n = game.get_available_buttons_size()
    actions = []
    for perm in it.product([0, 1], repeat=n):
        actions.append(list(perm))
Example #20
    def _create_network(self):
        logger.info("Building network ...")
        net, input_var = self._build_network()
        target_values = T.matrix('target_output')
        actions = T.icol('actions')

        # Create masks
        # mask = theano.shared(np.zeros((self.batch_size, self.num_actions)).astype(np.int32))
        mask = T.zeros_like(target_values)
        mask = T.set_subtensor(mask[T.arange(self.batch_size), actions.reshape((-1,))], 1)

        # feed-forward path
        network_output = lasagne.layers.get_output(net, input_var / 255.0)

        # Add regularization penalty
        loss = squared_error(network_output * mask, target_values).mean()
        if self.weight_decay > 0.0:
            loss += regularize_network_params(net, l2) * self.weight_decay

        # Retrieve all parameters from the network
        all_params = lasagne.layers.get_all_params(net, trainable=True)

        # Compute updates for training
        if self.clip_error:
            grads = theano.gradient.grad(loss, all_params)
            grads = [lasagne.updates.norm_constraint(grad, self.clip_error, range(grad.ndim)) for grad in grads]
            updates = self.optimizer(grads, all_params, learning_rate=self.learning_rate, rho=self.decay_rate)
        else:
            updates = self.optimizer(loss, all_params, learning_rate=self.learning_rate, rho=self.decay_rate)

        # Theano functions for training and computing cost
        logger.info("Compiling functions ...")
        train = theano.function([input_var, target_values, actions], [loss, network_output, target_values, mask], updates=updates)
        predict = theano.function([input_var], network_output)

        return net, train, predict
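The mask built above keeps only the Q-value column of the chosen action before the squared error is taken; a NumPy analogue (batch_size and num_actions are arbitrary):

import numpy as np

batch_size, num_actions = 3, 4
actions = np.array([[2], [0], [3]])              # shape (batch_size, 1), as with T.icol
mask = np.zeros((batch_size, num_actions))
mask[np.arange(batch_size), actions.reshape(-1)] = 1
print(mask)
# [[0. 0. 1. 0.]
#  [1. 0. 0. 0.]
#  [0. 0. 0. 1.]]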
Example #21
File: fwrf.py  Project: styvesg/fwrf
def learn_params(
        mst_data, voxels, w_params, \
        batches=(1,1,1), holdout_size=100, lr=1e-4, l2=0.0, num_epochs=1, output_val_scores=-1, output_val_every=1, verbose=False, dry_run=False):
    ''' 
        batches dims are (samples, voxels, candidates)
    '''
    assert len(mst_data) == len(voxels), "data/target length mismatch"
    n, nf, _, nt = mst_data.shape
    _, nv = voxels.shape
    bn, bv, bt = batches
    nbv, nbt = nv // bv, nt // bt
    rbv, rbt = nv - nbv * bv, nt - nbt * bt
    assert rbt == 0, "the model batch size must be a divisor of the total number of models"
    if verbose:
        print "Grad. Desc. planned in %d batch with batch size %d and residual %d" % \
            (int(np.ceil(float(n-holdout_size) / bn)), bn, (n-holdout_size)%bn)
        print "%d voxel batches of size %d with residual %d" % (nbv, bv, rbv)
        print "%d candidate batches of size %d with residual %d" % (nbt, bt,
                                                                    rbt)
        print "for %d voxelmodel fits." % (nv * nt)
        sys.stdout.flush()

    print 'CREATING SYMBOLS\n'
    _V = T.matrix()
    __V = _V.dimshuffle((0, 1, 'x'))
    __lr = theano.shared(fpX(lr))
    __l2 = theano.shared(fpX(l2))
    ### request shared memory
    __mst_sdata = theano.shared(np.zeros(shape=(n, nf, 1, bt), dtype=fpX))
    __vox_sdata = theano.shared(np.zeros(shape=(n, bv), dtype=fpX))
    __range = T.ivector()
    _smst_batch = __mst_sdata[__range[0]:__range[1]]
    _fwrf_o = svFWRF(_smst_batch, nf, bv, bt, add_bias=len(w_params) == 2)
    if verbose:
        plu.print_lasagne_network(_fwrf_o, skipnoparam=False)
    ### define and compile the training expressions.
    _fwrf_o_reg = __l2 * R.regularize_layer_params(_fwrf_o, R.l2)
    fwrf_o_params = L.get_all_params(_fwrf_o, trainable=True)

    _sV = __vox_sdata[__range[0]:__range[1]].dimshuffle((0, 1, 'x'))
    _fwrf_o_trn_pred = L.get_output(_fwrf_o, deterministic=False)
    _fwrf_o_trn_preloss = O.squared_error(_fwrf_o_trn_pred, _sV).mean(axis=0)
    _fwrf_o_trn_loss = _fwrf_o_trn_preloss.sum() + _fwrf_o_reg

    _fwrf_o_val_pred = L.get_output(_fwrf_o, deterministic=True)
    _fwrf_o_val_preloss = O.squared_error(_fwrf_o_val_pred, _sV).mean(
        axis=0)  #average across the batch elements
    ###
    __fwrf_o_updates = lasagne.updates.sgd(_fwrf_o_trn_loss,
                                           fwrf_o_params,
                                           learning_rate=__lr)
    #__fwrf_o_updates = lasagne.updates.adam(_fwrf_o_trn_loss, fwrf_o_params, learning_rate=self.__lr, beta1=0.5, epsilon=1e-12)
    print 'COMPILING...'
    sys.stdout.flush()
    comp_t = time.time()
    fwrf_o_trn_fn = theano.function([__range], updates=__fwrf_o_updates)
    fwrf_o_val_fn = theano.function([__range], _fwrf_o_val_preloss)
    print '%.2f seconds to compile theano functions' % (time.time() - comp_t)

    ### shuffle the time series of voxels and mst_data
    order = np.arange(n, dtype=int)
    np.random.shuffle(order)
    mst_data = mst_data[order]
    voxels = voxels[order]

    ### THIS IS WHERE THE MODEL OPTIMIZATION IS PERFORMED ###
    print "\nVoxel-Candidates model optimization..."
    start_time = time.time()
    val_batch_scores = np.zeros((bv, bt), dtype=fpX)
    best_epochs = np.zeros(shape=(nv), dtype=int)
    best_scores = np.full(shape=(nv), fill_value=np.inf, dtype=fpX)
    best_models = np.zeros(shape=(nv), dtype=int)

    #    W, b = fwrf_o_params #!!!!!
    best_w_params = [np.zeros(p.shape, dtype=fpX) for p in w_params]

    ### save score history
    num_outputs = int(
        num_epochs / output_val_every) + int(num_epochs % output_val_every > 0)
    val_scores = []
    if output_val_scores == -1:
        val_scores = np.zeros(shape=(num_outputs, nv, nt), dtype=fpX)
    elif output_val_scores > 0:
        outv = output_val_scores
        val_scores = np.zeros(shape=(num_outputs, bv * outv, nt), dtype=fpX)
    ###
    if dry_run:
        # free vram
        set_shared_parameters([
            __mst_sdata,
            __vox_sdata,
        ] + fwrf_o_params)
        return val_scores, best_scores, best_epochs, best_models, best_w_params
    ### VOXEL LOOP
    for v, (rv, lv) in tqdm(enumerate(iterate_range(0, nv, bv))):
        voxelSlice = voxels[:, rv]
        best_epochs_slice = best_epochs[rv]
        best_scores_slice = best_scores[rv]
        best_models_slice = best_models[rv]
        params_init = [p[rv] for p in w_params]
        #        rW, rb = w_params[0][rv,:], w_params[1][rv]
        if lv < bv:  #PATCH UP MISSING DATA FOR THE FIXED VOXEL BATCH SIZE
            voxelSlice = np.concatenate(
                (voxelSlice, np.zeros(shape=(n, bv - lv), dtype=fpX)), axis=1)
            for i, p in enumerate(params_init):
                params_init[i] = np.concatenate(
                    (p, np.zeros(shape=(bv - lv, ) + p.shape[1:], dtype=fpX)),
                    axis=0)
#            rW = np.concatenate((rW, np.zeros(shape=(bv-lv, nf), dtype=fpX)), axis=0)
#            rb = np.concatenate((rb, np.zeros(shape=(bv-lv), dtype=fpX)), axis=0)
        for i, p in enumerate(params_init):
            if len(p.shape) == 2:
                params_init[i] = np.repeat(p.T, repeats=bt)
            else:
                params_init[i] = np.repeat(p, repeats=bt)


#        pW = np.repeat(rW.T, repeats=bt).reshape((nf,bv,bt)) # ALL CANDIDATE MODELS GET THE SAME INITIAL PARAMETER VALUES
#        pb = np.repeat(rb, repeats=bt).reshape((1, bv,bt))

        set_shared_parameters([__vox_sdata], [voxelSlice])
        ### CANDIDATE LOOP
        for t in range(nbt):  ## CANDIDATE BATCH LOOP
            # need to recompile to reset the solver!!! (depending on the solver used)
            fwrf_o_trn_fn = theano.function([__range],
                                            updates=__fwrf_o_updates)
            # set the shared parameter values for these candidates; every candidate restarts from the same point.
            set_shared_parameters([
                __mst_sdata,
            ] + fwrf_o_params, [
                mst_data[:, :, :, t * bt:(t + 1) * bt],
            ] + params_init)
            print "\n  Voxel %d:%d of %d, Candidate %d:%d of %d" % (
                rv[0], rv[-1] + 1, nv, t * bt, (t + 1) * bt, nt)
            ### EPOCH LOOP
            epoch_start = time.time()
            for epoch in range(num_epochs):
                ######## ONE EPOCH OF TRAINING ###########
                val_batch_scores.fill(0)
                # In each epoch, we do a full pass over the training data:
                for rb, lb in iterate_bounds(0, n - holdout_size, bn):
                    fwrf_o_trn_fn(rb)
                # and one pass over the validation set.
                val_batches = 0
                for rb, lb in iterate_bounds(n - holdout_size, holdout_size,
                                             bn):
                    loss = fwrf_o_val_fn(rb)
                    val_batch_scores += loss
                    val_batches += lb
                val_batch_scores /= val_batches
                if verbose:
                    print "    validation <loss>: %.6f" % (
                        val_batch_scores.mean())
                ### RECORD TIME SERIES ###
                if epoch % output_val_every == 0:
                    if output_val_scores == -1:
                        val_scores[int(epoch / output_val_every), rv, t *
                                   bt:(t + 1) * bt] = val_batch_scores[:lv, :]
                    elif output_val_scores > 0:
                        val_scores[int(epoch / output_val_every),
                                   v * outv:(v + 1) * outv, t * bt:(t + 1) *
                                   bt] = val_batch_scores[:min(outv, lv), :]
                ##### RECORD MINIMUM SCORE AND MODELS #####
                best_models_for_this_epoch = np.argmin(
                    val_batch_scores[:lv, :], axis=1)
                best_scores_for_this_epoch = np.amin(val_batch_scores[:lv, :],
                                                     axis=1)
                # This updates the BEST RELATIVE MODELS, along with their associated scores
                best_scores_mask = (
                    best_scores_for_this_epoch < best_scores_slice
                )  #all the voxels that show an improvement
                best_epochs_slice[best_scores_mask] = epoch
                np.copyto(best_scores_slice,
                          best_scores_for_this_epoch,
                          casting='same_kind',
                          where=best_scores_mask)
                np.copyto(
                    best_models_slice,
                    best_models_for_this_epoch + t * bt,
                    casting='same_kind',
                    where=best_scores_mask
                )  #notice the +t*bt to return the best model across all models, not just the batch's
                #to select the weight slices we need, we need to specify the voxels that showed improvement AND the models that correspond to these improvements.
                update_vm_pos = np.zeros((bv, bt), dtype=bool)
                update_vm_pos[
                    np.arange(lv)[best_scores_mask],
                    best_models_for_this_epoch[best_scores_mask]] = True
                update_vm_idx = np.arange(bv * bt)[update_vm_pos.flatten()]
                # update the best parameter values based on the voxelmodel validation scores.

                for bwp, p in zip(best_w_params, fwrf_o_params):
                    pv = p.get_value()
                    if len(bwp.shape) == 2:
                        bwp[np.asarray(rv)[best_scores_mask]] = (pv.reshape(
                            (pv.shape[0], -1)).T)[update_vm_idx]
                    else:
                        bwp[np.asarray(rv)[best_scores_mask]] = (pv.reshape(
                            (-1)))[update_vm_idx]
                #best_w_params[0][np.asarray(rv)[best_scores_mask], :] = (W.get_value().reshape((nf,-1))[:,update_vm_idx]).T
                #best_w_params[1][np.asarray(rv)[best_scores_mask]]    = b.get_value().reshape((-1))[update_vm_idx]

            batch_time = time.time() - epoch_start
            print "    %d Epoch for %d voxelmodels took %.3fs @ %.3f voxelmodels/s" % (
                num_epochs, lv * bt, batch_time, fpX(lv * bt) / batch_time)
            sys.stdout.flush()
        #end candidate loop
        best_epochs[rv] = np.copy(best_epochs_slice)
        best_scores[rv] = np.copy(best_scores_slice)  ##NECESSARY TO COPY BACK
        best_models[rv] = np.copy(best_models_slice)
    # end voxel loop
    # free shared vram
    set_shared_parameters([
        __mst_sdata,
        __vox_sdata,
    ] + fwrf_o_params)
    full_time = time.time() - start_time
    print "\n---------------------------------------------------------------------"
    print "%d Epoch for %d voxelmodels took %.3fs @ %.3f voxelmodels/s" % (
        num_epochs, nv * nt, full_time, fpX(nv * nt) / full_time)
    return val_scores, best_scores, best_epochs, best_models, best_w_params
Example #22
def get_model():

    dtensor4 = T.TensorType("float32", (False,) * 4)
    input_var = dtensor4("inputs")
    dtensor2 = T.TensorType("float32", (False,) * 2)
    target_var = dtensor2("targets")

    # input layer with unspecified batch size
    layer_input = InputLayer(
        shape=(None, 30, 64, 64), input_var=input_var
    )  # InputLayer(shape=(None, 1, 30, 64, 64), input_var=input_var)
    layer_0 = DimshuffleLayer(layer_input, (0, "x", 1, 2, 3))

    # Z-score?

    # Convolution + batch-normalisation + activation, then a (padded) max-pooling layer followed by a dropout layer
    layer_1 = batch_norm(
        Conv3DDNNLayer(
            incoming=layer_0,
            num_filters=64,
            filter_size=(3, 3, 3),
            stride=(1, 3, 3),
            pad="same",
            nonlinearity=leaky_rectify,
            W=Orthogonal(),
        )
    )
    layer_2 = MaxPool3DDNNLayer(layer_1, pool_size=(1, 2, 2), stride=(1, 2, 2), pad=(0, 1, 1))
    layer_3 = DropoutLayer(layer_2, p=0.25)

    # Convolution + batch-normalisation + activation, then a (padded) max-pooling layer followed by a dropout layer
    layer_4 = batch_norm(
        Conv3DDNNLayer(
            incoming=layer_3,
            num_filters=128,
            filter_size=(3, 3, 3),
            stride=(1, 3, 3),
            pad="same",
            nonlinearity=leaky_rectify,
            W=Orthogonal(),
        )
    )
    layer_5 = MaxPool3DDNNLayer(layer_4, pool_size=(1, 2, 2), stride=(1, 2, 2), pad=(0, 1, 1))
    layer_6 = DropoutLayer(layer_5, p=0.25)

    # Convolution + batch-normalisation + activation, then a (padded) max-pooling layer followed by a dropout layer
    layer_7 = batch_norm(
        Conv3DDNNLayer(
            incoming=layer_6,
            num_filters=256,
            filter_size=(3, 3, 3),
            stride=(1, 3, 3),
            pad="same",
            nonlinearity=leaky_rectify,
            W=Orthogonal(),
        )
    )
    layer_8 = MaxPool3DDNNLayer(layer_7, pool_size=(1, 2, 2), stride=(1, 2, 2), pad=(0, 1, 1))
    layer_9 = DropoutLayer(layer_8, p=0.25)

    # Recurrent layer
    layer_10 = DimshuffleLayer(layer_9, (0, 2, 1, 3, 4))
    layer_11 = LSTMLayer(layer_10, num_units=612, hid_init=Orthogonal(), only_return_final=False)

    # Output Layer
    layer_systole = DenseLayer(layer_11, 600, nonlinearity=leaky_rectify, W=Orthogonal())
    layer_diastole = DenseLayer(layer_11, 600, nonlinearity=leaky_rectify, W=Orthogonal())
    layer_systole_1 = DropoutLayer(layer_systole, p=0.3)
    layer_diastole_1 = DropoutLayer(layer_diastole, p=0.3)

    layer_systole_2 = DenseLayer(layer_systole_1, 1, nonlinearity=None, W=Orthogonal())
    layer_diastole_2 = DenseLayer(layer_diastole_1, 1, nonlinearity=None, W=Orthogonal())
    layer_output = ConcatLayer([layer_systole_2, layer_diastole_2])

    # Loss
    prediction = get_output(layer_output)
    loss = squared_error(prediction, target_var)
    loss = loss.mean()

    # Updates: Adam (RMSProp / Nesterov-momentum variants are left commented out below)
    params = get_all_params(layer_output, trainable=True)
    updates = adam(loss, params)
    # updates_0            = rmsprop(loss, params)
    # updates              = apply_nesterov_momentum(updates_0, params)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network, disabling dropout layers.
    test_prediction = get_output(layer_output, deterministic=True)
    test_loss = squared_error(test_prediction, target_var)
    test_loss = test_loss.mean()

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates, allow_input_downcast=True)

    # Compile a second function computing the validation loss and accuracy
    val_fn = theano.function([input_var, target_var], test_loss, allow_input_downcast=True)

    # Compile a third function computing the prediction
    predict_fn = theano.function([input_var], test_prediction, allow_input_downcast=True)

    return [layer_output, train_fn, val_fn, predict_fn]
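A hedged usage sketch for the list returned above (the batch size is arbitrary; the Conv3DDNN/MaxPool3DDNN layers require a GPU with cuDNN):

import numpy as np

layer_output, train_fn, val_fn, predict_fn = get_model()
X = np.random.rand(4, 30, 64, 64).astype('float32')
y = np.random.rand(4, 2).astype('float32')
print(train_fn(X, y))        # training loss for this mini-batch
print(val_fn(X, y))          # deterministic validation loss
print(predict_fn(X).shape)   # (4, 2): systole and diastole outputs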
Example #23
File: ldrhdr.py  Project: soravux/jambokoko
# (This snippet begins part-way through a `layers` list of (layer_class, kwargs) tuples.)
    (Conv2DLayerFast, {'num_filters': 1, 'filter_size': filter_size, 'pad': pad_out}),
    (ReshapeLayer, {'shape': (([0], -1))}),
]



input_var = T.tensor4('inputs')
output_var = T.matrix('outputs')

network = layers[0][0](input_var=input_var, **layers[0][1])
for layer in layers[1:]:
    network = layer[0](network, **layer[1])

prediction = get_output(network)
loss = squared_error(prediction, output_var)
loss = loss.mean()

params = get_all_params(network, trainable=True)
#updates = nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.9)
updates = sgd(loss, params, learning_rate=0.01)


test_prediction = get_output(network, deterministic=True)
test_loss = squared_error(test_prediction, output_var)
test_loss = test_loss.mean()
# As a bonus, also create an expression for the classification accuracy:
#test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), output_var),
#                  dtype=theano.config.floatX)

train_fn = theano.function([input_var, output_var], loss, updates=updates)# , mode=theano.compile.MonitorMode(post_func=theano.compile.monitormode.detect_nan))
Example #24
def test_squared_error_preserve_dtype():
    from lasagne.objectives import squared_error
    for dtype in 'float64', 'float32', 'float16':
        a = theano.tensor.matrix('a', dtype=dtype)
        b = theano.tensor.matrix('b', dtype=dtype)
        assert squared_error(a, b).dtype == dtype
Example #25
def get_elementwise_objective(Qvalues,
                              actions,
                              rewards,
                              is_alive="always",
                              Qvalues_target=None,
                              gamma_or_gammas=0.95,
                              crop_last=True,
                              force_qvalues_after_end=True,
                              qvalues_after_end="zeros",
                              consider_reference_constant=True, ):
    """
    Returns squared error between predicted and reference Qvalues according to Q-learning algorithm

        Qreference(state,action) = reward(state,action) + gamma* Q(next_state,next_action)
        loss = mean over (Qvalues - Qreference)**2

    :param Qvalues: [batch,tick,action_id] - predicted qvalues
    :param actions: [batch,tick] - committed actions
    :param rewards: [batch,tick] - immediate rewards for taking actions at given time ticks
    :param is_alive: [batch,tick] - whether given session is still active at given tick. Defaults to always active.
                            Default value of is_alive implies a simplified computation algorithm for Qlearning loss
    :param Qvalues_target: Older snapshot Qvalues (e.g. from a target network). If None, uses current Qvalues
    :param gamma_or_gammas: a single value or array[batch,tick](can broadcast dimensions) of delayed reward discounts
    :param crop_last: if True, zeroes out the loss at the final tick; if False, computes the loss against qvalues_after_end
    :param force_qvalues_after_end: if true, sets reference Qvalues at session end to rewards[end] + qvalues_after_end
    :param qvalues_after_end: [batch,1,n_actions] - symbolic expression for "next state q-values" for last tick used for reference only.
                            Defaults at  T.zeros_like(Qvalues[:,0,None,:])
                            If you wish to simply ignore the last tick, use defaults and crop output's last tick ( qref[:,:-1] )
    :param consider_reference_constant: whether or not zero-out gradient flow through reference_Qvalues
            (True is highly recommended unless you know what you're doing)
    :return: tensor [batch, tick] of squared errors over Qvalues (using formula above for loss)
    """
    if Qvalues_target is None:
        Qvalues_target = Qvalues



    # get reference Qvalues via Q-learning algorithm
    reference_Qvalues = get_reference_Qvalues(Qvalues_target, actions, rewards,
                                              gamma_or_gammas=gamma_or_gammas,
                                              qvalues_after_end=qvalues_after_end,
                                              )

    if consider_reference_constant:
        # do not pass gradient through reference Q-values (since they DO depend on Q-values by default)
        reference_Qvalues = consider_constant(reference_Qvalues)

    # get predicted qvalues for committed actions (to compare with reference Q-values)
    action_Qvalues = get_action_Qvalues(Qvalues, actions)

    # if agent is always alive, return the simplified loss
    if is_alive == "always":

        # tensor of element-wise squared errors
        elwise_squared_error = squared_error(reference_Qvalues, action_Qvalues)

    else:
        # we are given an is_alive matrix : uint8[batch,tick]

        # if asked to force reference_Q[end_tick+1,a] = 0, do it
        # note: if agent is always alive, this is meaningless

        if force_qvalues_after_end:
            # set future rewards at session end to rewards + qvalues_after_end
            end_ids = get_end_indicator(is_alive, force_end_at_t_max=True).nonzero()

            if qvalues_after_end == "zeros":
                # "set reference Q-values at end action ids to just the immediate rewards"
                reference_Qvalues = T.set_subtensor(reference_Qvalues[end_ids], rewards[end_ids])
            else:
                last_optimal_rewards = T.zeros_like(rewards[:, 0])

                # "set reference Q-values at end action ids to the immediate rewards + qvalues after end"
                reference_Qvalues = T.set_subtensor(reference_Qvalues[end_ids],
                                                    rewards[end_ids] + gamma_or_gammas * last_optimal_rewards[
                                                        end_ids[0], 0]
                                                    )

        # tensor of element-wise squared errors
        elwise_squared_error = squared_error(reference_Qvalues, action_Qvalues)

        # zero-out loss after session ended
        elwise_squared_error = elwise_squared_error * is_alive
     
    if crop_last:
        elwise_squared_error = T.set_subtensor(elwise_squared_error[:,-1],0)


    return elwise_squared_error
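A tiny NumPy illustration of the element-wise error and the is_alive masking described in the docstring above (all numbers are made up):

import numpy as np

reference_Q = np.array([[1.0, 2.0, 0.5]])   # [batch, tick]
action_Q    = np.array([[0.8, 2.5, 0.0]])
is_alive    = np.array([[1.0, 1.0, 0.0]])   # session ends after the second tick
elwise = (reference_Q - action_Q) ** 2 * is_alive
print(elwise)   # [[0.04 0.25 0.  ]]  (with crop_last=True the final tick is zeroed as well)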
Example #26
    def create_nnet(input_dims, action_dims, observation_dims, value_dims, learning_rate, grad_clip=None, l1_weight=None, l2_weight=None,
                    num_hidden_units=20, num_hidden_action_units=None, num_hidden_observ_units=None, num_hidden_value_units=None,
                    batch_size=32, max_train_epochs=1, hidden_nonlinearity=nonlinearities.rectify,
                    output_nonlinearity=None, update_method=updates.sgd):
        commonlayers = []
        commonlayers.append(layers.InputLayer(shape=(None, input_dims)))
        commonlayers.append(DenseLayer(commonlayers[-1], num_hidden_units,
                                               nonlinearity=hidden_nonlinearity))
        if num_hidden_action_units is None:
            actionlayers = [DenseLayer(commonlayers[-1], action_dims,
                                               nonlinearity=output_nonlinearity)]
        else:
            actionlayers = [DenseLayer(commonlayers[-1], num_hidden_action_units,
                                               nonlinearity=output_nonlinearity)]
            actionlayers.append(DenseLayer(actionlayers[-1], action_dims,
                                                   nonlinearity=output_nonlinearity))
        if num_hidden_observ_units is None:
            observlayers = [DenseLayer(commonlayers[-1], observation_dims,
                                               nonlinearity=output_nonlinearity)]
        else:
            observlayers = [DenseLayer(commonlayers[-1], num_hidden_observ_units,
                                               nonlinearity=output_nonlinearity)]
            observlayers.append(DenseLayer(observlayers[-1], observation_dims, nonlinearity=output_nonlinearity))
        if num_hidden_value_units is None:
            dvaluelayers = [DenseLayer(commonlayers[-1], value_dims,
                                               nonlinearity=output_nonlinearity)]
        else:
            dvaluelayers = [DenseLayer(commonlayers[-1], num_hidden_value_units,
                                               nonlinearity=output_nonlinearity)]
            dvaluelayers.append(DenseLayer(dvaluelayers[-1], value_dims,
                                                   nonlinearity=output_nonlinearity))
        actvallayers = [layers.ConcatLayer([actionlayers[-1], dvaluelayers[-1]])]
        obsvallayers = [layers.ConcatLayer([observlayers[-1], dvaluelayers[-1]])]
        concatlayers = [layers.ConcatLayer([actionlayers[-1], observlayers[-1], dvaluelayers[-1]])]
        action_prediction = layers.get_output(actionlayers[-1])
        dvalue_prediction = layers.get_output(dvaluelayers[-1])
        actval_prediction = layers.get_output(actvallayers[-1])
        obsval_prediction = layers.get_output(obsvallayers[-1])
        concat_prediction = layers.get_output(concatlayers[-1])
        input_var = commonlayers[0].input_var
        action_target = T.matrix(name="action_target", dtype=floatX)
        dvalue_target = T.matrix(name="value_target", dtype=floatX)
        actval_target = T.matrix(name="actval_target", dtype=floatX)
        obsval_target = T.matrix(name="obsval_target", dtype=floatX)
        concat_target = T.matrix(name="concat_target", dtype=floatX)
        action_loss = objectives.squared_error(action_prediction, action_target).mean()
        obsval_loss = objectives.squared_error(obsval_prediction, obsval_target).mean()
        dvalue_loss = objectives.squared_error(dvalue_prediction, dvalue_target).mean()
        actval_loss = objectives.squared_error(actval_prediction, actval_target).mean()
        concat_loss = objectives.squared_error(concat_prediction, concat_target).mean()
        if l1_weight is not None:
            action_l1penalty = regularize_layer_params(commonlayers + actionlayers, l1) * l1_weight
            obsval_l1penalty = regularize_layer_params(commonlayers + observlayers + dvaluelayers, l1) * l1_weight
            dvalue_l1penalty = regularize_layer_params(commonlayers + dvaluelayers, l1) * l1_weight
            actval_l1penalty = regularize_layer_params(commonlayers + actionlayers + dvaluelayers, l1) * l1_weight
            concat_l1penalty = regularize_layer_params(commonlayers + actionlayers + observlayers + dvaluelayers, l1) * l1_weight
            action_loss += action_l1penalty
            obsval_loss += obsval_l1penalty
            dvalue_loss += dvalue_l1penalty
            actval_loss += actval_l1penalty
            concat_loss += concat_l1penalty
        if l2_weight is not None:
            action_l2penalty = regularize_layer_params(commonlayers + actionlayers, l2) * l2_weight
            obsval_l2penalty = regularize_layer_params(commonlayers + observlayers + dvaluelayers, l2) * l2_weight
            dvalue_l2penalty = regularize_layer_params(commonlayers + dvaluelayers, l2) * l2_weight
            actval_l2penalty = regularize_layer_params(commonlayers + actionlayers + dvaluelayers, l2) * l2_weight
            concat_l2penalty = regularize_layer_params(commonlayers + actionlayers + observlayers + dvaluelayers, l2) * l2_weight
            action_loss += action_l2penalty
            obsval_loss += obsval_l2penalty
            dvalue_loss += dvalue_l2penalty
            actval_loss += actval_l2penalty
            concat_loss += concat_l2penalty
        action_params = layers.get_all_params(actionlayers[-1], trainable=True)
        obsval_params = layers.get_all_params(obsvallayers[-1], trainable=True)
        dvalue_params = layers.get_all_params(dvaluelayers[-1], trainable=True)
        actval_params = layers.get_all_params(actvallayers[-1], trainable=True)
        concat_params = layers.get_all_params(concatlayers[-1], trainable=True)
        if grad_clip is not None:
            action_grads = theano.grad(action_loss, action_params)
            obsval_grads = theano.grad(obsval_loss, obsval_params)
            dvalue_grads = theano.grad(dvalue_loss, dvalue_params)
            actval_grads = theano.grad(actval_loss, actval_params)
            concat_grads = theano.grad(concat_loss, concat_params)
            action_grads = [updates.norm_constraint(grad, grad_clip, range(grad.ndim)) for grad in action_grads]
            obsval_grads = [updates.norm_constraint(grad, grad_clip, range(grad.ndim)) for grad in obsval_grads]
            dvalue_grads = [updates.norm_constraint(grad, grad_clip, range(grad.ndim)) for grad in dvalue_grads]
            actval_grads = [updates.norm_constraint(grad, grad_clip, range(grad.ndim)) for grad in actval_grads]
            concat_grads = [updates.norm_constraint(grad, grad_clip, range(grad.ndim)) for grad in concat_grads]
            action_updates = update_method(action_grads, action_params, learning_rate)
            obsval_updates = update_method(obsval_grads, obsval_params, learning_rate)
            dvalue_updates = update_method(dvalue_grads, dvalue_params, learning_rate)
            actval_updates = update_method(actval_grads, actval_params, learning_rate)
            concat_updates = update_method(concat_grads, concat_params, learning_rate)
        else:
            action_updates = update_method(action_loss, action_params, learning_rate)
            obsval_updates = update_method(obsval_loss, obsval_params, learning_rate)
            dvalue_updates = update_method(dvalue_loss, dvalue_params, learning_rate)
            actval_updates = update_method(actval_loss, actval_params, learning_rate)
            concat_updates = update_method(concat_loss, concat_params, learning_rate)

        fit_action = theano.function([input_var, action_target], action_loss, updates=action_updates)
        fit_obsval = theano.function([input_var, obsval_target], obsval_loss, updates=obsval_updates)
        fit_dvalue = theano.function([input_var, dvalue_target], dvalue_loss, updates=dvalue_updates)
        fit_actval = theano.function([input_var, actval_target], actval_loss, updates=actval_updates)
        fit_concat = theano.function([input_var, concat_target], concat_loss, updates=concat_updates)

        predict_action = theano.function([input_var], action_prediction)
        predict_obsval = theano.function([input_var], obsval_prediction)
        predict_dvalue = theano.function([input_var], dvalue_prediction)
        predict_actval = theano.function([input_var], actval_prediction)
        predict_concat = theano.function([input_var], concat_prediction)

        nnet = Mock(
            fit_action=fit_action,
            fit_obsval=fit_obsval,
            fit_value=fit_dvalue,
            fit_actval=fit_actval,
            fit_both=fit_concat,
            predict_action=predict_action,
            predict_obsval=predict_obsval,
            predict_value=predict_dvalue,
            predict_actval=predict_actval,
            predict_both=predict_concat,
        )
        return nnet
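
A minimal usage sketch for the Mock container returned above (not part of the original code: the data arrays, their shapes, and the assumption that floatX is float32 and the input layer takes a (batch, features) matrix are all hypothetical):

import numpy as np

# Hypothetical data standing in for real states and action targets.
states = np.random.rand(64, 10).astype('float32')
action_targets = np.random.rand(64, 4).astype('float32')

# `nnet` is assumed to be the container returned by the builder above.
for epoch in range(100):
    action_loss = nnet.fit_action(states, action_targets)   # one update step on the action head
predicted_actions = nnet.predict_action(states)
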
예제 #27
0
def get_model(input_var, target_var, multiply_var):

    # input layer with unspecified batch size
    layer_both_0 = InputLayer(shape=(None, 30, 64, 64), input_var=input_var)

    # Z-score?

    # Convolution then batchNormalisation then activation layer, twice, then zero padding layer followed by a dropout layer
    layer_both_1 = batch_norm(
        Conv2DLayer(layer_both_0,
                    64, (3, 3),
                    pad='same',
                    nonlinearity=leaky_rectify))
    layer_both_2 = batch_norm(
        Conv2DLayer(layer_both_1,
                    64, (3, 3),
                    pad='same',
                    nonlinearity=leaky_rectify))
    layer_both_3 = MaxPool2DLayer(layer_both_2,
                                  pool_size=(2, 2),
                                  stride=(2, 2),
                                  pad=(1, 1))
    layer_both_4 = DropoutLayer(layer_both_3, p=0.25)

    # Convolution then batchNormalisation then activation layer, twice, then zero padding layer followed by a dropout layer
    layer_both_5 = batch_norm(
        Conv2DLayer(layer_both_4,
                    128, (3, 3),
                    pad='same',
                    nonlinearity=leaky_rectify))
    layer_both_6 = batch_norm(
        Conv2DLayer(layer_both_5,
                    128, (3, 3),
                    pad='same',
                    nonlinearity=leaky_rectify))
    layer_both_7 = MaxPool2DLayer(layer_both_6,
                                  pool_size=(2, 2),
                                  stride=(2, 2),
                                  pad=(1, 1))
    layer_both_8 = DropoutLayer(layer_both_7, p=0.25)

    # Convolution then batchNormalisation then activation layer, twice, then zero padding layer followed by a dropout layer
    layer_both_9 = batch_norm(
        Conv2DLayer(layer_both_8,
                    256, (3, 3),
                    pad='same',
                    nonlinearity=leaky_rectify))
    layer_both_10 = batch_norm(
        Conv2DLayer(layer_both_9,
                    256, (3, 3),
                    pad='same',
                    nonlinearity=leaky_rectify))
    layer_both_11 = batch_norm(
        Conv2DLayer(layer_both_10,
                    256, (3, 3),
                    pad='same',
                    nonlinearity=leaky_rectify))
    layer_both_12 = MaxPool2DLayer(layer_both_11,
                                   pool_size=(2, 2),
                                   stride=(2, 2),
                                   pad=(1, 1))
    layer_both_13 = DropoutLayer(layer_both_12, p=0.25)

    # Flatten
    layer_flatten = FlattenLayer(layer_both_13)

    # Prediction
    layer_hidden = DenseLayer(layer_flatten, 500, nonlinearity=sigmoid)
    layer_prediction = DenseLayer(layer_hidden, 2, nonlinearity=linear)

    # Loss
    prediction = get_output(layer_prediction) / multiply_var
    loss = squared_error(prediction, target_var)
    loss = loss.mean()

    #Updates : Stochastic Gradient Descent (SGD) with Nesterov momentum
    params = get_all_params(layer_prediction, trainable=True)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network, disabling dropout layers.
    test_prediction = get_output(layer_prediction,
                                 deterministic=True) / multiply_var
    test_loss = squared_error(test_prediction, target_var)
    test_loss = test_loss.mean()

    # crps estimate
    crps = T.abs_(test_prediction - target_var).mean() / 600

    return test_prediction, crps, loss, params
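
get_model above returns symbolic expressions rather than compiled functions; below is a hedged sketch of how a caller might compile them. The variable types and the Nesterov-momentum choice follow the in-code comment but are assumptions, not the original training script (in particular, multiply_var is assumed here to be a scalar rescaling factor):

import theano
import theano.tensor as T
from lasagne.updates import nesterov_momentum

input_var = T.tensor4('inputs')        # (batch, 30, 64, 64) clips
target_var = T.matrix('targets')       # (batch, 2) targets
multiply_var = T.scalar('multiply')    # assumed scalar rescaling factor

test_prediction, crps, loss, params = get_model(input_var, target_var, multiply_var)

updates = nesterov_momentum(loss, params, learning_rate=1e-3, momentum=0.9)
train_fn = theano.function([input_var, target_var, multiply_var], loss, updates=updates)
crps_fn = theano.function([input_var, target_var, multiply_var], crps)
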
예제 #28
0
    def build_model(self, train_set, test_set, validation_set=None):
        super(FCAE, self).build_model(train_set, test_set, validation_set)

        y_train = get_output(self.model, self.sym_x)
        loss = aggregate(squared_error(y_train, self.sym_x), mode='mean')
        # loss += + 1e-4 * lasagne.regularization.regularize_network_params(self.model, lasagne.regularization.l2)

        y_test = get_output(self.model, self.sym_x, deterministic=True)
        loss_test = aggregate(squared_error(y_test, self.sym_x), mode='mean')

        all_params = get_all_params(self.model, trainable=True)
        sym_beta1 = T.scalar('beta1')
        sym_beta2 = T.scalar('beta2')
        grads = T.grad(loss, all_params)
        ngrads = lasagne.updates.total_norm_constraint(grads, 5)
        cgrads = [T.clip(g, -5, 5) for g in ngrads]
        updates = rmsprop(cgrads, all_params, self.sym_lr, sym_beta1,
                          sym_beta2)

        inputs = [
            self.sym_index, self.sym_batchsize, self.sym_lr, sym_beta1,
            sym_beta2
        ]
        f_train = theano.function(
            inputs,
            [loss],
            updates=updates,
            givens={
                self.sym_x: self.sh_train_x[self.batch_slice],
            },
        )

        f_test = theano.function(
            [self.sym_index, self.sym_batchsize],
            [loss_test],
            givens={
                self.sym_x: self.sh_test_x[self.batch_slice],
            },
            on_unused_input='ignore',
        )

        f_ae = None
        # f_ae = theano.function(
        #     [self.sym_batchsize], [y_test],
        #     givens={
        #         self.sym_x: self.sh_valid_x,
        #     },
        #     on_unused_input='ignore',
        # )

        self.train_args['inputs']['batchsize'] = 128
        self.train_args['inputs']['learningrate'] = 1e-3
        self.train_args['inputs']['beta1'] = 0.9
        self.train_args['inputs']['beta2'] = 1e-6
        self.train_args['outputs']['loss'] = '%0.6f'

        self.test_args['inputs']['batchsize'] = 128
        self.test_args['outputs']['loss_test'] = '%0.6f'

        # self.validate_args['inputs']['batchsize'] = 128
        # self.validate_args['outputs']['loss_eval'] = '%0.6f'
        # self.validate_args['outputs']['loss_acc'] = '%0.6f'
        return f_train, f_test, f_ae, self.train_args, self.test_args, self.validate_args
예제 #29
0
def get_elementwise_objective(policy,
                              state_values,
                              actions,
                              rewards,
                              is_alive="always",
                              state_values_target=None,
                              n_steps=None,
                              gamma_or_gammas=0.99,
                              crop_last=True,
                              force_values_after_end=True,
                              state_values_after_end="zeros",
                              consider_value_reference_constant=True,
                              consider_predicted_value_constant=True,
                              scan_dependencies=(),
                              scan_strict=True,
                              min_proba=1e-30):
    """
    returns cross-entropy-like objective function for Actor-Critic method

        L_policy = - log(policy) * (V_reference - const(V))
        L_V = (V - Vreference)^2

    :param policy: [batch,tick,action_id] - predicted action probabilities
    :param state_values: [batch,tick] - predicted state values
    :param actions: [batch,tick] - committed actions
    :param rewards: [batch,tick] - immediate rewards for taking actions at given time ticks
    :param is_alive: [batch,tick] - whether given session is still active at given tick. Defaults to always active.
                            Default value of is_alive implies a simplified computation algorithm for Qlearning loss
    :param state_values_target: state values used to compute the reference (e.g. from an older network snapshot).
                If None (default), uses the current state values to compute the reference
    :param n_steps: if an integer is given, the references are computed in loops of 3 states.
            Defaults to None: propagating rewards throughout the whole session.
            If n_steps equals 1, this works exactly as Q-learning (though a less efficient one)
            If you provide symbolic integer here AND strict = True, make sure you added the variable to dependencies.
    :param gamma_or_gammas: a single value or array[batch,tick](can broadcast dimensions) of delayed reward discounts
    :param crop_last: if True, zeros-out loss at final tick, if False - computes loss VS Qvalues_after_end
    :param force_values_after_end: if true, sets reference policy at session end to rewards[end] + qvalues_after_end
    :param state_values_after_end: [batch,1,n_actions] - "next state values" for last tick used for reference only.
                            Defaults to T.zeros_like(state_values[:,0,None,:])
                            If you wish to simply ignore the last tick, use defaults and crop output's last tick ( qref[:,:-1] )

    :param consider_value_reference_constant: whether or not to zero-out gradients through the "reference state values" term
    :param consider_predicted_value_constant: whether or not to consider predicted state value constant in the POLICY  LOSS COMPONENT
    :param scan_dependencies: everything you need to evaluate first 3 parameters (only if strict==True)
    :param scan_strict: whether to evaluate values using strict theano scan or non-strict one
    :param min_proba: minimum value for policy term. Used to prevent -inf when policy(action) ~ 0.
    :return: elementwise sum of policy_loss + state_value_loss [batch,tick]

    """

    if state_values_target is None:
        state_values_target = state_values

    # get reference values via Q-learning algorithm
    reference_state_values = get_n_step_value_reference(state_values_target, rewards, is_alive,
                                                        n_steps=n_steps,
                                                        optimal_state_values_after_end=state_values_after_end,
                                                        gamma_or_gammas=gamma_or_gammas,
                                                        dependencies=scan_dependencies,
                                                        strict=scan_strict
                                                        )

    # if we have to set after_end values
    if is_alive != "always" and force_values_after_end:
        # if asked to force reference_Q[end_tick+1,a] = 0, do it
        # note: if agent is always alive, this is meaningless

        # set future rewards at session end to rewards+qvalues_after_end
        end_ids = get_end_indicator(is_alive, force_end_at_t_max=True).nonzero()

        if state_values_after_end == "zeros":
            # "set reference state values at end action ids to just the immediate rewards"
            reference_state_values = T.set_subtensor(reference_state_values[end_ids], rewards[end_ids])
        else:

            # "set reference state values at end action ids to the immediate rewards + qvalues after end"
            new_state_values = rewards[end_ids] + gamma_or_gammas * state_values_after_end[end_ids[0], 0]
            reference_state_values = T.set_subtensor(reference_state_values[end_ids], new_state_values)

    # now compute the loss
    if is_alive == "always":
        is_alive = T.ones_like(actions, dtype=theano.config.floatX)

    # actor loss
    action_probas = get_action_Qvalues(policy, actions)

    if crop_last:
        reference_state_values = T.set_subtensor(reference_state_values[:,-1],
                                                 state_values[:,-1])
    if consider_value_reference_constant:
        reference_state_values = consider_constant(reference_state_values)

    log_probas = T.log(action_probas)

    #set min proba in a way that does not zero-out the derivatives
    # idea:
    # log(p) = log(p) if p != 0 else log(p+min_proba)
    if min_proba != 0:
        log_probas = T.switch(T.eq(action_probas,0),
                                T.log(action_probas+min_proba),
                                log_probas
                              )

    observed_state_values = consider_constant(state_values) if consider_predicted_value_constant else state_values

    policy_loss_elwise = - log_probas * (reference_state_values - observed_state_values)

    # critic loss
    V_err_elwise = squared_error(reference_state_values, state_values)

    return (policy_loss_elwise + V_err_elwise) * is_alive
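
A hedged usage sketch for the objective above. The symbolic inputs are placeholders here; in practice `policy` and `state_values` would come from lasagne.layers.get_output of an actor-critic network, and that wiring is an assumption:

import theano.tensor as T

policy = T.tensor3('policy')             # [batch, tick, action_id] action probabilities
state_values = T.matrix('state_values')  # [batch, tick] predicted V(s)
actions = T.imatrix('actions')           # [batch, tick] committed action ids
rewards = T.matrix('rewards')            # [batch, tick] immediate rewards

loss_elwise = get_elementwise_objective(policy, state_values, actions, rewards,
                                        gamma_or_gammas=0.99, n_steps=3)
total_loss = loss_elwise.sum(axis=1).mean()   # sum over time, average over sessions
# total_loss can then be passed to e.g. lasagne.updates.adam together with the
# trainable parameters of the network that produced `policy` and `state_values`.
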
예제 #30
0
def get_model():

    dtensor4 = T.TensorType('float32', (False,)*4)
    input_var = dtensor4('inputs')
    dtensor2 = T.TensorType('float32', (False,)*2)
    target_var = dtensor2('targets')

    # input layer with unspecified batch size
    layer_input     = InputLayer(shape=(None, 30, 64, 64), input_var=input_var) #InputLayer(shape=(None, 1, 30, 64, 64), input_var=input_var)
    layer_0         = DimshuffleLayer(layer_input, (0, 'x', 1, 2, 3))

    # Z-score?

    # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer
    layer_1         = batch_norm(Conv3DDNNLayer(incoming=layer_0, num_filters=64, filter_size=(3,3,3), stride=(1,3,3), pad='same', nonlinearity=leaky_rectify, W=Orthogonal()))
    layer_2         = MaxPool3DDNNLayer(layer_1, pool_size=(1, 2, 2), stride=(1, 2, 2), pad=(0, 1, 1))
    layer_3         = DropoutLayer(layer_2, p=0.25)

    # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer
    layer_4         = batch_norm(Conv3DDNNLayer(incoming=layer_3, num_filters=128, filter_size=(3,3,3), stride=(1,3,3), pad='same', nonlinearity=leaky_rectify, W=Orthogonal()))
    layer_5         = MaxPool3DDNNLayer(layer_4, pool_size=(1, 2, 2), stride=(1, 2, 2), pad=(0, 1, 1))
    layer_6         = DropoutLayer(layer_5, p=0.25)

    # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer
    layer_7         = batch_norm(Conv3DDNNLayer(incoming=layer_6, num_filters=256, filter_size=(3,3,3), stride=(1,3,3), pad='same', nonlinearity=leaky_rectify, W=Orthogonal()))
    layer_8         = MaxPool3DDNNLayer(layer_7, pool_size=(1, 2, 2), stride=(1, 2, 2), pad=(0, 1, 1))
    layer_9         = DropoutLayer(layer_8, p=0.25)
    
    # Recurrent layer
    layer_10         = DimshuffleLayer(layer_9, (0,2,1,3,4))
    layer_11         = LSTMLayer(layer_10, num_units=612, hid_init=Orthogonal(), only_return_final=False)

    # Output Layer
    layer_systole    = DenseLayer(layer_11, 600, nonlinearity=leaky_rectify, W=Orthogonal())
    layer_diastole   = DenseLayer(layer_11, 600, nonlinearity=leaky_rectify, W=Orthogonal())
    layer_systole_1  = DropoutLayer(layer_systole, p=0.3)
    layer_diastole_1 = DropoutLayer(layer_diastole, p=0.3)

    layer_systole_2   = DenseLayer(layer_systole_1, 1, nonlinearity=None, W=Orthogonal())
    layer_diastole_2  = DenseLayer(layer_diastole_1, 1, nonlinearity=None, W=Orthogonal())
    layer_output      = ConcatLayer([layer_systole_2, layer_diastole_2])

    # Loss
    prediction           = get_output(layer_output) 
    loss                 = squared_error(prediction, target_var)
    loss                 = loss.mean()

    # Updates: Stochastic Gradient Descent (SGD) with Nesterov momentum or Adam
    params               = get_all_params(layer_output, trainable=True)
    updates              = adam(loss, params)
    #updates_0            = rmsprop(loss, params)
    #updates              = apply_nesterov_momentum(updates_0, params)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network, disabling dropout layers.
    test_prediction      = get_output(layer_output, deterministic=True)
    test_loss            = squared_error(test_prediction, target_var)
    test_loss            = test_loss.mean()

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn             = theano.function([input_var, target_var], loss, updates=updates, allow_input_downcast=True)

    # Compile a second function computing the validation loss and accuracy
    val_fn               = theano.function([input_var, target_var], test_loss, allow_input_downcast=True)

    # Compile a third function computing the prediction
    predict_fn           = theano.function([input_var], test_prediction, allow_input_downcast=True)

    return [layer_output, train_fn, val_fn, predict_fn]
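
A minimal training-loop sketch for the functions returned above. Random arrays stand in for the real batches; the shapes follow the input and output layers defined in get_model, everything else is an assumption:

import numpy as np

layer_output, train_fn, val_fn, predict_fn = get_model()

X_batch = np.random.rand(4, 30, 64, 64).astype('float32')  # (batch, frames, height, width)
y_batch = np.random.rand(4, 2).astype('float32')            # (batch, [systole, diastole])

for epoch in range(5):
    train_loss = train_fn(X_batch, y_batch)
    val_loss = val_fn(X_batch, y_batch)
    print("epoch {}: train={:.4f} val={:.4f}".format(epoch + 1, float(train_loss), float(val_loss)))
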
예제 #31
0
def get_elementwise_objective_components(
        policy,
        rewards,
        policy_values,
        action_values='same',
        is_alive="always",
        n_steps=None,
        gamma_or_gammas=0.99,
        crop_last=True,
        force_values_after_end=True,
        state_values_after_end="zeros",
        consider_value_reference_constant=True,
        consider_predicted_value_constant=True,
        scan_dependencies=tuple(),
        scan_strict=True,
):
    """

    N-step Deterministic Policy Gradient (A2c) implementation.

    Works with continuous action space (real value or vector of such)

    Requires action policy(mu) and state values.

    Based on
    http://arxiv.org/abs/1509.02971
    http://jmlr.org/proceedings/papers/v32/silver14.pdf

    This particular implementation also allows N-step reinforcement learning

    The code mostly relies on the same architecture as advantage actor-critic a2c_n_step


    returns deterministic policy gradient components for actor and critic

        L_policy = -critic(state,policy) = -action_values 
        L_V = (V - Vreference)^2
        
        You will have to independently compute updates for actor and critic and then add them up.
            
    parameters:
    
        policy [batch,tick,action_id] - predicted "optimal policy" (mu)
        rewards [batch,tick] - immediate rewards for taking actions at given time ticks
        policy_values [batch,tick] - predicted state values given OPTIMAL policy
        action_values [batch,tick] - predicted Q_values for committed actions INCLUDING EXPLORATION if any
                            Default value implies action_values = state_values if we have no exploration
        
        is_alive [batch,tick] - whether given session is still active at given tick. Defaults to always active.
                            Default value of is_alive implies a simplified computation algorithm for Qlearning loss
        
        n_steps: if an integer is given, the references are computed in loops of 3 states.
            Defaults to None: propagating rewards throughout the whole session.
            If n_steps equals 1, this works exactly as Q-learning (though a less efficient one)
            If you provide symbolic integer here AND strict = True, make sure you added the variable to dependencies.
        
        gamma_or_gammas - a single value or array[batch,tick](can broadcast dimensions) of delayed reward discounts 
        
        crop_last - if True, zeros-out loss at final tick, if False - computes loss VS Qvalues_after_end
        
        force_values_after_end - if true, sets reference policy at session end to rewards[end] + qvalues_after_end
        
        state_values_after_end[batch,1,n_actions] - "next state values" for last tick used for reference only. 
                            Defaults to T.zeros_like(state_values[:,0,None,:])
                            If you wish to simply ignore the last tick, use defaults and crop output's last tick ( qref[:,:-1] )

        
        
        scan_dependencies: everything you need to evaluate first 3 parameters (only if strict==True)
        scan_strict: whether to evaluate values using strict theano scan or non-strict one
        
    Returns:
                
        Element-wise sum of policy_loss + state_value_loss

    """

    if action_values == 'same':
        action_values = policy_values

    # get reference values via DPG algorithm
    reference_action_values = get_n_step_value_reference(
        action_values,
        rewards,
        is_alive,
        n_steps=n_steps,
        optimal_state_values_after_end=state_values_after_end,
        gamma_or_gammas=gamma_or_gammas,
        dependencies=scan_dependencies,
        strict=scan_strict)

    if is_alive != "always" and force_values_after_end:
        # if asked to force reference_Q[end_tick+1,a] = 0, do it
        # note: if agent is always alive, this is meaningless

        # set future rewards at session end to rewards+qvalues_after_end
        end_ids = get_end_indicator(is_alive,
                                    force_end_at_t_max=True).nonzero()

        if state_values_after_end == "zeros":
            # "set reference state values at end action ids to just the immediate rewards"
            reference_action_values = T.set_subtensor(
                reference_action_values[end_ids], rewards[end_ids])
        else:
            # "set reference state values at end action ids to the immediate rewards + qvalues after end"
            new_subtensor_values = rewards[
                end_ids] + gamma_or_gammas * state_values_after_end[end_ids[0],
                                                                    0]
            reference_action_values = T.set_subtensor(
                reference_action_values[end_ids], new_subtensor_values)

    # now compute the loss components
    if is_alive == "always":
        is_alive = T.ones_like(action_values, dtype=theano.config.floatX)

    # actor loss
    # here we rely on fact that state_values = critic(state,optimal_policy)
    # using chain rule,
    # grad(state_values,actor_weights) = grad(state_values, optimal_policy)*grad(optimal_policy,actor_weights)
    policy_loss_elwise = -policy_values

    # critic loss
    reference_action_values = consider_constant(reference_action_values)
    v_err_elementwise = squared_error(reference_action_values, action_values)

    if crop_last:
        v_err_elementwise = T.set_subtensor(v_err_elementwise[:, -1], 0)

    return policy_loss_elwise * is_alive, v_err_elementwise * is_alive
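
As the docstring notes, actor and critic updates must be computed independently and then added up; below is a hedged sketch of that wiring. The layers `actor_out` / `critic_out` and the symbolic inputs `policy`, `rewards`, `policy_values`, `action_values` are assumptions, not part of the original code:

from collections import OrderedDict
import lasagne

# policy, rewards, policy_values, action_values are assumed symbolic outputs
# of the actor and critic networks.
policy_loss_elwise, v_err_elwise = get_elementwise_objective_components(
    policy, rewards, policy_values, action_values)

actor_loss = policy_loss_elwise.sum(axis=1).mean()
critic_loss = v_err_elwise.sum(axis=1).mean()

actor_params = lasagne.layers.get_all_params(actor_out, trainable=True)
critic_params = lasagne.layers.get_all_params(critic_out, trainable=True)

updates = OrderedDict()
updates.update(lasagne.updates.adam(actor_loss, actor_params, learning_rate=1e-4))
updates.update(lasagne.updates.adam(critic_loss, critic_params, learning_rate=1e-3))
# `updates` can then be passed to a single theano.function for a joint training step.
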
예제 #32
0
File: train.py  Project: zugexiaodui/dfn
def train(options):
    # -------- setup options and data ------------------
    np.random.seed(options['seed'])

    # Load options
    host = socket.gethostname() # get computer hostname
    start_time = datetime.datetime.now().strftime("%y-%m-%d-%H-%M")

    model = importlib.import_module(options['model_file'])

    # ---------- build model and compile ---------------
    input_batch = T.tensor4() # input image sequences
    target = T.tensor4() # target image

    print('Build model...')
    model = model.Model(**options['modelOptions'])

    print('Compile ...')
    net, outputs, filters = model.build_model(input_batch)

    # compute loss
    outputs = get_output(outputs + [filters])
    output_frames = outputs[:-1]
    output_filter = outputs[-1]

    train_losses = []
    for i in range(options['modelOptions']['target_seqlen']):
        output_frame = output_frames[i]

        if options['loss'] == 'squared_error':
            frame_loss = squared_error(output_frame, target[:, [i], :, :])
        elif options['loss'] == 'binary_crossentropy':
            # Clipping to avoid NaN's in binary crossentropy: https://github.com/Lasagne/Lasagne/issues/436
            output_frame = T.clip(output_frame, np.finfo(np.float32).eps, 1-np.finfo(np.float32).eps)
            frame_loss = binary_crossentropy(output_frame, target[:,[i],:,:])
        else:
            assert False

        train_losses.append(aggregate(frame_loss))

    train_loss = sum(train_losses) / options['modelOptions']['target_seqlen']

    # update
    sh_lr = theano.shared(lasagne.utils.floatX(options['learning_rate'])) # to allow dynamic learning rate

    layers = get_all_layers(net)
    all_params = get_all_params(layers, trainable = True)
    updates = adam(train_loss, all_params, learning_rate=sh_lr)
    _train = theano.function([input_batch, target], train_loss, updates=updates, allow_input_downcast=True)
    _test = theano.function([input_batch, target], [train_loss, output_filter] + output_frames, allow_input_downcast=True)

    # ------------ data setup ----------------
    print('Prepare data...')
    dataset = importlib.import_module(options['dataset_file'])
    dh = dataset.DataHandler(**options['datasetOptions'])

    # ------------ training setup ----------------
    if options['pretrained_model_path'] is not None:
        checkpoint = pickle.load(open(options['pretrained_model_path'], 'rb'))
        model_values = checkpoint['model_values'] # overwrite the values of model parameters
        lasagne.layers.set_all_param_values(layers, model_values)

        history_train = checkpoint['history_train']
        start_epoch = checkpoint['epoch'] + 1
        options['batch_size'] = checkpoint['options']['batch_size']
        sh_lr.set_value(floatX(checkpoint['options']['learning_rate']))
    else:
        start_epoch = 0
        history_train = []

    # ------------ actual training ----------------
    print('Start training ...')

    input_seqlen = options['modelOptions']['input_seqlen']
    for epoch in range(start_epoch, start_epoch + options['num_epochs']):
        epoch_start_time = time.time()

        history_batch = []
        for batch_index in range(0, options['batches_per_epoch']):

            batch = dh.GetBatch() # generate data on the fly
            if options['dataset_file'] == 'datasets.stereoCarsColor':
                batch_input = batch[..., :input_seqlen].squeeze(axis=4)  # first frames
                batch_target = batch[..., input_seqlen:].squeeze(axis=4)  # last frame
            else:
                batch_input = batch[..., :input_seqlen].transpose(0,4,2,3,1).squeeze(axis=4) # first frames
                batch_target = batch[..., input_seqlen:].transpose(0,4,2,3,1).squeeze(axis=4) # last frame

            # train
            loss_train = _train(batch_input, batch_target)
            history_batch.append(loss_train)

            print("Epoch {} of {}, batch {} of {}, took {:.3f}s".format(epoch + 1, options['num_epochs'], batch_index+1, options['batches_per_epoch'], time.time() - epoch_start_time))
            print("  training loss:\t{:.6f}".format(loss_train.item()))

        # clear the screen
        display.clear_output(wait=True)

        # print statistics
        history_train.append(np.mean(history_batch))
        history_batch = []
        print("Epoch {} of {}, took {:.3f}s".format(epoch + 1, options['num_epochs'], time.time() - epoch_start_time))
        print("  training loss:\t{:.6f}".format(history_train[epoch].item()))

        # set new learning rate (maybe this is unnecessary with adam updates)
        if (epoch+1) % options['decay_after'] == 0:
            options['learning_rate'] = sh_lr.get_value() * 0.5
            print "New LR:", options['learning_rate']
            sh_lr.set_value(floatX(options['learning_rate']))

        # save the model
        if (epoch+1) % options['save_after'] == 0:
            save_model(layers, epoch, history_train, start_time, host, options)
            print("Model saved")
예제 #33
0
                  W=lasagne.init.Normal(0.01)))
l_output = batch_norm(
    Conv2DLayer(l_deconv1_2,
                num_filters=21,
                filter_size=(1, 1),
                pad=0,
                stride=1))  ###

# net_l_deconv2_2 = lasagne.layers.get_output(l_output);
# l_deconv2_2_func = theano.function([l_in.input_var], [net_l_deconv2_2]);
# l_deconv2_2_func_val = l_deconv2_2_func(X_train);
# print(l_deconv2_2_func_val[0].shape);
print('start training 1')
true_output = T.ftensor4('true_output')  ###

loss = squared_error(lasagne.layers.get_output(l_output), true_output).mean()
loss_train = squared_error(
    lasagne.layers.get_output(l_output, deterministic=False),
    true_output).mean()
loss_eval = squared_error(
    lasagne.layers.get_output(l_output, deterministic=True),
    true_output).mean()

all_params = lasagne.layers.get_all_params(l_output, trainable=True)
updates = lasagne.updates.nesterov_momentum(loss_train,
                                            all_params,
                                            learning_rate=0.001,
                                            momentum=0.985)

train = theano.function([l_in.input_var, true_output],
                        loss_train,
                        updates=updates)
예제 #34
0
def train_model(learning_rate_dis=0.0004, learning_rate_model=0.0004, n_epochs=36, batch_size=20, nb_caption='max'):
    '''
    Function that computes the training of the model.
    '''

    #######################
    # Loading the dataset #
    #######################

    print ('... Loading data')

    # Load the dataset on the CPU
    data_path = get_path()
    train_input_path = 'train_input_'
    train_target_path = 'train_target_'
    train_caption_path = 'train_caption_'
    valid_input_path = 'valid_input_'
    valid_target_path = 'valid_target_'
    valid_caption_path = 'valid_caption_'
    nb_train_batch = 8


    ######################
    # Building the model #
    ######################

    # Symbolic variables
    # Shape = (_, 3, 64, 64)
    x = T.tensor4('x', dtype=theano.config.floatX)
    # Shape = (_, 3, 32, 32)
    y = T.tensor4('y', dtype=theano.config.floatX)
    # Shape = (_, 3, 32, 32)
    z = T.tensor4('z', dtype=theano.config.floatX)
    # Shape = (_, seq_length)
    w = T.imatrix('captions')

    # Creation of the model
    model = build_context_encoder(input_var1=x, input_var2=w)
    discriminator = build_discriminator(input_var=None)

    fake_image = layers.get_output(model)
    fake_image_det = layers.get_output(model, deterministic=True)
    prob_real = layers.get_output(discriminator, inputs=y)
    prob_fake = layers.get_output(discriminator, inputs=fake_image)

    params_model = layers.get_all_params(model, trainable=True)
    params_dis = layers.get_all_params(discriminator, trainable=True)

    loss_real = -T.mean(T.log(prob_real))
    loss_fake = -T.mean(T.log(1 - prob_fake))
    loss_dis = 0.005 * (loss_real + loss_fake)

    loss_gen = -T.mean(T.log(prob_fake))
    recons_error = T.mean(objectives.squared_error(fake_image, z))
    loss_model = 0.005 * loss_gen + 0.995 * recons_error

    updates_dis = lasagne.updates.adam(loss_dis, params_dis, learning_rate=learning_rate_dis, beta1=0.5)
    updates_model = lasagne.updates.adam(loss_model, params_model, learning_rate=learning_rate_model, beta1=0.5)

    # Creation of theano functions
    train_dis = theano.function([x, y, w], loss_dis, updates=updates_dis, allow_input_downcast=True)

    train_model = theano.function([x, z, w], loss_model, updates=updates_model, allow_input_downcast=True)

    predict_image = theano.function([x, w], fake_image_det, allow_input_downcast=True)

    ###################
    # Train the model #
    ###################

    print('... Training')

    epoch = 0
    nb_train_dis = 25
    nb_train_gen = 10
    nb_batch = 10000 // batch_size
    nb_block = nb_batch // nb_train_dis
    loss_dis = []
    loss_model = []

    idx = [0, 1, 2, 4, 5]
    #start_time = timeit.default_timer()

    while (epoch < n_epochs):
        epoch = epoch + 1
        for i in range(nb_train_batch):
            #print (i)
            # Shape = (10000, 3, 64, 64) & Shape = (10000, 3, 32, 32)
            contour, center = get_image(data_path, train_input_path, train_target_path, str(i))
            # List of captions of different sequence length
            caption = get_caption(data_path, train_caption_path, str(i), str(nb_caption))
            # List of size nb_train_dis
            list = [k % len(caption) for k in range(nb_train_dis)]
            for j in range(nb_block):
                #print (j)
                for index in range(nb_train_dis * j, nb_train_dis * (j + 1)):
                    #print (index)
                    train_caption = caption[list[index % nb_train_dis]]
                    if train_caption.shape[0] >= batch_size:
                        random_idx = random.sample(range(0, train_caption.shape[0]), batch_size)
                    else:
                        random_idx = random.sample(range(0, train_caption.shape[0]), train_caption.shape[0])
                    input = contour[train_caption[random_idx, -1] - i * 10000]
                    target = center[train_caption[random_idx, -1] - i * 10000]
                    train_caption = train_caption[random_idx, :-1]
                    loss = train_dis(input, target, train_caption)
                    loss_dis.append(loss)
                for index in range(nb_train_gen * j, nb_train_gen * (j + 1)):
                    #print (index)
                    rand_nb = random.randint(0, len(list) - 1)
                    train_caption = caption[rand_nb]
                    if train_caption.shape[0] >= batch_size:
                        random_idx = random.sample(range(0, train_caption.shape[0]), batch_size)
                    else:
                        random_idx = random.sample(range(0, train_caption.shape[0]), train_caption.shape[0])
                    input = contour[train_caption[random_idx, -1] - i * 10000]
                    target = center[train_caption[random_idx, -1] - i * 10000]
                    train_caption = train_caption[random_idx, :-1]
                    loss = train_model(input, target, train_caption)
                    loss_model.append(loss)

        if epoch % 4 == 0:
            # save the model and a bunch of generated pictures
            print ('... saving model and generated images')

            np.savez('discriminator_epoch' + str(epoch) + '.npz', *layers.get_all_param_values(discriminator))
            np.savez('context_encoder_epoch' + str(epoch) + '.npz', *layers.get_all_param_values(model))
            np.save('loss_dis', loss_dis)
            np.save('loss_gen', loss_model)

            contour, center = get_image(data_path, valid_input_path, valid_target_path, str(0))
            caption = get_caption(data_path, valid_caption_path, str(0), str(nb_caption))
            valid_caption = caption[4][idx]
            input = contour[valid_caption[:, -1]]

            generated_centers = predict_image(input, valid_caption[:, :-1])
            generated_images = assemble(input, generated_centers)

            for k in range(len(idx)):
                plt.subplot(1, len(idx), (k + 1))
                plt.axis('off')
                plt.imshow(generated_images[k, :, :, :].transpose(1, 2, 0))

            plt.savefig('generated_images_epoch' + str(epoch) + '.png', bbox_inches='tight')

    #end_time = timeit.default_timer()

    # Plot the learning curve
    ax1 = host_subplot(111, axes_class=AA.Axes)
    plt.subplots_adjust(right=0.75)
    ax2 = ax1.twiny()

    x1 = range(1, len(loss_dis) + 1)
    ax1.set_xlim([x1[0], x1[-1]])
    x2 = range(1, len(loss_model) + 1)
    ax2.set_xlim([x2[0], x2[-1]])

    ax1.set_xlabel('training iteration (Discriminator)', color='g')
    ax2.set_xlabel('training iteration (Context encoder)', color='b')
    ax1.set_ylabel('Loss')

    ax1.plot(x1, rolling_average(loss_dis), 'g', label='Discriminator loss')
    ax2.plot(x2, rolling_average(loss_model), 'b', label='Context encoder Loss')

    ax1.grid(True)
    ax1.legend()

    plt.savefig('Learning_curve')

    print('Optimization complete.')
예제 #35
0
def create_network(available_actions_count):
    # Create the input variables
    s1 = tensor.tensor4("State")
    a = tensor.vector("Action", dtype="int32")
    q2 = tensor.vector("Q2")
    r = tensor.vector("Reward")
    isterminal = tensor.vector("IsTerminal", dtype="int8")

    # Create the input layer of the network
    dqn = InputLayer(shape=[None, 1, resolution[0], resolution[1]],
                     input_var=s1)

    # Add 2 convolutional layers with ReLU activation
    dqn = Conv2DLayer(dqn,
                      num_filters=8,
                      filter_size=[6, 6],
                      nonlinearity=rectify,
                      W=HeUniform("relu"),
                      b=Constant(.1),
                      stride=3)
    dqn = Conv2DLayer(dqn,
                      num_filters=8,
                      filter_size=[3, 3],
                      nonlinearity=rectify,
                      W=HeUniform("relu"),
                      b=Constant(.1),
                      stride=2)

    # Add 1 fully connected layer.
    dqn = DenseLayer(dqn,
                     num_units=128,
                     nonlinearity=rectify,
                     W=HeUniform("relu"),
                     b=Constant(.1))

    # Add the output layer (fully connected).
    dqn = DenseLayer(dqn, num_units=available_actions_count, nonlinearity=None)

    # Define the loss function
    q = get_output(dqn)
    # target_Q(s,a) = r + gamma * max Q(s2,_) if isterminal else r
    target_q = tensor.set_subtensor(
        q[tensor.arange(q.shape[0]), a],
        r + discount_factor * (1 - isterminal) * q2)
    loss = squared_error(q, target_q).mean()

    # Update the parameters according to the gradient computed with RMSProp.
    params = get_all_params(dqn, trainable=True)
    updates = rmsprop(loss, params, learning_rate)

    # Compile the Theano functions
    print("Compiling the network ...")
    function_learn = theano.function([s1, q2, a, r, isterminal],
                                     loss,
                                     updates=updates,
                                     name="learn_fn")
    function_get_q_values = theano.function([s1], q, name="eval_fn")
    function_get_best_action = theano.function([s1],
                                               tensor.argmax(q),
                                               name="test_fn")
    print("Network compiled.")

    def simple_get_best_action(state):
        return function_get_best_action(
            state.reshape([1, 1, resolution[0], resolution[1]]))

    # Return the Theano objects for the network and the functions.
    return dqn, function_learn, function_get_q_values, simple_get_best_action
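
A hedged sketch of one Q-learning step with the functions returned above. The replay batch is random data here, and `resolution`, `discount_factor` and `learning_rate` are module-level globals assumed by the original code:

import numpy as np

dqn, learn, get_q_values, get_best_action = create_network(available_actions_count=8)

batch = 32
s1 = np.random.rand(batch, 1, resolution[0], resolution[1]).astype(np.float32)  # states
s2 = np.random.rand(batch, 1, resolution[0], resolution[1]).astype(np.float32)  # next states
a = np.random.randint(0, 8, size=batch).astype(np.int32)                        # actions taken
r = np.random.rand(batch).astype(np.float32)                                    # rewards
isterminal = np.zeros(batch, dtype=np.int8)

q2 = np.max(get_q_values(s2), axis=1)   # max_a' Q(s2, a'), used in the target formula above
loss = learn(s1, q2, a, r, isterminal)
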
예제 #36
0
def net_dict_rnn(seq_length):
    if seq_length <= 300:
        learning_rate = 1e-2
        learning_rate_changes_by_iteration = {1000: 1e-3, 10000: 1e-4}
    elif seq_length < 1500:
        learning_rate = 1e-4
        learning_rate_changes_by_iteration = {5000: 1e-5, 9000: 1e-6}
    else:
        learning_rate = 1e-5
        learning_rate_changes_by_iteration = {5000: 1e-6, 9000: 1e-7}
    return dict(
        epochs=10000,
        save_plot_interval=1000,
        loss_function=lambda x, t: squared_error(x, t).mean(),
        updates_func=nesterov_momentum,
        learning_rate=learning_rate,
        learning_rate_changes_by_iteration=learning_rate_changes_by_iteration,
        do_save_activations=True,
        auto_reshape=True,
        plotter=Plotter(n_seq_to_plot=32, n_training_examples_to_plot=16),
        layers_config=[
            {
                'type': DimshuffleLayer,
                'pattern': (0, 2, 1)  # (batch, features, time)
            },
            {
                'type': Conv1DLayer,  # convolve over the time axis
                'num_filters': 16,
                'filter_size': 4,
                'stride': 1,
                'nonlinearity': None,
                'border_mode': 'same'
            },
            {
                'type': DimshuffleLayer,
                'pattern': (0, 2, 1),  # back to (batch, time, features)
                'label': 'dimshuffle3'
            },
            {
                'type': BLSTMLayer,
                'num_units': 128,
                'merge_mode': 'concatenate',
                'grad_clipping': 10.0,
                'gradient_steps': 500
            },
            {
                'type': BLSTMLayer,
                'num_units': 256,
                'merge_mode': 'concatenate',
                'grad_clipping': 10.0,
                'gradient_steps': 500
            },
            {
                'type': DenseLayer,
                'num_units': 128,
                'nonlinearity': tanh
            },
            {
                'type': DenseLayer,
                'num_units': 1,
                'nonlinearity': None
            }
        ])
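
A hedged sketch of how a `layers_config` list like the one above might be consumed: each entry names a layer class under 'type', an optional 'label', and the remaining keys are passed as constructor kwargs. The real experiment framework presumably also handles keys such as 'incomings' and its own custom layer types, so this is only an illustration of the pattern:

from lasagne.layers import InputLayer

def build_from_config(layers_config, input_shape, input_var=None):
    layer = InputLayer(input_shape, input_var=input_var, name='input')
    for spec in layers_config:
        spec = dict(spec)                  # copy so the original config is not mutated
        layer_cls = spec.pop('type')
        label = spec.pop('label', None)
        layer = layer_cls(layer, name=label, **spec)
    return layer
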
예제 #37
0
def get_elementwise_objective(policy,
                              state_values,
                              actions,
                              rewards,
                              is_alive="always",
                              n_steps=None,
                              gamma_or_gammas=0.99,
                              crop_last=True,
                              force_values_after_end=True,
                              state_values_after_end="zeros",
                              consider_value_reference_constant=True,
                              consider_predicted_value_constant=True,
                              scan_dependencies=[],
                              scan_strict=True,
                              min_log_proba=-1e50):
    """
    returns cross-entropy-like objective function for Actor-Critic method

        L_policy = - log(policy) * (V_reference - const(V))
        L_V = (V - Vreference)^2
            
    parameters:
    
        policy [batch,tick,action_id] - predicted action probabilities
        state_values [batch,tick] - predicted state values
        actions [batch,tick] - committed actions
        rewards [batch,tick] - immediate rewards for taking actions at given time ticks
        
        is_alive [batch,tick] - whether given session is still active at given tick. Defaults to always active.
                            Default value of is_alive implies a simplified computation algorithm for Qlearning loss
        
        n_steps: if an integer is given, the references are computed in loops of 3 states.
            Defaults to None: propagating rewards throughout the whole session.
            If n_steps equals 1, this works exactly as Q-learning (though a less efficient one)
            If you provide symbolic integer here AND strict = True, make sure you added the variable to dependencies.
        
        gamma_or_gammas - a single value or array[batch,tick](can broadcast dimensions) of delayed reward discounts 
        
        crop_last - if True, zeros-out loss at final tick, if False - computes loss VS Qvalues_after_end
        
        force_values_after_end - if true, sets reference policy at session end to rewards[end] + qvalues_after_end
        
        state_values_after_end[batch,1,n_actions] - "next state values" for last tick used for reference only. 
                            Defaults to T.zeros_like(state_values[:,0,None,:])
                            If you wish to simply ignore the last tick, use defaults and crop output's last tick ( qref[:,:-1] )

        
        
        scan_dependencies: everything you need to evaluate first 3 parameters (only if strict==True)
        scan_strict: whether to evaluate values using strict theano scan or non-strict one
        
    Returns:
                
        elementwise sum of policy_loss + state_value_loss

    """

    # get reference values via Q-learning algorithm
    reference_state_values = get_n_step_value_reference(
        state_values,
        rewards,
        is_alive,
        n_steps=n_steps,
        optimal_state_values_after_end=state_values_after_end,
        gamma_or_gammas=gamma_or_gammas,
        dependencies=scan_dependencies,
        strict=scan_strict)

    # if we have to set after_end values
    if is_alive != "always" and force_values_after_end:
        # if asked to force reference_Q[end_tick+1,a] = 0, do it
        # note: if agent is always alive, this is meaningless

        # set future rewards at session end to rewards+qvalues_after_end
        end_ids = get_end_indicator(is_alive,
                                    force_end_at_t_max=True).nonzero()

        if state_values_after_end == "zeros":
            # "set reference state values at end action ids to just the immediate rewards"
            reference_state_values = T.set_subtensor(
                reference_state_values[end_ids], rewards[end_ids])
        else:

            # "set reference state values at end action ids to the immediate rewards + qvalues after end"
            new_state_values = rewards[
                end_ids] + gamma_or_gammas * state_values_after_end[end_ids[0],
                                                                    0]
            reference_state_values = T.set_subtensor(
                reference_state_values[end_ids], new_state_values)

    # now compute the loss
    if is_alive == "always":
        is_alive = T.ones_like(actions, dtype=theano.config.floatX)

    # actor loss
    action_probas = get_action_Qvalues(policy, actions)

    reference_state_values = consider_constant(reference_state_values)
    if crop_last:
        reference_state_values = T.set_subtensor(
            reference_state_values[:, -1],
            consider_constant(state_values[:, -1]))

    log_probas = T.maximum(T.log(action_probas), min_log_proba)

    policy_loss_elwise = -log_probas * (reference_state_values -
                                        consider_constant(state_values))

    # critic loss
    V_err_elwise = squared_error(reference_state_values, state_values)

    return (policy_loss_elwise + V_err_elwise) * is_alive
예제 #38
0
INPUT_STATS = {
    'mean': np.array([297.87216187], dtype=np.float32),
    'std': np.array([374.43884277], dtype=np.float32)
}
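
A hedged sketch of how INPUT_STATS might be used to z-score a batch before it reaches the network (the actual normalisation hook in the training framework is an assumption):

import numpy as np

def standardise(batch, stats=INPUT_STATS):
    """Z-score a float32 batch using the dataset mean and std above."""
    return (batch - stats['mean']) / stats['std']
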


def only_train_on_real_data(net, iteration):
    net.logger.info(
        "Iteration {}: Now only training on real data.".format(iteration))
    net.source.sources[0]['train_probability'] = 0.0
    net.source.sources[1]['train_probability'] = 1.0


net_dict = dict(save_plot_interval=SAVE_PLOT_INTERVAL,
                loss_function=lambda x, t: squared_error(x, t).mean(),
                updates_func=nesterov_momentum,
                learning_rate=1e-1,
                learning_rate_changes_by_iteration={
                    1000: 1e-2,
                    10000: 1e-3
                },
                epoch_callbacks={350000: only_train_on_real_data},
                do_save_activations=True,
                auto_reshape=True,
                layers_config=[{
                    'type': DenseLayer,
                    'num_units': 10,
                    'nonlinearity': tanh
                }, {
                    'type': BLSTMLayer,
예제 #39
0
def test_squared_error_preserve_dtype():
    from lasagne.objectives import squared_error
    for dtype in 'float64', 'float32', 'float16':
        a = theano.tensor.matrix('a', dtype=dtype)
        b = theano.tensor.matrix('b', dtype=dtype)
        assert squared_error(a, b).dtype == dtype
예제 #40
0
def main():
    def signal_handler(signal, frame):
        global terminate
        terminate = True
        print('terminating... (terminate={})'.format(terminate))

    signal.signal(signal.SIGINT, signal_handler)
    configure_theano()
    options = parse_options()
    X, X_val = generate_data()

    # X = np.reshape(X, (-1, 1, 30, 40))[:-5]
    print('X type and shape:', X.dtype, X.shape)
    print('X.min():', X.min())
    print('X.max():', X.max())

    # X_val = np.reshape(X_val, (-1, 1, 30, 40))[:-1]
    print('X_val type and shape:', X_val.dtype, X_val.shape)
    print('X_val.min():', X_val.min())
    print('X_val.max():', X_val.max())

    # we need our target to be 1 dimensional
    X_out = X.reshape((X.shape[0], -1))
    X_val_out = X_val.reshape((X_val.shape[0], -1))
    print('X_out:', X_out.dtype, X_out.shape)
    print('X_val_out', X_val_out.dtype, X_val_out.shape)

    # X_noisy = apply_gaussian_noise(X_out)
    # visualize_reconstruction(X_noisy[0:25], X_out[0:25], shape=(28, 28))
    # X = np.reshape(X_noisy, (-1, 1, 28, 28))

    print('constructing and compiling model...')
    # input_var = T.tensor4('input', dtype='float32')
    input_var = T.tensor3('input', dtype='float32')
    target_var = T.matrix('output', dtype='float32')
    lr = theano.shared(np.array(0.8, dtype=theano.config.floatX),
                       name='learning_rate')
    lr_decay = np.array(0.9, dtype=theano.config.floatX)

    # try building a reshaping layer
    # network = create_model(input_var, (None, 1, 30, 40), options)
    l_input = InputLayer((None, None, 1200), input_var, name='input')
    l_input = ReshapeLayer(l_input, (-1, 1, 30, 40), name='reshape_input')
    # l_input = InputLayer((None, 1, 30, 40), input_var, name='input')
    if options['MODEL'] == 'normal':
        network, encoder = avletters_convae.create_model(l_input, options)
    if options['MODEL'] == 'batchnorm':
        network, encoder = avletters_convae_bn.create_model(l_input, options)
    if options['MODEL'] == 'dropout':
        network, encoder = avletters_convae_drop.create_model(l_input, options)
    if options['MODEL'] == 'bn+dropout':
        network, encoder = avletters_convae_bndrop.create_model(
            l_input, options)

    print('AE Network architecture: {}'.format(options['MODEL']))
    print_network(network)

    recon = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(squared_error(recon, target_var))
    updates = adadelta(cost, all_params, lr)
    # updates = las.updates.apply_nesterov_momentum(updates, all_params, momentum=0.90)

    use_max_constraint = False
    print('apply max norm constraint: {}'.format(use_max_constraint))
    if use_max_constraint:
        MAX_NORM = 4
        for param in las.layers.get_all_params(network, regularizable=True):
            if param.ndim > 1:  # only apply to dimensions larger than 1, exclude biases
                # updates[param] = norm_constraint(param, MAX_NORM * las.utils.compute_norms(param.get_value()).mean())
                updates[param] = norm_constraint(param, MAX_NORM)

    train = theano.function([input_var, target_var],
                            recon,
                            updates=updates,
                            allow_input_downcast=True)
    train_cost_fn = theano.function([input_var, target_var],
                                    cost,
                                    allow_input_downcast=True)

    eval_recon = las.layers.get_output(network, deterministic=True)
    eval_cost = T.mean(las.objectives.squared_error(eval_recon, target_var))
    eval_cost_fn = theano.function([input_var, target_var],
                                   eval_cost,
                                   allow_input_downcast=True)
    recon_fn = theano.function([input_var],
                               eval_recon,
                               allow_input_downcast=True)

    if terminate:
        exit()

    NUM_EPOCHS = options['NUM_EPOCHS']
    EPOCH_SIZE = options['EPOCH_SIZE']
    NO_STRIDES = options['NO_STRIDES']
    VAL_NO_STRIDES = options['VAL_NO_STRIDES']

    print('begin training for {} epochs...'.format(NUM_EPOCHS))
    datagen = batch_iterator(X, X_out, 128)

    costs = []
    val_costs = []
    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            batch_X, batch_y = next(datagen)
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(batch_X), lr.get_value())
            print(print_str, end='')
            sys.stdout.flush()
            batch_X = batch_X.reshape((-1, 1, 1200))
            train(batch_X, batch_y)
            print('\r', end='')
            if terminate:
                break
        if terminate:
            break

        cost = batch_compute_cost(X, X_out, NO_STRIDES, train_cost_fn)
        val_cost = batch_compute_cost(X_val, X_val_out, VAL_NO_STRIDES,
                                      eval_cost_fn)
        costs.append(cost)
        val_costs.append(val_cost)

        print("Epoch {} train cost = {}, validation cost = {} ({:.1f}sec) ".
              format(epoch + 1, cost, val_cost,
                     time.time() - time_start))
        if epoch > 10:
            lr.set_value(lr.get_value() * lr_decay)

    X_val_recon = recon_fn(X_val)
    visualize_reconstruction(X_val_out[450:550],
                             X_val_recon[450:550],
                             shape=(30, 40),
                             savefilename='avletters')
    plot_validation_cost(costs, val_costs, None, savefilename='valid_cost')

    conv2d1 = las.layers.get_all_layers(network)[2]
    visualize.plot_conv_weights(conv2d1, (15, 14)).savefig('conv2d1.png')

    print('saving encoder...')
    save_model(encoder, 'models/conv_encoder.dat')
    save_model(network, 'models/conv_ae.dat')
예제 #41
0
def net_dict_ae_rnn(seq_length):
    NUM_FILTERS = 8
    return dict(
        epochs=None,
        save_plot_interval=5000,
        loss_function=lambda x, t: squared_error(x, t).mean(),
        updates_func=nesterov_momentum,
        learning_rate=1e-2,
        learning_rate_changes_by_iteration={110000: 1e-3},
        do_save_activations=True,
        auto_reshape=False,
        plotter=Plotter(n_seq_to_plot=32, n_training_examples_to_plot=16),
        layers_config=[
            {
                'type': DimshuffleLayer,
                'pattern': (0, 2, 1)  # (batch, features, time)
            },
            {
                'label': 'conv0',
                'type': Conv1DLayer,  # convolve over the time axis
                'num_filters': NUM_FILTERS,
                'filter_size': 4,
                'stride': 1,
                'nonlinearity': None,
                'pad': 'valid'
            },
            {
                'type': DimshuffleLayer,
                'pattern': (0, 2, 1)  # back to (batch, time, features)
            },
            {
                'type': DenseLayer,
                'num_units': (seq_length - 3) * NUM_FILTERS,
                'nonlinearity': rectify
            },
            {
                'type': DenseLayer,
                'num_units': 128,
                'nonlinearity': rectify
            },
            {
                'type': DenseLayer,
                'num_units': (seq_length - 3) * NUM_FILTERS,
                'nonlinearity': rectify
            },
            {
                'type': ReshapeLayer,
                'shape': (-1, (seq_length - 3), NUM_FILTERS)
            },
            {
                'type': DimshuffleLayer,
                'pattern': (0, 2, 1)  # (batch, features, time)
            },
            {  # DeConv
                'type': Conv1DLayer,
                'num_filters': 1,
                'filter_size': 4,
                'stride': 1,
                'nonlinearity': None,
                'pad': 'full'
            },
            {
                'type': DimshuffleLayer,
                'pattern': (0, 2, 1),  # back to (batch, time, features)
                'label': 'AE_output'
            }
        ],
        layer_changes={
            100001: {
                'new_layers': [{
                    'type': ConcatLayer,
                    'axis': 2,
                    'incomings': ['input', 'AE_output']
                }, {
                    'type': ReshapeLayer,
                    'shape': (64 * seq_length, 2)
                }, {
                    'type': DenseLayer,
                    'num_units': 16,
                    'nonlinearity': tanh
                }, {
                    'type': ReshapeLayer,
                    'shape': (64, seq_length, 16)
                }, {
                    'type': BLSTMLayer,
                    'num_units': 128,
                    'merge_mode': 'concatenate',
                    'grad_clipping': 10.0,
                    'gradient_steps': 500
                }, {
                    'type': BLSTMLayer,
                    'num_units': 256,
                    'merge_mode': 'concatenate',
                    'grad_clipping': 10.0,
                    'gradient_steps': 500
                }, {
                    'type': ReshapeLayer,
                    'shape': (64 * seq_length, 512)
                }, {
                    'type': DenseLayer,
                    'num_units': 128,
                    'nonlinearity': tanh
                }, {
                    'type': DenseLayer,
                    'num_units': 1,
                    'nonlinearity': None
                }]
            }
        })
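The dense layers in this config are sized as (seq_length - 3) * NUM_FILTERS because the 'valid' Conv1D with filter_size=4 and stride=1 shortens the time axis by filter_size - 1 = 3 samples, and the later ReshapeLayer restores exactly that (time, filters) shape. A quick check of the arithmetic (illustrative values only):

seq_length = 16                                # example value; the real one is passed in by the caller
filter_size, num_filters = 4, 8

conv_out_len = seq_length - filter_size + 1    # 'valid' convolution, stride 1 -> 13 time steps
dense_units = conv_out_len * num_filters       # 13 * 8 = 104
assert dense_units == (seq_length - 3) * num_filters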
Example #42
0
def get_elementwise_objective(
        Qvalues,
        actions,
        rewards,
        is_alive="always",
        Qvalues_target=None,
        n_steps=None,
        gamma_or_gammas=0.95,
        crop_last=True,
        force_qvalues_after_end=True,
        optimal_qvalues_after_end="zeros",
        consider_reference_constant=True,
        aggregation_function=lambda qv: T.max(qv, axis=-1),
        return_reference=False,
        scan_dependencies=[],
        scan_strict=True):
    """
    Returns the squared error between predicted and reference Q-values according to the n-step Q-learning algorithm

        Qreference(state,action) = reward(state,action) + gamma*reward(state_1,action_1) + ... + gamma^n * max[action_n]( Q(state_n,action_n) )
        loss = mean over (Qvalues - Qreference)**2

    :param Qvalues: [batch,tick,action_id] - predicted Q-values
    :param actions: [batch,tick] - committed actions
    :param rewards: [batch,tick] - immediate rewards for taking actions at given time ticks
    :param is_alive: [batch,tick] - whether the given session is still active at the given tick. Defaults to always active.
                            The default value of is_alive implies a simplified computation algorithm for the Q-learning loss
    :param Qvalues_target: older snapshot of Q-values (e.g. from a target network). If None, uses the current Qvalues
    :param n_steps: if an integer is given, the references are computed in loops of n_steps states.
            Defaults to None: propagating rewards throughout the whole session.
            If n_steps equals 1, this works exactly as Q-learning (though a less efficient one).
            If you provide a symbolic integer here AND strict = True, make sure you added the variable to dependencies.
    :param gamma_or_gammas: delayed reward discounts: a single value or array[batch,tick] (can broadcast dimensions).
    :param crop_last: if True, zeroes out the loss at the final tick; if False, computes the loss against Qvalues_after_end
    :param force_qvalues_after_end: if True, sets reference Q-values at session end to rewards[end] + qvalues_after_end
    :param optimal_qvalues_after_end: [batch,1] - symbolic expression for the "best next state Q-values" at the last tick,
                            used only when computing reference Q-values.
                            Defaults to T.zeros_like(Qvalues[:,0,None,0]).
                            If you wish to simply ignore the last tick, use the defaults and crop the output's last tick ( qref[:,:-1] )
    :param consider_reference_constant: whether or not to zero out gradient flow through reference_Qvalues
            (True is highly recommended)
    :param aggregation_function: a function that takes all Q-values for the "next state Q-values" term (input of shape
                                [batch,n_actions]) and returns the "best next Q-value". Defaults to a max over actions.
                                Normally you should not need to change it.
    :param return_reference: if True, returns the reference Q-values.
            If False, returns squared_error(action_Qvalues, reference_Qvalues)
    :param scan_dependencies: everything you need to evaluate the first 3 parameters (only if strict==True)
    :param scan_strict: whether to evaluate Q-values using a strict theano scan or a non-strict one
    :return: [batch,tick] tensor of element-wise squared errors between predicted and reference Q-values (using the
            formula above), masked by is_alive; or the reference Q-values themselves if return_reference is True

    """
    if Qvalues_target is None:
        Qvalues_target = Qvalues

    # get Q-values of the best actions (used for reference Q-value computation)
    optimal_Qvalues_target = aggregation_function(Qvalues_target)

    # get predicted Q-values for committed actions by both current and target networks
    # (to compare with reference Q-values and use for recurrent reference computation)
    action_Qvalues = get_action_Qvalues(Qvalues, actions)
    action_Qvalues_target = get_action_Qvalues(Qvalues_target, actions)

    # get reference Q-values via Q-learning algorithm
    reference_Qvalues = get_n_step_value_reference(
        state_values=action_Qvalues_target,
        rewards=rewards,
        is_alive=is_alive,
        n_steps=n_steps,
        gamma_or_gammas=gamma_or_gammas,
        optimal_state_values=optimal_Qvalues_target,
        optimal_state_values_after_end=optimal_qvalues_after_end,
        dependencies=scan_dependencies,
        strict=scan_strict)

    if consider_reference_constant:
        # do not pass gradient through reference Qvalues (since they DO depend on Qvalues by default)
        reference_Qvalues = consider_constant(reference_Qvalues)

    if force_qvalues_after_end and is_alive != "always":
        # if asked to force reference_Q[end_tick+1,a] = 0, do it
        # note: if agent is always alive, this is meaningless
        # set future rewards at session end to rewards+qvalues_after_end
        end_ids = get_end_indicator(is_alive,
                                    force_end_at_t_max=True).nonzero()

        if optimal_qvalues_after_end == "zeros":
            # "set reference Q-values at end action ids to just the immediate rewards"
            reference_Qvalues = T.set_subtensor(reference_Qvalues[end_ids],
                                                rewards[end_ids])
        else:
            # "set reference Q-values at end action ids to the immediate rewards + qvalues after end"
            new_reference_values = rewards[
                end_ids] + gamma_or_gammas * optimal_qvalues_after_end
            reference_Qvalues = T.set_subtensor(
                reference_Qvalues[end_ids], new_reference_values[end_ids[0],
                                                                 0])

    # If asked, make sure the loss equals 0 for the last time tick.
    if crop_last:
        reference_Qvalues = T.set_subtensor(reference_Qvalues[:, -1],
                                            action_Qvalues[:, -1])

    if return_reference:
        return reference_Qvalues
    else:
        # tensor of elementwise squared errors
        elwise_squared_error = squared_error(reference_Qvalues, action_Qvalues)
        return elwise_squared_error * is_alive
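To make the reference formula in the docstring concrete, here is a tiny NumPy illustration of the full-session backup (n_steps=None, session always alive, default "zeros" value after the end) for a single trajectory with made-up numbers; it only demonstrates the recursion Qref_t = r_t + gamma * Qref_{t+1}:

import numpy as np

gamma = 0.95
rewards = np.array([1.0, 0.0, 2.0])   # r_t for ticks 0..2 (made-up values)
q_after_end = 0.0                     # "best next state Q-value" after the session ends (the 'zeros' default)

qref = np.zeros_like(rewards)
qref[-1] = rewards[-1] + gamma * q_after_end       # last tick bootstraps from the after-end value
for t in reversed(range(len(rewards) - 1)):
    qref[t] = rewards[t] + gamma * qref[t + 1]     # propagate rewards backwards through the session

print(qref)   # reference Q-values that the squared error is computed against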
Example #43
0
    def __init__(self,
                 load_weights=True,
                 is_training=True,
                 model_name='dronet_weights.npz'):

        self.model_name = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), model_name)

        def network(image):
            input_image = InputLayer(input_var=image,
                                     shape=(None, 1, 120, 160))

            conv1 = Conv2DLayer(input_image,
                                num_filters=32,
                                filter_size=(5, 5),
                                stride=(2, 2),
                                nonlinearity=rectify,
                                pad='same')

            pool1 = MaxPool2DLayer(conv1,
                                   pool_size=(3, 3),
                                   stride=(2, 2),
                                   pad=1)

            conv2 = batch_norm(
                Conv2DLayer(pool1,
                            num_filters=32,
                            filter_size=(3, 3),
                            stride=(2, 2),
                            nonlinearity=rectify,
                            pad='same'))

            conv2 = batch_norm(
                Conv2DLayer(conv2,
                            num_filters=32,
                            filter_size=(3, 3),
                            stride=(1, 1),
                            nonlinearity=rectify,
                            pad='same'))

            downsample1 = Conv2DLayer(pool1,
                                      num_filters=32,
                                      filter_size=(1, 1),
                                      stride=(2, 2),
                                      nonlinearity=rectify,
                                      pad='same')

            input3 = ElemwiseSumLayer([downsample1, conv2])

            conv3 = batch_norm(
                Conv2DLayer(input3,
                            num_filters=64,
                            filter_size=(3, 3),
                            stride=(2, 2),
                            nonlinearity=rectify,
                            pad='same'))

            conv3 = batch_norm(
                Conv2DLayer(conv3,
                            num_filters=64,
                            filter_size=(3, 3),
                            stride=(1, 1),
                            nonlinearity=rectify,
                            pad='same'))

            downsample2 = Conv2DLayer(input3,
                                      num_filters=64,
                                      filter_size=(1, 1),
                                      stride=(2, 2),
                                      nonlinearity=rectify,
                                      pad='same')

            input4 = ElemwiseSumLayer([downsample2, conv3])

            conv4 = batch_norm(
                Conv2DLayer(input4,
                            num_filters=128,
                            filter_size=(3, 3),
                            stride=(2, 2),
                            nonlinearity=rectify,
                            pad='same'))

            conv4 = batch_norm(
                Conv2DLayer(conv4,
                            num_filters=128,
                            filter_size=(3, 3),
                            stride=(1, 1),
                            nonlinearity=rectify,
                            pad='same'))

            downsample3 = Conv2DLayer(input4,
                                      num_filters=128,
                                      filter_size=(1, 1),
                                      stride=(2, 2),
                                      nonlinearity=rectify,
                                      pad='same')

            input5 = ElemwiseSumLayer([downsample3, conv4])

            flatten = DropoutLayer(FlattenLayer(input5), 0.5)

            prob_out = DenseLayer(flatten, num_units=1, nonlinearity=sigmoid)

            turn_angle = DenseLayer(flatten, num_units=1, nonlinearity=tanh)

            return prob_out, turn_angle

        # declare the variables used in the network
        self.X = T.ftensor4()
        self.Y = T.fmatrix()
        self.Z = T.fmatrix()

        # Lasagne object for the network
        self.CollisionProbability, self.TurnAngle = network(self.X)

        if is_training:
            # collision probability for training
            # and testing. Output is a theano object
            self.collision_prob = get_output(self.CollisionProbability)
            self.collision_prob_test = get_output(self.CollisionProbability,
                                                  deterministic=True)

            # turn angle for training and testing.
            # Output is a theano object.
            self.turn_angle = get_output(self.TurnAngle)
            self.turn_angle_test = get_output(self.TurnAngle,
                                              deterministic=True)

            # Loss for the network.
            self.collision_loss = binary_crossentropy(self.collision_prob,
                                                      self.Y).mean()
            self.turn_loss = squared_error(self.turn_angle, self.Z).mean()

            # Loss to call for testing and validation.
            self.test_collision_loss = binary_crossentropy(
                self.collision_prob_test, self.Y).mean()
            self.test_turn_loss = squared_error(self.turn_angle_test,
                                                self.Z).mean()

            # network parameters for training.
            self.collision_params = get_all_params(self.CollisionProbability,
                                                   trainable=True)
            self.turn_params = get_all_params(self.TurnAngle, trainable=True)

            # network updates
            self.collision_updates = adam(self.collision_loss,
                                          self.collision_params,
                                          learning_rate=0.001)

            self.turn_updates = adam(self.turn_loss,
                                     self.turn_params,
                                     learning_rate=0.00005)

            # get test loss
            self.test_collision = theano.function(
                inputs=[self.X, self.Y],
                outputs=self.test_collision_loss,
                allow_input_downcast=True)

            self.test_turn = theano.function(inputs=[self.X, self.Z],
                                             outputs=self.test_turn_loss,
                                             allow_input_downcast=True)

            # training functions
            self.train_collision = theano.function(
                inputs=[self.X, self.Y],
                outputs=self.collision_loss,
                updates=self.collision_updates,
                allow_input_downcast=True)

            self.train_turn = theano.function(inputs=[self.X, self.Z],
                                              outputs=self.turn_loss,
                                              updates=self.turn_updates,
                                              allow_input_downcast=True)

        else:
            # collision probability for
            # testing. Output is a theano object
            self.collision_prob_test = get_output(self.CollisionProbability,
                                                  deterministic=True)

            # turn angle for testing.
            # Output is a theano object.
            self.turn_angle_test = get_output(self.TurnAngle,
                                              deterministic=True)

        # run the network to calculate collision probability
        # and turn angle given an input.
        self.dronet = theano.function(
            inputs=[self.X],
            outputs=[self.turn_angle_test, self.collision_prob_test],
            allow_input_downcast=True)

        def load():
            with np.load(self.model_name) as f:
                param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            set_all_param_values([self.CollisionProbability, self.TurnAngle],
                                 param_values)

        if load_weights:
            load()
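Assuming the surrounding class is instantiated in the usual way (its name is not visible in this excerpt, so DronetModel below is a placeholder), inference reduces to calling the compiled self.dronet function on a batch of single-channel 120x160 images; a hedged sketch:

import numpy as np

model = DronetModel(load_weights=True, is_training=False)    # expects dronet_weights.npz next to the module

frame = np.random.rand(1, 1, 120, 160).astype(np.float32)    # dummy grayscale frame, shape (batch, 1, H, W)
turn_angle, collision_prob = model.dronet(frame)             # output order matches the theano.function above
print(turn_angle.shape, collision_prob.shape)                # both (1, 1)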
Example #44
0
def create_network(available_actions_count):
    # Create the input variables
    s1 = tensor.tensor4("State")
    a = tensor.vector("Action", dtype="int32")
    q2 = tensor.vector("Q2")
    r = tensor.vector("Reward")
    isterminal = tensor.vector("IsTerminal", dtype="int8")

    # Create the input layer of the network.
    dqn = InputLayer(shape=[None, 1, resolution[0], resolution[1]],
                     input_var=s1)

    # Add 2 convolutional layers with ReLu activation
    dqn = Conv2DLayer(dqn,
                      num_filters=8,
                      filter_size=[6, 6],
                      nonlinearity=rectify,
                      W=HeUniform("relu"),
                      b=Constant(.1),
                      stride=3)
    dqn = Conv2DLayer(dqn,
                      num_filters=8,
                      filter_size=[3, 3],
                      nonlinearity=rectify,
                      W=HeUniform("relu"),
                      b=Constant(.1),
                      stride=2)

    # Add a single fully-connected layer.
    dqn = DenseLayer(dqn,
                     num_units=128,
                     nonlinearity=rectify,
                     W=HeUniform("relu"),
                     b=Constant(.1))

    # Add the output layer (also fully-connected).
    # (no nonlinearity as it is for approximating an arbitrary real function)
    dqn = DenseLayer(dqn, num_units=available_actions_count, nonlinearity=None)

    # Define the loss function
    q = get_output(dqn)
    # target differs from q only for the selected action. The following means:
    # target_Q(s,a) = r + gamma * max Q(s2,_) if isterminal else r
    target_q = tensor.set_subtensor(
        q[tensor.arange(q.shape[0]), a],
        r + discount_factor * (1 - isterminal) * q2)
    loss = squared_error(q, target_q).mean()

    # Update the parameters according to the computed gradient using RMSProp.
    params = get_all_params(dqn, trainable=True)
    updates = rmsprop(loss, params, learning_rate)

    # Compile the theano functions
    print("Compiling the network ...")
    function_learn = theano.function([s1, q2, a, r, isterminal],
                                     loss,
                                     updates=updates,
                                     name="learn_fn")
    function_get_q_values = theano.function([s1], q, name="eval_fn")
    function_get_best_action = theano.function([s1],
                                               tensor.argmax(q),
                                               name="test_fn")
    print("Network compiled.")

    def simple_get_best_action(state):
        return function_get_best_action(
            state.reshape([1, 1, resolution[0], resolution[1]]))

    # Returns Theano objects for the net and functions.
    return dqn, function_learn, function_get_q_values, simple_get_best_action
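The compiled functions are typically wired together in a replay-memory learning step: q2 holds the best Q-value of each successor state under the current network, and the (1 - isterminal) factor inside target_q implements "r if terminal else r + gamma * max Q(s2, .)". A minimal sketch of such a step, assuming s1, a, r, s2, isterminal are NumPy arrays sampled from a replay buffer (the sampling itself is not shown):

import numpy as np

def learn_from_transitions(s1, a, r, s2, isterminal):
    # best achievable Q-value in the successor states, under the current network
    q2 = np.max(function_get_q_values(s2), axis=1)
    # function_learn builds target_q internally from (q2, a, r, isterminal) and applies the RMSProp update
    return function_learn(s1, q2, a, r, isterminal)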
Example #45
0
X_action         = T.bvector()
X_reward         = T.fvector()
X_done           = T.bvector()

X_action_hot = to_one_hot(X_action, n_action)

q_        = q_network(X_state);      q        = get_output(q_)
q_target_ = q_network(X_next_state); q_target = get_output(q_target_)
q_max     = T.max(q_target, axis=1)
action    = T.argmax(q, axis=1)

mu = theano.function(inputs               = [X_state],
                     outputs              = action,
                     allow_input_downcast = True)

loss = squared_error(X_reward + gamma * q_max * (1.0 - X_done), T.batched_dot(q, X_action_hot))
loss = loss.mean()

params = get_all_params(q_)

grads        = T.grad(loss,
                      params)

normed_grads = total_norm_constraint(grads, 1.0)

updates = adam(normed_grads,
               params,
               learning_rate = learning_rate)

update_network = theano.function(inputs               = [X_state,
                                                         X_action,
Example #46
0
File: sarsa.py Project: Omrigan/AgentNet
def get_elementwise_objective(Qvalues,
                              actions,
                              rewards,
                              is_alive="always",
                              gamma_or_gammas=0.95,
                              force_qvalues_after_end=True,
                              qvalues_after_end="zeros",
                              consider_reference_constant=True, ):
    """
    Returns the squared error between predicted and reference Q-values according to the SARSA algorithm
    
        Qreference(state,action) = reward(state,action) + gamma* Q(next_state,next_action)  
        loss = mean over (Qvalues - Qreference)**2
        
    parameters:
    
        Qvalues [batch,tick,action_id] - predicted qvalues
        actions [batch,tick] - committed actions
        rewards [batch,tick] - immediate rewards for taking actions at given time ticks
        
        is_alive [batch,tick] - whether given session is still active at given tick. Defaults to always active.
                            Default value of is_alive implies a simplified computation algorithm for Qlearning loss
        
        gamma_or_gammas - a single value or array[batch,tick](can broadcast dimensions) of delayed reward discounts 
        
        force_qvalues_after_end - if true, sets reference Qvalues at session end to rewards[end] + qvalues_after_end
        
        qvalues_after_end [batch,1,n_actions] - symbolic expression for "next state q-values" for last tick used for reference only. 
                            Defaults to T.zeros_like(Qvalues[:,0,None,:])
                            If you wish to simply ignore the last tick, use defaults and crop output's last tick ( qref[:,:-1] )

        consider_reference_constant - whether or not zero-out gradient flow through reference_Qvalues
            (True is highly recommended)
    Returns:
                
        tensor [batch, tick] of squared errors over Qvalues (using formula above for loss)

    """
    # get reference Q-values via the SARSA update rule
    reference_Qvalues = get_reference_Qvalues(Qvalues, actions, rewards,
                                              gamma_or_gammas=gamma_or_gammas,
                                              qvalues_after_end=qvalues_after_end,
                                              )

    if consider_reference_constant:
        # do not pass gradient through reference Q-values (since they DO depend on Q-values by default)
        reference_Qvalues = consider_constant(reference_Qvalues)

    # get predicted qvalues for committed actions (to compare with reference Q-values)
    action_Qvalues = get_action_Qvalues(Qvalues, actions)

    # if agent is always alive, return the simplified loss
    if is_alive == "always":

        # tensor of element-wise squared errors
        elwise_squared_error = squared_error(reference_Qvalues, action_Qvalues)

    else:
        # we are given an is_alive matrix : uint8[batch,tick]

        # if asked to force reference_Q[end_tick+1,a] = 0, do it
        # note: if agent is always alive, this is meaningless

        if force_qvalues_after_end:
            # set future rewards at session end to rewards + qvalues_after_end
            end_ids = get_end_indicator(is_alive, force_end_at_t_max=True).nonzero()

            if qvalues_after_end == "zeros":
                # "set reference Q-values at end action ids to just the immediate rewards"
                reference_Qvalues = T.set_subtensor(reference_Qvalues[end_ids], rewards[end_ids])
            else:
                last_optimal_rewards = T.zeros_like(rewards[:, 0])

                # "set reference Q-values at end action ids to the immediate rewards + qvalues after end"
                reference_Qvalues = T.set_subtensor(reference_Qvalues[end_ids],
                                                    rewards[end_ids] + gamma_or_gammas * last_optimal_rewards[
                                                        end_ids[0], 0]
                                                    )

        # tensor of element-wise squared errors
        elwise_squared_error = squared_error(reference_Qvalues, action_Qvalues)

        # zero-out loss after session ended
        elwise_squared_error = elwise_squared_error * is_alive

    return elwise_squared_error
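The function returns a [batch, tick] tensor of squared errors rather than a scalar, so the caller still has to aggregate it into a loss; a common choice (an assumption here, not part of this snippet) is to average only over ticks where the session is alive:

# elwise_loss: [batch, tick] symbolic tensor returned by get_elementwise_objective
elwise_loss = get_elementwise_objective(Qvalues, actions, rewards, is_alive=is_alive)

# average the squared error over "alive" ticks only (assumed aggregation)
loss = elwise_loss.sum() / is_alive.sum()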
Example #47
0
l_input = InputLayer((None, chan, width, height), input_var=input_var)
l_conv1 = Conv2DLayer(l_input, num_filters=32, filter_size=(3, 3),
                      nonlinearity=rectify, W=GlorotUniform())
l_pool1 = MaxPool2DLayer(l_conv1, pool_size=(2, 2))

l_conv2 = Conv2DLayer(l_pool1, num_filters=32, filter_size=(1, 1),
                      nonlinearity=rectify, W=GlorotUniform())
l_depool1 = Unpool2DLayer(l_pool1, (2, 2))
l_deconv1 = TransposeConv2DLayer(l_depool1, num_filters=chan,
                                 filter_size=(3, 3),
                                 W=GlorotUniform(), nonlinearity=linear)

l_out = l_deconv1

prediction = get_output(l_out)
train_loss = squared_error(prediction, target_var)
train_loss = train_loss.mean()

valid_prediction = get_output(l_out, deterministic=True)
valid_loss = squared_error(valid_prediction, target_var)
valid_loss = valid_loss.mean()

params = get_all_params(l_out, trainable=True)
updates = adam(train_loss, params, learning_rate=1E-4)

train_function = theano.function([input_var, target_var], train_loss,
                                 updates=updates)
valid_function = theano.function([input_var, target_var], valid_loss)

n_epochs = 1000
for e in range(n_epochs):
Example #48
0
    def __init__(self, source, layers_config,
                 updates_func=nesterov_momentum,
                 updates_kwargs=None,
                 learning_rate=0.1,
                 learning_rate_changes_by_iteration=None,
                 experiment_name="",
                 validation_interval=10,
                 save_plot_interval=100,
                 loss_function=lambda x, t: squared_error(x, t).mean(),
                 layer_changes=None,
                 seed=42,
                 epoch_callbacks=None,
                 do_save_activations=True,
                 plotter=Plotter(),
                 auto_reshape=True,
                 logger=None):
        """
        Parameters
        ----------
        layers_config : list of dicts.  Keys are:
            'type' : BLSTMLayer or a subclass of lasagne.layers.Layer
            'num_units' : int
        """
        if logger is None:
            self.logger = logging.getLogger(experiment_name)
        else:
            self.logger = logger
        self.logger.info("Initialising network...")

        if seed is not None:
            np.random.seed(seed)
        self.source = source
        self.updates_func = updates_func
        self._learning_rate = theano.shared(
            sfloatX(learning_rate), name='learning_rate')
        self.logger.info(
            "Learning rate initialised to {:.1E}".format(learning_rate))
        self.learning_rate_changes_by_iteration = none_to_dict(
            learning_rate_changes_by_iteration)
        self.updates_kwargs = none_to_dict(updates_kwargs)
        self.experiment_name = experiment_name
        self.validation_interval = validation_interval
        self.save_plot_interval = save_plot_interval
        self.loss_function = loss_function
        self.layer_changes = none_to_dict(layer_changes)
        self.epoch_callbacks = none_to_dict(epoch_callbacks)
        self.do_save_activations = do_save_activations
        self.plotter = plotter
        self.plotter.net = self
        self.auto_reshape = auto_reshape

        self.set_csv_filenames()
        self.generate_validation_data_and_set_shapes()

        self.validation_costs = []
        self.training_costs = []
        self.training_costs_metadata = []
        self.layers = []
        self.layer_labels = {}

        # Shape is (number of examples per batch,
        #           maximum number of time steps per example,
        #           number of features per example)
        input_layer = InputLayer(shape=self.input_shape)
        self.layer_labels['input'] = input_layer
        self.layers.append(input_layer)
        self.add_layers(layers_config)
        self.logger.info(
            "Done initialising network for " + self.experiment_name)
Example #49
0
                            nonlinearity=rectify,
                            W=lasagne.init.Normal(0.01))
l_deconv1_2 = Deconv2DLayer(l_deconv1_1,
                            num_filters=64,
                            filter_size=(3, 3),
                            stride=1,
                            crop='same',
                            nonlinearity=rectify,
                            W=lasagne.init.Normal(0.01))
l_output = Conv2DLayer(l_deconv1_2, num_filters=21, filter_size=(1, 1),
                       pad=0)  ###
#l_output = DenseLayer(l_hidden1_dropout, num_units=10, nonlinearity=softmax)
prediction = get_output(l_output)
# target_var = T.ftensor4('true_output') ###

loss = squared_error(prediction, target_var)
loss = loss.mean()
# loss.mean()
# loss_train = squared_error(lasagne.layers.get_output(l_output, deterministic=False), true_output).mean()
# loss_eval = squared_error(lasagne.layers.get_output(l_output, deterministic=True), true_output).mean()

all_params = lasagne.layers.get_all_params(l_output, trainable=True)
updates = lasagne.updates.nesterov_momentum(loss,
                                            all_params,
                                            learning_rate=0.001,
                                            momentum=0.985)

train_fn = theano.function([input_var, target_var], loss, updates=updates)
# get_output = theano.function([l_in.input_var], lasagne.layers.get_output(l_output, deterministic=True))

BATCH_SIZE = 10
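The snippet stops right after defining BATCH_SIZE; a typical continuation would iterate over minibatches and call the compiled train_fn (a sketch under assumed array names, not the original code):

import numpy as np

n_epochs = 10   # assumed value
for epoch in range(n_epochs):
    epoch_loss, n_batches = 0.0, 0
    for start in range(0, len(X_train), BATCH_SIZE):
        xb = X_train[start:start + BATCH_SIZE]   # X_train/y_train are assumed input and target arrays
        yb = y_train[start:start + BATCH_SIZE]
        epoch_loss += train_fn(xb, yb)
        n_batches += 1
    print("epoch {}: train loss {:.4f}".format(epoch + 1, epoch_loss / n_batches))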
Example #50
0
    def __init__(self,
                 dim,
                 mode,
                 l2,
                 l1,
                 batch_norm,
                 dropout,
                 batch_size,
                 input_dim=76,
                 **kwargs):

        print "==> not used params in network class:", kwargs.keys()

        self.dim = dim
        self.mode = mode
        self.l2 = l2
        self.l1 = l1
        self.batch_norm = batch_norm
        self.dropout = dropout
        self.batch_size = batch_size

        self.input_var = T.tensor3('X')
        self.input_lens = T.ivector('L')
        self.target_var = T.vector('y')

        print "==> Building neural network"
        network = layers.InputLayer((None, None, input_dim),
                                    input_var=self.input_var)
        network = layers.LSTMLayer(
            incoming=network,
            num_units=dim,
            only_return_final=False,
            grad_clipping=10,
            ingate=lasagne.layers.Gate(W_in=Orthogonal(),
                                       W_hid=Orthogonal(),
                                       W_cell=Normal(0.1)),
            forgetgate=lasagne.layers.Gate(W_in=Orthogonal(),
                                           W_hid=Orthogonal(),
                                           W_cell=Normal(0.1)),
            cell=lasagne.layers.Gate(W_cell=None,
                                     nonlinearity=lasagne.nonlinearities.tanh,
                                     W_in=Orthogonal(),
                                     W_hid=Orthogonal()),
            outgate=lasagne.layers.Gate(W_in=Orthogonal(),
                                        W_hid=Orthogonal(),
                                        W_cell=Normal(0.1)))
        lstm_output = layers.get_output(network)
        self.params = layers.get_all_params(network, trainable=True)
        self.reg_params = layers.get_all_params(network, regularizable=True)

        # for each example in minibatch take the last output
        last_outputs = []
        for index in range(self.batch_size):
            last_outputs.append(lstm_output[index,
                                            self.input_lens[index] - 1, :])
        last_outputs = T.stack(last_outputs)

        network = layers.InputLayer(shape=(self.batch_size, self.dim),
                                    input_var=last_outputs)
        network = layers.DenseLayer(incoming=network,
                                    num_units=1,
                                    nonlinearity=rectify)

        self.prediction = layers.get_output(network)
        self.params += layers.get_all_params(network, trainable=True)
        self.reg_params += layers.get_all_params(network, regularizable=True)

        self.loss_mse = squared_error(self.prediction, self.target_var).mean()
        if self.l2 > 0:
            self.loss_l2 = self.l2 * nn_utils.l2_reg(self.reg_params)
        else:
            self.loss_l2 = T.constant(0)

        if self.l1 > 0:
            self.loss_l1 = self.l1 * nn_utils.l1_reg(self.reg_params)
        else:
            self.loss_l1 = T.constant(0)

        self.loss_reg = self.loss_l1 + self.loss_l2

        self.loss = self.loss_mse + self.loss_reg

        #updates = lasagne.updates.adadelta(self.loss, self.params,
        #                                    learning_rate=0.001)
        #updates = lasagne.updates.momentum(self.loss, self.params,
        #                                    learning_rate=0.00003)
        #updates = lasagne.updates.adam(self.loss, self.params)
        updates = lasagne.updates.adam(
            self.loss, self.params, beta1=0.5,
            learning_rate=0.0001)  # from DCGAN paper
        #updates = lasagne.updates.nesterov_momentum(loss, params, momentum=0.9,
        #                                             learning_rate=0.001,

        ## compiling theano functions
        if self.mode == 'train':
            print "==> compiling train_fn"
            self.train_fn = theano.function(
                inputs=[self.input_var, self.input_lens, self.target_var],
                outputs=[self.prediction, self.loss, self.loss_reg],
                updates=updates)

        print "==> compiling test_fn"
        self.test_fn = theano.function(
            inputs=[self.input_var, self.input_lens, self.target_var],
            outputs=[self.prediction, self.loss, self.loss_reg])
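Because the network reads lstm_output[index, input_lens[index] - 1, :] for every example, callers must pass the true (unpadded) length of each sequence alongside the zero-padded batch; a hedged usage sketch with assumed shapes and array names:

import numpy as np

batch_size, max_len, input_dim = 8, 48, 76   # batch_size must equal the value passed to __init__
X = np.zeros((batch_size, max_len, input_dim), dtype=np.float32)            # zero-padded sequences
L = np.random.randint(10, max_len + 1, size=batch_size).astype(np.int32)    # true sequence lengths
y = np.random.rand(batch_size).astype(np.float32)                           # regression targets

# net is an instance of the class whose __init__ is shown above, built with mode='train'
prediction, loss, loss_reg = net.train_fn(X, L, y)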
Example #51
0
def net_dict_ae_rnn(seq_length):
    NUM_FILTERS = 8
    return dict(
        epochs=None,
        save_plot_interval=5000,
        loss_function=lambda x, t: squared_error(x, t).mean(),
        updates_func=nesterov_momentum,
        learning_rate=1e-2,
        learning_rate_changes_by_iteration={
            110000: 1e-3
        },
        do_save_activations=True,
        auto_reshape=False,
        plotter=Plotter(
            n_seq_to_plot=32,
            n_training_examples_to_plot=16
        ),
        layers_config=[
            {
                'type': DimshuffleLayer,
                'pattern': (0, 2, 1)  # (batch, features, time)
            },
            {
                'label': 'conv0',
                'type': Conv1DLayer,  # convolve over the time axis
                'num_filters': NUM_FILTERS,
                'filter_size': 4,
                'stride': 1,
                'nonlinearity': None,
                'pad': 'valid'
            },
            {
                'type': DimshuffleLayer,
                'pattern': (0, 2, 1)  # back to (batch, time, features)
            },
            {
                'type': DenseLayer,
                'num_units': (seq_length - 3) * NUM_FILTERS,
                'nonlinearity': rectify
            },
            {
                'type': DenseLayer,
                'num_units': 128,
                'nonlinearity': rectify
            },
            {
                'type': DenseLayer,
                'num_units': (seq_length - 3) * NUM_FILTERS,
                'nonlinearity': rectify
            },
            {
                'type': ReshapeLayer,
                'shape': (-1, (seq_length - 3), NUM_FILTERS)
            },
            {
                'type': DimshuffleLayer,
                'pattern': (0, 2, 1)  # (batch, features, time)
            },
            {   # DeConv
                'type': Conv1DLayer,
                'num_filters': 1,
                'filter_size': 4,
                'stride': 1,
                'nonlinearity': None,
                'pad': 'full'
            },
            {
                'type': DimshuffleLayer,
                'pattern': (0, 2, 1),  # back to (batch, time, features)
                'label': 'AE_output'
            }
        ],
        layer_changes={
            100001: {
                'new_layers': [
                    {
                        'type': ConcatLayer,
                        'axis': 2,
                        'incomings': ['input', 'AE_output']
                    },
                    {
                        'type': ReshapeLayer,
                        'shape': (64 * seq_length, 2)
                    },
                    {
                        'type': DenseLayer,
                        'num_units': 16,
                        'nonlinearity': tanh
                    },
                    {
                        'type': ReshapeLayer,
                        'shape': (64, seq_length, 16)
                    },
                    {
                        'type': BLSTMLayer,
                        'num_units': 128,
                        'merge_mode': 'concatenate',
                        'grad_clipping': 10.0,
                        'gradient_steps': 500
                    },
                    {
                        'type': BLSTMLayer,
                        'num_units': 256,
                        'merge_mode': 'concatenate',
                        'grad_clipping': 10.0,
                        'gradient_steps': 500
                    },
                    {
                        'type': ReshapeLayer,
                        'shape': (64 * seq_length, 512)
                    },
                    {
                        'type': DenseLayer,
                        'num_units': 128,
                        'nonlinearity': tanh
                    },
                    {
                        'type': DenseLayer,
                        'num_units': 1,
                        'nonlinearity': None
                    }
                ]
            }
        }
    )
Example #52
0
INPUT_STATS = {
    'mean': np.array([297.87216187], dtype=np.float32),
    'std': np.array([374.43884277], dtype=np.float32)
}


def only_train_on_real_data(net, iteration):
    net.logger.info(
        "Iteration {}: Now only training on real data.".format(iteration))
    net.source.sources[0]['train_probability'] = 0.0
    net.source.sources[1]['train_probability'] = 1.0


net_dict = dict(
    save_plot_interval=SAVE_PLOT_INTERVAL,
    loss_function=lambda x, t: squared_error(x, t).mean(),
    updates_func=nesterov_momentum,
    learning_rate=1e-4,
    learning_rate_changes_by_iteration={
        400000: 1e-5,
        500000: 1e-6
    },
    epoch_callbacks={
        350000: only_train_on_real_data
    },
    do_save_activations=True,
    auto_reshape=False,
    layers_config=[
        {
            'type': DimshuffleLayer,
            'pattern': (0, 2, 1)  # (batch, features, time)
Example #53
0
def get_model():

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.matrix('targets')

    # input layer with unspecified batch size
    layer_0 = InputLayer(shape=(None, 30, 64, 64), input_var=input_var)

    # Z-score?

    # Convolution then batchNormalisation then activation layer, twice, then zero padding layer followed by a dropout layer
    layer_1 = batch_norm(
        Conv2DLayer(layer_0,
                    64, (3, 3),
                    pad='same',
                    nonlinearity=leaky_rectify))
    layer_2 = batch_norm(
        Conv2DLayer(layer_1,
                    64, (3, 3),
                    pad='valid',
                    nonlinearity=leaky_rectify))
    layer_3 = MaxPool2DLayer(layer_2,
                             pool_size=(2, 2),
                             stride=(2, 2),
                             pad=(1, 1))
    layer_4 = DropoutLayer(layer_3, p=0.25)

    # Convolution then batchNormalisation then activation layer, twice, then zero padding layer followed by a dropout layer
    layer_5 = batch_norm(
        Conv2DLayer(layer_4,
                    96, (3, 3),
                    pad='same',
                    nonlinearity=leaky_rectify))
    layer_6 = batch_norm(
        Conv2DLayer(layer_5,
                    96, (3, 3),
                    pad='valid',
                    nonlinearity=leaky_rectify))
    layer_7 = MaxPool2DLayer(layer_6,
                             pool_size=(2, 2),
                             stride=(2, 2),
                             pad=(1, 1))
    layer_8 = DropoutLayer(layer_7, p=0.25)

    # Convolution then batchNormalisation then activation layer, twice, then zero padding layer followed by a dropout layer
    layer_9 = batch_norm(
        Conv2DLayer(layer_8,
                    128, (3, 3),
                    pad='same',
                    nonlinearity=leaky_rectify))
    layer_10 = batch_norm(
        Conv2DLayer(layer_9,
                    128, (3, 3),
                    pad='valid',
                    nonlinearity=leaky_rectify))
    layer_11 = MaxPool2DLayer(layer_10,
                              pool_size=(2, 2),
                              stride=(2, 2),
                              pad=(1, 1))
    layer_12 = DropoutLayer(layer_11, p=0.25)

    # Last layers
    layer_13 = FlattenLayer(layer_12)
    layer_14 = DenseLayer(layer_13, 1024, nonlinearity=leaky_rectify)
    layer_15 = DropoutLayer(layer_14, p=0.5)
    layer_16 = DenseLayer(layer_15, 600, nonlinearity=softmax)

    # Loss
    prediction = get_output(layer_16)
    loss = squared_error(prediction, target_var)
    loss = loss.mean() + regularize_layer_params(layer_14, l2)

    #Updates : Stochastic Gradient Descent (SGD) with Nesterov momentum
    params = get_all_params(layer_16, trainable=True)
    updates = nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.9)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network, disabling dropout layers.
    test_prediction = get_output(layer_16, deterministic=True)
    test_loss = squared_error(test_prediction, target_var)
    test_loss = test_loss.mean()

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], test_loss)

    # Compile a third function computing the prediction
    predict_fn = theano.function([input_var], test_prediction)

    return [layer_16, train_fn, val_fn, predict_fn]
def run_network(data=None, num_epochs=10, ratio=0.5):
    try:

        global_start_time = time()
        sequence_length = 50
        batchsize = 512
        path_to_dataset = 'household_power_consumption.txt'

        # Loading the data

        if data is None:
            print 'Loading data... '
            X_train, y_train, X_test, y_test = data_power_consumption(
                path_to_dataset, sequence_length, ratio)
        else:
            X_train, y_train, X_test, y_test = data

        val_ratio = 0.005
        val_rows = round(val_ratio * X_train.shape[0])

        X_val = X_train[:val_rows]
        y_val = y_train[:val_rows]
        y_val = np.reshape(y_val, (y_val.shape[0], 1))
        X_train = X_train[val_rows:]
        y_train = y_train[val_rows:]
        

        # Creating the Theano variables
        input_var = T.tensor3('inputs')
        target_var = T.matrix('targets')

        # Building the Theano expressions on these variables
        network = build_model(input_var)

        prediction = lasagne.layers.get_output(network)
        loss = squared_error(prediction, target_var)
        loss = aggregate(loss)

        params = lasagne.layers.get_all_params(network, trainable=True)
        updates = rmsprop(loss, params, learning_rate=0.001)

        test_prediction = lasagne.layers.get_output(network,
                                                    deterministic=True)
        test_loss = squared_error(test_prediction, target_var)
        test_loss = aggregate(test_loss)

        # Compiling the graph by declaring the Theano functions
        compile_time = time()

        print 'Data:'
        print 'X_train ', X_train.shape, ' y_train ', y_train.shape
        print 'X_val ', X_val.shape, ' y_val ', y_val.shape
        print 'X_test ', X_test.shape, ' y_test ', y_test.shape

        print "Compiling..."
        train_fn = theano.function([input_var, target_var],
                                   loss, updates=updates)
        val_fn = theano.function([input_var, target_var],
                                 test_loss)
        get_pred_fn = theano.function([input_var], prediction)
        print "Compiling time : ", time() - compile_time

        # For loop that goes each time through the whole training
        # and validation data
        # T R A I N I N G
        # - - - - - - - -
        print "Starting training...\n"
        for epoch in range(num_epochs):

            # Going over the training data
            train_err = 0
            train_batches = 0
            start_time = time()
            nb_batches = X_train.shape[0] / batchsize
            time_line = np.zeros(nb_batches)
            for batch in iterate_minibatches(X_train, y_train,
                                             batchsize, shuffle=True):
                current_time = time()
                inputs, targets = batch
                train_err += train_fn(inputs, targets)
                train_batches += 1
                str_out = "\rTrain Batch  " + str(train_batches)
                str_out += "/" + str(nb_batches)
                str_out += "  |  Loss : " + str(train_err / train_batches)[:7]
                str_out += "  |  Remaining time (s) : "
                remaining_seconds = time() - current_time
                remaining_seconds *= (nb_batches - train_batches)
                time_line[train_batches - 1] = round(remaining_seconds)
                if (train_batches - 1) % 5 == 0:
                    durations = time_line[train_batches-1: train_batches+50]
                    durations = np.mean([t for t in durations if t > 0])
                str_out += str(durations)
                sys.stdout.write(str_out)
                sys.stdout.flush()

            print "\nGoing through validation data"
            # Going over the validation data
            val_err = 0
            val_batches = 0
            for batch in iterate_minibatches(
                    X_val, y_val, batchsize, shuffle=False):
                inputs, targets = batch
                err = val_fn(inputs, targets)
                val_err += err
                val_batches += 1

            # Then we print the results for this epoch:
            # train_batches - 1 because started at 1 and not 0
            print "training loss:\t\t\t" + str(train_err / train_batches)
            print "validation loss:\t\t" + str(val_err / val_batches)
            print("Epoch {} of {} took {:.3f}s \n\n".format(
                epoch + 1, num_epochs, time() - start_time))

        # Now that the training is over, let's test the network:
        test_err = 0
        test_batches = 0
        for batch in iterate_minibatches(
                X_test, y_test, batchsize, shuffle=False):
            inputs, targets = batch
            err = val_fn(inputs, targets)
            test_err += err
            test_batches += 1
        print "\nFinal results in {0} seconds:".format(
            time()-global_start_time)
        print "Test loss:\t\t\t{:.6f}".format(test_err / test_batches)

        prediction_size = 200
        predicted = get_pred_fn(X_test[:prediction_size])

        try:
            plt.plot(predicted)
            plt.plot(y_test[:prediction_size])
            plt.show(block=False)
        except Exception as e:
            print str(e)
            print "predicted = ", repr(
                np.reshape(predicted[:prediction_size], (prediction_size,)))
            print '\n'
            print "y = ", repr(
                np.reshape(y_test[:prediction_size], (prediction_size,)))
        return network
    except KeyboardInterrupt:
        return network
Example #55
0
def get_elementwise_objective_components(policy,
                                         rewards,
                                         policy_values,
                                         action_values='same',
                                         is_alive="always",
                                         n_steps=None,
                                         gamma_or_gammas=0.99,
                                         crop_last = True,
                                         force_values_after_end=True,
                                         state_values_after_end="zeros",
                                         consider_value_reference_constant=True,
                                         consider_predicted_value_constant=True,
                                         scan_dependencies=tuple(),
                                         scan_strict=True,
                                         ):
    """
    Returns deterministic policy gradient components for the actor and the critic

        L_policy = -critic(state,policy) = -policy_values
        L_V = (V - Vreference)^2

        You will have to compute updates for the actor and the critic independently and then add them up.
            
    parameters:
    
        policy [batch,tick,action_id] - predicted "optimal policy" (mu)
        rewards [batch,tick] - immediate rewards for taking actions at given time ticks
        policy_values [batch,tick] - predicted state values given OPTIMAL policy
        action_values [batch,tick] - predicted Q-values for committed actions, INCLUDING EXPLORATION if any.
                            The default value implies action_values = policy_values if there is no exploration
        
        is_alive [batch,tick] - whether given session is still active at given tick. Defaults to always active.
                            Default value of is_alive implies a simplified computation algorithm for Qlearning loss
        
        n_steps: if an integer is given, the references are computed in loops of n_steps states.
            Defaults to None: propagating rewards throughout the whole session.
            If n_steps equals 1, this works exactly as Q-learning (though a less efficient one).
            If you provide a symbolic integer here AND strict = True, make sure you added the variable to dependencies.
        
        gamma_or_gammas - a single value or array[batch,tick](can broadcast dimensions) of delayed reward discounts 
        
        crop_last - if True, zeroes out the loss at the final tick; if False, computes the loss against Qvalues_after_end

        force_values_after_end - if True, sets reference values at session end to rewards[end] + state_values_after_end

        state_values_after_end [batch,1,n_actions] - "next state values" for the last tick, used for reference only.
                            Defaults to T.zeros_like(state_values[:,0,None,:]).
                            If you wish to simply ignore the last tick, use the defaults and crop the output's last tick ( qref[:,:-1] )

        
        
        scan_dependencies: everything you need to evaluate first 3 parameters (only if strict==True)
        scan_strict: whether to evaluate values using strict theano scan or non-strict one
        
    Returns:

        A tuple of (element-wise policy loss, element-wise state-value loss), both masked by is_alive

    """

    if action_values == 'same':
        action_values = policy_values

    # get reference values via DPG algorithm
    reference_action_values = get_n_step_value_reference(action_values,
                                                         rewards,
                                                         is_alive,
                                                         n_steps=n_steps,
                                                         optimal_state_values_after_end=state_values_after_end,
                                                         gamma_or_gammas=gamma_or_gammas,
                                                         dependencies=scan_dependencies,
                                                         strict=scan_strict
                                                         )

    if is_alive != "always" and force_values_after_end:
        # if asked to force reference_Q[end_tick+1,a] = 0, do it
        # note: if agent is always alive, this is meaningless

        # set future rewards at session end to rewards+qvalues_after_end
        end_ids = get_end_indicator(is_alive, force_end_at_t_max=True).nonzero()

        if state_values_after_end == "zeros":
            # "set reference state values at end action ids to just the immediate rewards"
            reference_action_values = T.set_subtensor(reference_action_values[end_ids], rewards[end_ids])
        else:
            # "set reference state values at end action ids to the immediate rewards + qvalues after end"
            new_subtensor_values = rewards[end_ids] + gamma_or_gammas * state_values_after_end[end_ids[0], 0]
            reference_action_values = T.set_subtensor(reference_action_values[end_ids], new_subtensor_values)

    # now compute the loss components
    if is_alive == "always":
        is_alive = T.ones_like(action_values, dtype=theano.config.floatX)

    # actor loss
    # here we rely on fact that state_values = critic(state,optimal_policy)
    # using chain rule,
    # grad(state_values,actor_weights) = grad(state_values, optimal_policy)*grad(optimal_policy,actor_weights)
    policy_loss_elwise = -policy_values

    # critic loss
    reference_action_values = consider_constant(reference_action_values)
    V_err_elementwise = squared_error(reference_action_values, action_values)
    
    if crop_last:
        V_err_elementwise = T.set_subtensor(V_err_elementwise[:,-1],0)


    return policy_loss_elwise * is_alive, V_err_elementwise * is_alive
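As the docstring says, the two returned components are meant to drive separate updates for the actor and the critic; a hedged sketch of how they might be combined (parameter lists, learning rates and the choice of optimizer are assumptions):

import lasagne

policy_loss_elwise, V_err_elwise = get_elementwise_objective_components(
    policy, rewards, policy_values, action_values, is_alive=is_alive)

actor_loss = policy_loss_elwise.sum() / is_alive.sum()
critic_loss = V_err_elwise.sum() / is_alive.sum()

# independent updates for the actor and the critic, merged into one dictionary afterwards
actor_updates = lasagne.updates.adam(actor_loss, actor_params, learning_rate=1e-4)
critic_updates = lasagne.updates.adam(critic_loss, critic_params, learning_rate=1e-3)
actor_updates.update(critic_updates)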
# Get all trainable params
params = layers.get_all_params(unsupervised_graph, trainable=True) + \
         layers.get_all_params(supervised_graph, trainable=True)
# params = layers.get_all_params(supervised_graph)[-2:]
params = utils.unique(params)

# Get regularizable params
regularization_params = layers.get_all_params(unsupervised_graph, regularizable=True) + \
                        layers.get_all_params(supervised_graph, regularizable=True)
regularization_params = utils.unique(regularization_params)

# Creating loss functions
# Train loss has to take into account of labeled image or not
if run_parameters.unsupervised_cost_fun == 'squared_error':
    loss1 = objectives.squared_error(reconstruction, input_var)
elif run_parameters.unsupervised_cost_fun == 'categorical_crossentropy':
    loss1 = objectives.categorical_crossentropy(reconstruction, input_var)
if supervised_cost_fun == 'squared_error':
    loss2 = objectives.squared_error(prediction, target_var) * repeat_col(labeled_var, 10)
elif supervised_cost_fun == 'categorical_crossentropy':
    loss2 = objectives.categorical_crossentropy(prediction, target_var) * labeled_var.T
l2_penalties = regularization.apply_penalty(regularization_params, regularization.l2)
sparse_layers = get_all_sparse_layers(unsupervised_graph)
sparse_layers_output = layers.get_output(sparse_layers, deterministic=True)
if run_parameters.sparse_regularizer_type == 0:
    sparse_regularizer = reduce(
        lambda x, y: x + T.clip((T.mean(abs(y)) - run_parameters.sparse_regularize_factor) * y.size,
                                0, float('inf')),
        sparse_layers_output, 0)
elif run_parameters.sparse_regularizer_type == 1:
    sparse_regularizer = reduce(
예제 #57
0
def get_model(input_var, target_var, multiply_var):

    # input layer with unspecified batch size
    layer_input = InputLayer(
        shape=(None, 30, 80, 80), input_var=input_var
    )  #InputLayer(shape=(None, 1, 30, 64, 64), input_var=input_var)
    layer_0 = DimshuffleLayer(layer_input, (0, 'x', 1, 2, 3))

    # Z-score?

    # Convolution, then batch normalisation and activation, then a max-pooling layer followed by a dropout layer
    layer_1 = batch_norm(
        Conv3DDNNLayer(incoming=layer_0,
                       num_filters=64,
                       filter_size=(3, 3, 3),
                       stride=(1, 3, 3),
                       pad='same',
                       nonlinearity=leaky_rectify))
    layer_2 = MaxPool3DDNNLayer(layer_1,
                                pool_size=(1, 2, 2),
                                stride=(1, 2, 2),
                                pad=(0, 1, 1))
    layer_3 = DropoutLayer(layer_2, p=0.25)

    # Convolution, then batch normalisation and activation, then a max-pooling layer followed by a dropout layer
    layer_4 = batch_norm(
        Conv3DDNNLayer(incoming=layer_3,
                       num_filters=128,
                       filter_size=(3, 3, 3),
                       stride=(1, 3, 3),
                       pad='same',
                       nonlinearity=leaky_rectify))
    layer_5 = MaxPool3DDNNLayer(layer_4,
                                pool_size=(1, 2, 2),
                                stride=(1, 2, 2),
                                pad=(0, 1, 1))
    layer_6 = DropoutLayer(layer_5, p=0.25)

    # Recurrent layer
    layer_7 = DimshuffleLayer(layer_6, (0, 2, 1, 3, 4))
    layer_8 = LSTMLayer(layer_7, num_units=612, only_return_final=True)
    layer_9 = DropoutLayer(layer_8, p=0.25)

    # Output Layer
    layer_hidden = DenseLayer(layer_9, 500, nonlinearity=sigmoid)
    layer_prediction = DenseLayer(layer_hidden, 2, nonlinearity=linear)

    # Loss
    prediction = get_output(layer_prediction) / multiply_var
    loss = squared_error(prediction, target_var)
    loss = loss.mean()

    # Trainable parameters; the updates (SGD with Nesterov momentum) are built by the caller -- see the sketch after this function
    params = get_all_params(layer_prediction, trainable=True)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network, disabling dropout layers.
    test_prediction = get_output(layer_prediction,
                                 deterministic=True) / multiply_var
    test_loss = squared_error(test_prediction, target_var)
    test_loss = test_loss.mean()

    # crps estimate
    crps = T.abs_(test_prediction - target_var).mean() / 600

    return test_prediction, crps, loss, params
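get_model above only returns the symbolic loss, the CRPS estimate and the parameter list; the updates mentioned in its comment (SGD with Nesterov momentum) and the compiled Theano functions are left to the caller. The following is a minimal sketch of such a caller, assuming the input variable names, the learning rate and the momentum value (none of these appear in the original example), and assuming multiply_var is a plain scalar normaliser.

import theano
import theano.tensor as T
from lasagne.updates import nesterov_momentum

# Symbolic inputs assumed to match what get_model expects.
input_var = T.tensor4('input')        # (batch, 30, 80, 80)
target_var = T.matrix('target')       # (batch, 2)
multiply_var = T.scalar('multiply')   # assumed scalar normalisation factor

test_prediction, crps, loss, params = get_model(input_var, target_var, multiply_var)

# SGD with Nesterov momentum, as the comment inside get_model suggests;
# the learning rate and momentum are assumed values.
updates = nesterov_momentum(loss, params, learning_rate=0.001, momentum=0.9)

train_fn = theano.function([input_var, target_var, multiply_var], loss, updates=updates)
val_fn = theano.function([input_var, target_var, multiply_var], [test_prediction, crps])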
예제 #58
0
    def __init__(self,
                 atari_env,
                 state_dimension,
                 action_dimension,
                 monitor_env=False,
                 learning_rate=0.001,
                 critic_update=10,
                 train_step=1,
                 gamma=0.95,
                 eps_max=1.0,
                 eps_min=0.1,
                 eps_decay=10000,
                 n_epochs=10000,
                 batch_size=32,
                 buffer_size=50000):

        self.env = gym.make(atari_env)
        if monitor_env:
            pass  # optional environment monitoring is not implemented here

        self.state_dimension = state_dimension
        self.action_dimension = action_dimension
        self.learning_rate = learning_rate
        self.critic_update = critic_update
        self.train_step = train_step
        self.gamma = gamma
        self.eps_max = eps_max
        self.eps_min = eps_min
        self.eps_decay = eps_decay
        self.n_epochs = n_epochs
        self.batch_size = batch_size
        self.buffer_size = buffer_size

        self.experience_replay = []

        def q_network(state):
            input_state = InputLayer(input_var=state,
                                     shape=(None, self.state_dimension[0],
                                            self.state_dimension[1],
                                            self.state_dimension[2]))

            input_state = DimshuffleLayer(input_state, pattern=(0, 3, 1, 2))

            conv = Conv2DLayer(input_state,
                               num_filters=32,
                               filter_size=(8, 8),
                               stride=(4, 4),
                               nonlinearity=rectify)

            conv = Conv2DLayer(conv,
                               num_filters=64,
                               filter_size=(4, 4),
                               stride=(2, 2),
                               nonlinearity=rectify)

            conv = Conv2DLayer(conv,
                               num_filters=64,
                               filter_size=(3, 3),
                               stride=(1, 1),
                               nonlinearity=rectify)

            flatten = FlattenLayer(conv)

            dense = DenseLayer(flatten, num_units=512, nonlinearity=rectify)

            q_values = DenseLayer(dense,
                                  num_units=self.action_dimension,
                                  nonlinearity=linear)

            return q_values

        self.X_state = T.ftensor4()
        self.X_action = T.bvector()
        self.X_reward = T.fvector()
        self.X_next_state = T.ftensor4()
        self.X_done = T.bvector()

        self.X_action_hot = to_one_hot(self.X_action, self.action_dimension)

        self.q_ = q_network(self.X_state)
        self.q = get_output(self.q_)
        self.q_target_ = q_network(self.X_next_state)
        self.q_target = get_output(self.q_target_)
        self.q_max = T.max(self.q_target, axis=1)
        self.action = T.argmax(self.q, axis=1)

        self.mu = theano.function(inputs=[self.X_state],
                                  outputs=self.action,
                                  allow_input_downcast=True)

        self.loss = squared_error(
            self.X_reward + self.gamma * self.q_max * (1.0 - self.X_done),
            T.batched_dot(self.q, self.X_action_hot))
        self.loss = self.loss.mean()

        self.params = get_all_params(self.q_)

        self.grads = T.grad(self.loss, self.params)

        self.normed_grads = total_norm_constraint(self.grads, 1.0)

        self.updates = rmsprop(self.normed_grads,
                               self.params,
                               learning_rate=self.learning_rate)

        self.update_network = theano.function(inputs=[
            self.X_state, self.X_action, self.X_reward, self.X_next_state,
            self.X_done
        ],
                                              outputs=self.loss,
                                              updates=self.updates,
                                              allow_input_downcast=True)
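The constructor stores eps_max, eps_min and eps_decay without using them, and the target network self.q_target_ starts with its own random weights that are never copied from the online network self.q_. Both pieces are normally supplied elsewhere in such an agent; the helpers below are a hedged sketch of what they could look like (these functions are assumptions, not part of the original class).

import numpy as np
from lasagne.layers import get_all_param_values, set_all_param_values

def epsilon_by_step(agent, step):
    # Linearly anneal epsilon from eps_max down to eps_min over eps_decay steps.
    fraction = min(float(step) / agent.eps_decay, 1.0)
    return agent.eps_max + fraction * (agent.eps_min - agent.eps_max)

def epsilon_greedy_action(agent, state, step):
    # With probability epsilon take a random action, otherwise the greedy one
    # from the compiled policy agent.mu (which expects a batch dimension).
    if np.random.rand() < epsilon_by_step(agent, step):
        return agent.env.action_space.sample()
    return int(agent.mu(state[np.newaxis])[0])

def sync_target_network(agent):
    # Copy the online-network weights into the separately initialised target network.
    set_all_param_values(agent.q_target_, get_all_param_values(agent.q_))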
예제 #59
0
def net_dict_rectangles(seq_length):
    return dict(
        epochs=300000,
        save_plot_interval=25000,
        loss_function=lambda x, t: squared_error(x, t).mean(),
        updates_func=nesterov_momentum,
        learning_rate=1e-4,
        learning_rate_changes_by_iteration={
            200000: 1e-5,
            250000: 1e-6
        },
        epoch_callbacks={350000: only_train_on_real_data},
        do_save_activations=True,
        auto_reshape=False,
        plotter=StartEndMeanPlotter(n_seq_to_plot=32,
                                    n_training_examples_to_plot=16),
        layers_config=[
            {
                'type': DimshuffleLayer,
                'pattern': (0, 2, 1)  # (batch, features, time)
            },
            {
                'type': PadLayer,
                'width': 4
            },
            {
                'type': Conv1DLayer,  # convolve over the time axis
                'num_filters': 16,
                'filter_size': 4,
                'stride': 1,
                'nonlinearity': None,
                'border_mode': 'valid'
            },
            {
                'type': Conv1DLayer,  # convolve over the time axis
                'num_filters': 16,
                'filter_size': 4,
                'stride': 1,
                'nonlinearity': None,
                'border_mode': 'valid'
            },
            {
                'type': DimshuffleLayer,
                'pattern': (0, 2, 1)  # back to (batch, time, features)
            },
            {
                'type': DenseLayer,
                'num_units': 512 * 8,
                'nonlinearity': rectify
            },
            # {
            #     'type': DenseLayer,
            #     'num_units': 512 * 6,
            #     'nonlinearity': rectify
            # },
            {
                'type': DenseLayer,
                'num_units': 512 * 4,
                'nonlinearity': rectify
            },
            {
                'type': DenseLayer,
                'num_units': 512,
                'nonlinearity': rectify
            },
            {
                'type': DenseLayer,
                'num_units': 3,
                'nonlinearity': None
            }
        ])
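Each entry of layers_config is a plain dict: the layer class under 'type' plus the keyword arguments for that class. One generic way to turn such a list into an actual Lasagne stack is to pop 'type' and pass the remaining keys as kwargs; the sketch below works under that assumption (the framework that actually consumes net_dict_rectangles may build the network differently, and the single input feature channel used in the usage comment is a guess).

from lasagne.layers import InputLayer

def build_network_from_config(input_shape, input_var, layers_config):
    # Start from an input layer and chain each configured layer onto the previous one.
    network = InputLayer(shape=input_shape, input_var=input_var)
    for spec in layers_config:
        spec = dict(spec)               # copy so the original config stays untouched
        layer_cls = spec.pop('type')    # e.g. Conv1DLayer, DenseLayer, ...
        if 'border_mode' in spec:       # older configs: current Conv layers call this 'pad'
            spec['pad'] = spec.pop('border_mode')
        network = layer_cls(network, **spec)
    return network

# Example usage (the single input feature channel is an assumption):
#   import theano.tensor as T
#   input_var = T.tensor3('input')  # (batch, time, features)
#   net = build_network_from_config((None, seq_length, 1), input_var,
#                                   net_dict_rectangles(seq_length)['layers_config'])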