Ejemplo n.º 1
0
    def build_loss(self):
        """Compile the training, validation and prediction functions.

        Both losses are the mean categorical cross-entropy of the output of
        ``self.net['out']`` (clipped to ``[1e-9, 1 - 1e-9]``) plus an L2
        penalty over the network parameters scaled by ``self.lambda2``.

        Returns:
            Tuple ``(train_fn, val_fn, pred_fn)`` of compiled Theano functions.
            ``train_fn(inputs, targets)`` returns the training loss and applies
            a Nesterov-momentum update; ``val_fn(inputs, targets)`` returns
            ``[loss, accuracy]`` from the deterministic pass;
            ``pred_fn(inputs)`` returns the unclipped deterministic output.
        """
        out_layer = self.net['out']
        low, high = 1e-9, 1 - 1e-9

        def weight_penalty():
            # L2 penalty over the parameters reachable from the output layer.
            return self.lambda2 * regularization.regularize_network_params(
                out_layer, regularization.l2)

        # Training graph (stochastic layers active).
        train_probs = T.clip(lasagne.layers.get_output(out_layer), low, high)
        train_xent = lasagne.objectives.categorical_crossentropy(
            train_probs, self.target_var)
        train_loss = train_xent.mean() + weight_penalty()

        trainable = lasagne.layers.get_all_params(out_layer, trainable=True)
        step = lasagne.updates.nesterov_momentum(
            train_loss, trainable,
            learning_rate=self.learning_rate, momentum=0.9)

        # Evaluation graph (deterministic pass).
        eval_probs = T.clip(
            lasagne.layers.get_output(out_layer, deterministic=True),
            low, high)
        eval_xent = lasagne.objectives.categorical_crossentropy(
            eval_probs, self.target_var)
        eval_loss = eval_xent.mean() + weight_penalty()
        hits = T.eq(T.argmax(eval_probs, axis=1), self.target_var)
        eval_acc = T.mean(hits, dtype=theano.config.floatX)

        train_fn = theano.function(
            [self.input_var, self.target_var], train_loss, updates=step)
        pred_fn = theano.function(
            [self.input_var],
            lasagne.layers.get_output(out_layer, deterministic=True))
        val_fn = theano.function(
            [self.input_var, self.target_var], [eval_loss, eval_acc])

        return train_fn, val_fn, pred_fn
Ejemplo n.º 2
0
def my_loss(model, predictions, targets, regularization, params):
    """Return the border-cropped log-ratio loss, optionally L1-regularized.

    ``predictions[0][0]`` and ``targets[0]`` are cropped with
    ``params['left_border']`` / ``params['right_border']``.  The loss is the
    sum of two absolute log terms: one for the target-weighted predictions
    and one for the complements ``1 - targets`` / ``1 - predictions``.

    Args:
        model: Network whose parameters are penalized when ``regularization``
            is truthy.
        predictions: Indexable symbolic predictions.
        targets: Indexable symbolic targets.
        regularization: When truthy, add ``1e-4`` times the L1 penalty.
        params: Mapping providing ``'left_border'`` and ``'right_border'``.

    Returns:
        The symbolic loss expression.
    """
    left = params['left_border']
    right = params['right_border']
    # NOTE(review): if right_border is the integer 0, ``[left:-0]`` is an
    # empty slice -- confirm callers always pass a positive border.
    predictions = predictions[0][0][left:-right]
    targets = targets[0][left:-right]

    positive_term = tensor.abs_(
        tensor.log((targets * predictions).sum() / targets.sum()))
    negative_term = tensor.abs_(
        tensor.log(((1 - targets) * (1 - predictions)).sum()
                   / (1 - targets).sum()))
    loss = positive_term + negative_term

    if not regularization:
        return loss
    # Only the L1 penalty is applied, so the penalty graph is built only when
    # it is actually used (the former unused L2 term has been dropped).
    return loss + regularize_network_params(model, l1) * 1e-4
Ejemplo n.º 3
0
    def build_loss(self, env, agent, replay_seq_len):
        """Build the n-step Q-learning loss plus an L2 weight penalty.

        Args:
            env: Environment providing ``rewards``, ``actions``, ``is_alive``
                and ``batch_size``.
            agent: Agent whose ``get_sessions`` yields the Q-value sequence
                and whose ``state_variables`` keys are regularized.
            replay_seq_len: Session length passed to ``get_sessions``.

        Returns:
            Symbolic scalar: the element-wise objective summed and divided by
            ``env.is_alive.sum()``, plus the L2 penalty scaled by ``10 ** -5``.
        """
        # Q-values obtained through experience replay; only the fifth
        # returned item is used here.
        _, _, _, _, q_seq = agent.get_sessions(
            env,
            session_length=replay_seq_len,
            batch_size=env.batch_size,
            optimize_experience_replay=True,
        )

        per_step_loss = qlearning_n_step.get_elementwise_objective(
            q_seq,
            env.actions[0],
            env.rewards,
            env.is_alive,
            gamma_or_gammas=self.gamma,
            n_steps=self.n_steps,
        )

        # Normalize by the total of the "alive" indicator.
        data_term = per_step_loss.sum() / env.is_alive.sum()

        # Weight regularization.
        weight_term = regularize_network_params(
            agent.state_variables.keys(), l2) * 10 ** -5

        return data_term + weight_term
Ejemplo n.º 4
0
def make_training_functions(network, encode_layer, input_var, aug_var,
                            target_var, stack_params, weight_decay):
    """Compile the validation, training and stack-training functions.

    The loss is the mean squared error between the deterministic network
    output and ``target_var`` plus ``weight_decay`` times the L2 penalty over
    the regularizable parameters.

    Args:
        network: Output layer of the network.
        encode_layer: Layer whose deterministic output is returned by
            ``val_fn`` as the encoding.
        input_var, aug_var, target_var: Symbolic inputs of every compiled
            function, in that order.
        stack_params: Parameters updated by ``stack_train_fn``.
        weight_decay: Multiplier of the L2 penalty.

    Returns:
        Tuple ``(val_fn, train_fn, stack_train_fn)``.  ``val_fn`` returns
        ``[loss, encoding, output]``; the two training functions return the
        loss and apply Nesterov-momentum updates (learning rate 0.0001,
        momentum 0.95) to all trainable parameters and to ``stack_params``
        respectively.
    """
    output = lasagne.layers.get_output(network, deterministic=True)
    penalty = regularization.regularize_network_params(
        layer=network,
        penalty=regularization.l2,
        tags={'regularizable': True})
    loss = (lasagne.objectives.squared_error(output, target_var).mean()
            + weight_decay * penalty)

    def momentum_step(parameters):
        # Same optimizer settings for both parameter sets.
        return lasagne.updates.nesterov_momentum(
            loss, parameters, learning_rate=0.0001, momentum=0.95)

    updates = momentum_step(layers.get_all_params(network, trainable=True))
    stack_updates = momentum_step(stack_params)

    encode = lasagne.layers.get_output(encode_layer, deterministic=True)

    val_fn = theano.function(
        [input_var, aug_var, target_var], [loss, encode, output])
    train_fn = theano.function(
        [input_var, aug_var, target_var], loss, updates=updates)
    stack_train_fn = theano.function(
        [input_var, aug_var, target_var], loss, updates=stack_updates)

    return val_fn, train_fn, stack_train_fn
Ejemplo n.º 5
0
    def build_loss(self, env):
        """Build the n-step Q-learning loss with an L2 penalty on the resolver.

        Args:
            env: Environment providing ``rewards``, ``actions`` and
                ``is_alive``.

        Returns:
            Symbolic scalar: the element-wise objective summed and divided by
            ``env.is_alive.sum()``, plus the L2 penalty over
            ``self.resolver`` scaled by ``10 ** -4``.
        """
        # Only the fifth item returned by get_sessions (the Q-value
        # sequence) is needed.
        _, _, _, _, q_seq = self.agent.get_sessions(
            env,
            session_length=self.replay_seq_len,
            batch_size=self.replay_batch_size,
            optimize_experience_replay=True,
        )

        objective = qlearning_n_step.get_elementwise_objective(
            q_seq,
            env.actions[0],
            env.rewards,
            env.is_alive,
            n_steps=self.n_steps,
            gamma_or_gammas=self.gamma,
        )

        data_term = objective.sum() / env.is_alive.sum()
        weight_term = regularize_network_params(self.resolver, l2) * 10 ** -4

        return data_term + weight_term
Ejemplo n.º 6
0
 def __init__(self, istrained, name=None, args=None):
     """Build the model and compile either prediction or training functions.

     Args:
         istrained: When truthy, load saved parameter values from
             ``dataset_path + 'plain_cnn.pkl'`` and compile only ``predfn``.
             Otherwise compile ``trainfn`` and ``vatefn``.
         name: Unused in this constructor.
         args: ``(lr, C, momentum)`` triple; read only when ``istrained`` is
             falsy.
     """
     self.istrained = istrained
     self.X = T.tensor4('X')
     self.y = T.ivector('y')
     self.outprob = build_model(self.X)
     if self.istrained:
         # Pickles are binary data: open in 'rb' and close the handle
         # deterministically instead of leaking it.
         # NOTE(review): unpickling executes arbitrary code -- only load
         # parameter files from a trusted source.
         with open(dataset_path + 'plain_cnn.pkl', 'rb') as param_file:
             params = cPickle.load(param_file)
         layers.set_all_param_values(self.outprob, params)
         self.yFullProb = layers.get_output(self.outprob, deterministic=True)
         self.predfn = makeFunc([self.X, ], [self.yFullProb, ], None)
     else:
         self.lr, self.C, self.momentum = args
         self.params = layers.get_all_params(self.outprob, trainable=True)
         # L2 penalty divided by the network's parameter count.
         reg = regularization.regularize_network_params(self.outprob, regularization.l2)
         reg /= layers.helper.count_params(self.outprob)
         # Training set: stochastic forward pass.
         self.yDropProb = layers.get_output(self.outprob)
         trCrossentropy = objectives.categorical_crossentropy(self.yDropProb, self.y)
         self.trCost = trCrossentropy.mean() + self.C * reg
         # Validation / test set: deterministic forward pass.
         self.yFullProb = layers.get_output(self.outprob, deterministic=True)
         vateCrossentropy = objectives.categorical_crossentropy(self.yFullProb, self.y)
         self.vateCost = vateCrossentropy.mean() + self.C * reg
         # Training function: takes a training batch, outputs the training
         # cost and the stochastic-pass probabilities, and applies the updates.
         updatesDict = updates.nesterov_momentum(self.trCost, self.params, self.lr, self.momentum)
         self.trainfn = makeFunc([self.X, self.y], [self.trCost, self.yDropProb], updatesDict)
         # Validation/test function: takes a batch, outputs the cost and the
         # deterministic probabilities; performs no updates.
         self.vatefn = makeFunc([self.X, self.y], [self.vateCost, self.yFullProb], None)
Ejemplo n.º 7
0
    def complieTrainFunction(self):
        """Compile and return the training function.

        The loss is the mean categorical cross-entropy between the
        non-deterministic network output and the flattened targets, plus
        ``self.weightDecay`` times the L2 penalty.  Compilation start and end
        (with elapsed time) are logged.

        Returns:
            A Theano function ``(inputs, targets) -> [loss, accuracy]`` that
            also applies the updates produced by ``self.optimizer``.
        """
        self.logger.info(logMessage('+', 'Compiling the Training Function'))
        began = time.time()

        probabilities = get_output(self.outputLayer,
                                   deterministic=False,
                                   batch_norm_update_averages=False,
                                   batch_norm_use_averages=False)
        # TODO: check whether the flattening order of the targets matches
        # that of the network output.
        self.flattenedTargetVar = T.flatten(self.targetVar)
        flatTargets = self.flattenedTargetVar

        trainLoss = categorical_crossentropy(probabilities, flatTargets).mean()
        l2Norm = regularize_network_params(self.outputLayer,
                                           lasagne.regularization.l2)
        trainLoss += self.weightDecay * l2Norm

        predictedLabels = T.argmax(probabilities, axis=1)
        trainACC = T.mean(T.eq(predictedLabels, flatTargets),
                          dtype=theano.config.floatX)

        trainableParams = get_all_params(self.outputLayer, trainable=True)
        stepUpdates = self.optimizer(trainLoss, trainableParams,
                                     learning_rate=self.learningRate)

        trainFunc = theano.function([self.inputVar, self.targetVar],
                                    [trainLoss, trainACC],
                                    updates=stepUpdates)

        elapsed = time.time() - began
        self.logger.info(logMessage(
            '+',
            'Compiled the Training Function, spent {:.2f}s'.format(elapsed)))

        return trainFunc
Ejemplo n.º 8
0
    def build_loss(self, env):
        """Return the regularized n-step Q-learning loss for ``env``.

        Args:
            env: Environment providing ``rewards``, ``actions`` and
                ``is_alive``.

        Returns:
            Symbolic scalar: summed element-wise objective divided by
            ``env.is_alive.sum()``, plus ``10**-4`` times the L2 penalty over
            ``self.resolver``.
        """
        # get_sessions returns five items; the last is the Q-value sequence.
        _, _, _, _, replay_qvalues = self.agent.get_sessions(
            env,
            session_length=self.replay_seq_len,
            batch_size=self.replay_batch_size,
            optimize_experience_replay=True,
        )
        rewards = env.rewards

        elementwise = qlearning_n_step.get_elementwise_objective(
            replay_qvalues,
            env.actions[0],
            rewards,
            env.is_alive,
            n_steps=self.n_steps,
            gamma_or_gammas=self.gamma,
        )

        normalized = elementwise.sum() / env.is_alive.sum()
        penalty = regularize_network_params(self.resolver, l2) * 10**-4

        return normalized + penalty
Ejemplo n.º 9
0
    def build_loss(self, env, agent, replay_seq_len):
        """Return the regularized n-step Q-learning loss.

        Args:
            env: Environment providing ``rewards``, ``actions``, ``is_alive``
                and ``batch_size``.
            agent: Agent providing ``get_sessions`` and ``state_variables``.
            replay_seq_len: Session length used for experience replay.

        Returns:
            Symbolic scalar: summed element-wise objective divided by
            ``env.is_alive.sum()``, plus ``10**-5`` times the L2 penalty over
            the keys of ``agent.state_variables``.
        """
        # Replay stored sessions; the fifth returned item holds the Q-values.
        _, _, _, _, replay_qvalues = agent.get_sessions(
            env,
            session_length=replay_seq_len,
            batch_size=env.batch_size,
            optimize_experience_replay=True,
        )
        rewards = env.rewards

        elementwise = qlearning_n_step.get_elementwise_objective(
            replay_qvalues,
            env.actions[0],
            rewards,
            env.is_alive,
            gamma_or_gammas=self.gamma,
            n_steps=self.n_steps,
        )

        # Average over the "alive" fragments only.
        normalized = elementwise.sum() / env.is_alive.sum()

        # L2 regularization of the network weights.
        regularized_layers = agent.state_variables.keys()
        penalty = regularize_network_params(regularized_layers, l2) * 10**-5

        return normalized + penalty
Ejemplo n.º 10
0
def create_iter_funcs_train(l_out, lr, mntm, wd):
    """Compile the training-iteration function for ``l_out``.

    Args:
        l_out: Output layer of the network.
        lr: Learning rate passed to Nesterov momentum.
        mntm: Momentum passed to Nesterov momentum.
        wd: Multiplier of the L2 penalty added to the loss.

    Returns:
        A Theano function ``(X_batch, y_batch) -> [loss, accuracy]`` that
        applies the Nesterov-momentum updates to all trainable parameters.
    """
    X = T.tensor4('X')
    y = T.ivector('y')
    X_batch = T.tensor4('X_batch')
    y_batch = T.ivector('y_batch')

    y_hat = layers.get_output(l_out, X, deterministic=False)

    # Softmax loss.
    train_loss = T.mean(T.nnet.categorical_crossentropy(y_hat, y))

    # L2 regularization.
    train_loss += wd * regularize_network_params(l_out, l2)

    train_acc = T.mean(T.eq(y_hat.argmax(axis=1), y))

    all_params = layers.get_all_params(l_out, trainable=True)
    updates = lasagne.updates.nesterov_momentum(train_loss, all_params, lr,
                                                mntm)

    # ``theano.Param`` is deprecated and absent from later Theano releases;
    # passing the variables directly is equivalent and works on all versions.
    train_iter = theano.function(
        inputs=[X_batch, y_batch],
        outputs=[train_loss, train_acc],
        updates=updates,
        givens={
            X: X_batch,
            y: y_batch,
        },
    )

    return train_iter
Ejemplo n.º 11
0
    def __build_loss_train__fn__(self):
        """Compile the training, prediction and validation functions.

        Sets ``self.eta`` (shared learning rate, initially 0.05),
        ``self.__train_fn__``, ``self.__predict_fn__`` and
        ``self.__val_fn__``.  Each loss is the mean categorical cross-entropy
        plus ``1e-4`` times the L2 penalty over ``self.net``.

        The validation loss and accuracy are built from the deterministic
        forward pass, so stochastic layers do not add noise to validation
        metrics; the training loss keeps the stochastic pass.
        """
        def penalized_loss(probabilities):
            # Mean cross-entropy plus the fixed-weight L2 penalty.
            crossentropy = objectives.categorical_crossentropy(
                probabilities, self.__target_var__)
            return crossentropy.mean() + 1e-4 * regularization.regularize_network_params(
                self.net, regularization.l2)

        # Training loss on the stochastic forward pass.
        prediction = layers.get_output(self.net)
        loss = penalized_loss(prediction)

        # Validation metrics on the deterministic forward pass.
        val_prediction = layers.get_output(self.net, deterministic=True)
        val_loss = penalized_loss(val_prediction)
        val_acc = T.mean(T.eq(T.argmax(val_prediction, axis=1),
                              self.__target_var__),
                         dtype=theano.config.floatX)

        # Parameter update expressions.
        params = layers.get_all_params(self.net, trainable=True)
        self.eta = theano.shared(sp.array(sp.float32(0.05), dtype=sp.float32))
        update_rule = updates.nesterov_momentum(loss,
                                                params,
                                                learning_rate=self.eta,
                                                momentum=0.9)

        # Training function: updates the parameters, returns the training loss.
        self.__train_fn__ = theano.function(
            [self.__input_var__, self.__target_var__],
            loss,
            updates=update_rule)
        self.__predict_fn__ = theano.function(
            [self.__input_var__],
            layers.get_output(self.net, deterministic=True))
        self.__val_fn__ = theano.function(
            [self.__input_var__, self.__target_var__], [val_loss, val_acc])
Ejemplo n.º 12
0
def define_updates(network, input_var, target_var, weight_var, learning_rate=0.01, momentum=0.9, l2_lambda=1e-5):
    """Compile the training and validation functions for ``network``.

    Args:
        network: Output layer of the network.
        input_var, target_var, weight_var: Symbolic inputs of both functions,
            in that order.
        learning_rate: Learning rate for Nesterov momentum.
        momentum: Momentum for Nesterov momentum.
        l2_lambda: Multiplier of the L2 penalty.

    Returns:
        Tuple ``(train_fn, val_fn)``.  Both return
        ``[loss, l2_loss, acc, target_prediction, prediction]`` as produced by
        ``_score_metrics``; ``train_fn`` uses the stochastic output and
        applies the updates, ``val_fn`` uses the deterministic output.
    """
    trainable = lasagne.layers.get_all_params(network, trainable=True)

    stochastic_out = lasagne.layers.get_output(network)
    deterministic_out = lasagne.layers.get_output(network, deterministic=True)

    l2_loss = l2_lambda * regularize_network_params(network, l2)

    loss, acc, target_prediction, prediction = _score_metrics(
        stochastic_out, target_var, weight_var, l2_loss)
    t_loss, t_acc, t_target_prediction, t_prediction = _score_metrics(
        deterministic_out, target_var, weight_var, l2_loss)

    step = lasagne.updates.nesterov_momentum(
        loss, trainable, learning_rate=learning_rate, momentum=momentum)

    train_outputs = [loss, l2_loss, acc, target_prediction, prediction]
    val_outputs = [t_loss, l2_loss, t_acc, t_target_prediction, t_prediction]

    train_fn = theano.function([input_var, target_var, weight_var],
                               train_outputs,
                               updates=step)
    val_fn = theano.function([input_var, target_var, weight_var],
                             val_outputs)

    return train_fn, val_fn
Ejemplo n.º 13
0
    def compileValFunction(self):
        """Compile and return the validation function.

        The loss is the mean categorical cross-entropy between the
        deterministic network output and the flattened targets, plus
        ``self.weightDecay`` times the L2 penalty.  Compilation start and end
        (with elapsed time) are logged.

        Returns:
            A Theano function ``(inputs, targets) -> [loss, accuracy]`` that
            performs no parameter updates.
        """
        self.logger.info(logMessage('+', 'Compiling the Validation Function'))
        began = time.time()

        probabilities = get_output(self.outputLayer,
                                   deterministic=True,
                                   batch_norm_update_averages=False,
                                   batch_norm_use_averages=False)
        # TODO: check whether the flattening order of the targets matches
        # that of the network output.
        self.flattenedTargetVar = T.flatten(self.targetVar)
        flatTargets = self.flattenedTargetVar

        valLoss = categorical_crossentropy(probabilities, flatTargets).mean()
        l2Norm = regularize_network_params(self.outputLayer,
                                           lasagne.regularization.l2)
        valLoss += self.weightDecay * l2Norm

        predictedLabels = T.argmax(probabilities, axis=1)
        valACC = T.mean(T.eq(predictedLabels, flatTargets),
                        dtype=theano.config.floatX)

        valFunc = theano.function([self.inputVar, self.targetVar],
                                  [valLoss, valACC])

        elapsed = time.time() - began
        self.logger.info(logMessage(
            '+',
            'Compiled the Validation Function, spent {:.2f}s'.format(elapsed)))

        return valFunc
Ejemplo n.º 14
0
 def __init__(self, C, lr):
     """Build the network and compile its Theano functions.

     Compiles ``flattenfn`` (output of the ``'flatten'`` layer), ``predictfn``
     (output of the ``'out'`` layer), ``scorefn`` (accuracy) and ``trainfn``
     (cost and accuracy, with Nesterov-momentum updates applied to the
     trainable parameters up to the ``'flatten'`` layer).

     Args:
         C: Multiplier of the L2 penalty, which is divided by the parameter
             count of the ``'flatten'`` sub-network.
         lr: Learning rate for Nesterov momentum (momentum fixed at 0.9).
     """
     self.C = C
     self.X = T.ftensor4()
     self.Y = T.fmatrix()
     self.net = self._forward()

     flatten_layer = self.net['flatten']
     params = layers.get_all_params(flatten_layer, trainable=True)
     netout = layers.get_output(self.net['out'])
     flattenout = layers.get_output(flatten_layer)

     # L2 penalty of the 'flatten' sub-network, per parameter.
     reg = regularization.regularize_network_params(flatten_layer,
                                                    regularization.l2)
     reg /= layers.helper.count_params(flatten_layer)

     self.flattenfn = theano.function(
         [self.X], flattenout, allow_input_downcast=True)
     self.predictfn = theano.function(
         [self.X], netout, allow_input_downcast=True)

     accuracy = myUtils.basic.accuracy(netout, self.Y)
     self.scorefn = theano.function(
         [self.X, self.Y], accuracy, allow_input_downcast=True)

     # First parameter of the output layer.
     self.sharedBeta = self.net['out'].get_params()[0]

     cost = T.mean(objectives.categorical_crossentropy(netout, self.Y)) + C * reg
     updatesDict = updates.nesterov_momentum(cost, params, lr, 0.9)
     # Train the random parameters.
     self.trainfn = theano.function(
         [self.X, self.Y], [cost, accuracy],
         updates=updatesDict,
         allow_input_downcast=True)
Ejemplo n.º 15
0
def create_iter_funcs_train(l_out, lr, mntm, wd):
    """Compile the training-iteration function for ``l_out``.

    Args:
        l_out: Output layer of the network.
        lr: Learning rate passed to Nesterov momentum.
        mntm: Momentum passed to Nesterov momentum.
        wd: Multiplier of the L2 penalty added to the loss.

    Returns:
        A Theano function ``(X_batch, y_batch) -> [loss, accuracy]`` that
        applies the Nesterov-momentum updates to all trainable parameters.
    """
    X = T.tensor4('X')
    y = T.ivector('y')
    X_batch = T.tensor4('X_batch')
    y_batch = T.ivector('y_batch')

    y_hat = layers.get_output(l_out, X, deterministic=False)

    # Softmax loss.
    train_loss = T.mean(
        T.nnet.categorical_crossentropy(y_hat, y))

    # L2 regularization.
    train_loss += wd * regularize_network_params(l_out, l2)

    train_acc = T.mean(
        T.eq(y_hat.argmax(axis=1), y))

    all_params = layers.get_all_params(l_out, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
        train_loss, all_params, lr, mntm)

    # ``theano.Param`` is deprecated and absent from later Theano releases;
    # passing the variables directly is equivalent and works on all versions.
    train_iter = theano.function(
        inputs=[X_batch, y_batch],
        outputs=[train_loss, train_acc],
        updates=updates,
        givens={
            X: X_batch,
            y: y_batch,
        },
    )

    return train_iter
    def _create_loss(self, output_layer, predicted, target, error_threshold, proto_loss_multiplier=1.0):
        """Build the regularized loss, optionally scaled by a cognacy prior.

        Args:
            output_layer: Layer whose network parameters are L2-penalized.
            predicted: Symbolic predictions.
            target: Symbolic targets.
            error_threshold: Offset added to the negated prediction error
                inside the sigmoid of the cognacy prior; returned unchanged.
            proto_loss_multiplier: Final multiplier applied to the loss.

        Returns:
            Tuple ``(loss, cognacy_prior_factor, target_prediction_error,
            error_threshold)``.

        Raises:
            ValueError: If ``self.output_encoding`` is not ``"phonetic"``,
                ``"embedding"`` or ``"character"``.
        """
        # Phonetic/embedding: binary cross-entropy (multi-label).
        # Character: categorical cross-entropy (single label).
        if self.output_encoding in ("phonetic", "embedding"):
            objective = lasagne.objectives.binary_crossentropy
        elif self.output_encoding == "character":
            objective = lasagne.objectives.categorical_crossentropy
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on ``loss``; fail early and explicitly.
            raise ValueError(
                "Unsupported output encoding: %r" % (self.output_encoding,))

        # Regularization term.
        reg_term = self.reg_weight * regularize_network_params(output_layer, l2)

        loss = T.sum(objective(predicted, target)) / self.batch_size + reg_term

        # Multiply the loss with the cognacy prior: more should be learned
        # from probable cognate examples.
        if self.cognacy_prior > 0.0:
            target_prediction_error = T.sum(lasagne.objectives.squared_error(predicted, target)) / self.batch_size
            # sigmoid(-error + error_threshold): the factor is high for low
            # error and declines steeply once the error exceeds the threshold.
            cognacy_prior_factor = utility.sigmoid(-target_prediction_error + error_threshold)
            loss *= cognacy_prior_factor
        else:
            cognacy_prior_factor = T.constant(1)
            target_prediction_error = T.constant(0)

        loss *= proto_loss_multiplier
        return loss, cognacy_prior_factor, target_prediction_error, error_threshold
Ejemplo n.º 17
0
 def __init__(self, lr, C, momentum):
     """Build the network and compile the training and evaluation functions.

     Args:
         lr: Learning rate for Nesterov momentum.
         C: Multiplier of the L2 penalty, which is divided by the network's
             parameter count.
         momentum: Momentum for Nesterov momentum.
     """
     self.lr = lr
     self.C = C
     self.momentum = momentum
     self.X = T.tensor4('X')
     self.y = T.ivector('y')
     self.network = self._build()
     self.params = layers.get_all_params(self.network, trainable=True)

     # L2 penalty per parameter.
     reg = regularization.regularize_network_params(self.network, regularization.l2)
     reg /= layers.helper.count_params(self.network)
     symbolic_inputs = [self.X, self.y]

     # Training set: stochastic forward pass.
     train_probs = layers.get_output(self.network)
     self.trEqs = myUtils.basic.eqs(train_probs, self.y)
     self.trCost = objectives.categorical_crossentropy(train_probs, self.y).mean() + C * reg

     # Validation / test set: deterministic forward pass.
     eval_probs = layers.get_output(self.network, deterministic=True)
     self.vateEqs = myUtils.basic.eqs(eval_probs, self.y)
     self.vateCost = objectives.categorical_crossentropy(eval_probs, self.y).mean() + C * reg
     self.yPred = eval_probs

     # Training function: takes a training batch, outputs the training cost
     # and the ``eqs`` result, and applies the parameter updates.
     updatesDict = updates.nesterov_momentum(self.trCost, self.params, lr, momentum)
     self.trainfn = myUtils.basic.makeFunc(
         symbolic_inputs, [self.trCost, self.trEqs], updatesDict)
     # Validation/test function: takes a batch, outputs the cost and the
     # ``eqs`` result; performs no updates.
     self.vatefn = myUtils.basic.makeFunc(
         [self.X, self.y], [self.vateCost, self.vateEqs], None)
Ejemplo n.º 18
0
    def compile_train_predict(self, stochastic_train, stochastic_predict):
        """Compile ``self.train_fn`` and ``self.predict_fn``.

        A network is built on top of the posterior GP mean via
        ``self.extend_network``.  The training loss is the mean categorical
        cross-entropy, plus an L2 penalty when ``self.regularize_weight`` is
        positive.

        Args:
            stochastic_train: Passed to ``extend_network`` for the training
                network; when truthy the labels are repeated
                ``self.n_samples`` times.
            stochastic_predict: Passed to ``extend_network`` for the test
                network; when truthy and ``self.n_samples > 1`` the test
                prediction is averaged over axis 0.

        Side effects:
            Sets ``gp_hyperparams``, ``train_network``, ``train_fn``,
            ``test_network``, ``copy_params`` and ``predict_fn`` on ``self``.
        """
        # Symbolic inputs and hyper-parameters of the posterior GP.
        input_vars = self.post_gp.data_variables
        gp_hyperparams = self.post_gp.params
        self.gp_hyperparams = gp_hyperparams

        # Turn the 1-d mean vector into a row (N -> 1xN).
        mu = self.post_gp.mean().dimshuffle('x', 0)

        self.train_network = self.extend_network(mu, stochastic_train)
        train_predict = lasagne.layers.get_output(self.train_network)

        # Alternative (disabled): compute the expected prediction instead.
        #if stochastic_train and self.n_samples > 1:
        #    train_predict = train_predict.mean(axis=0, keepdims=True)

        label = T.ivector('label')

        # Expected loss: repeat the labels to match the sampled predictions.
        label_rep = label.repeat(self.n_samples) if stochastic_train else label

        loss = categorical_crossentropy(train_predict, label_rep).mean()
        # Alternative (disabled): loss for the expected prediction.
        #loss = categorical_crossentropy(train_predict, label).mean()
        if self.regularize_weight > 0:
            loss += (self.regularize_weight *
                     regularize_network_params(self.train_network, l2))

        update_params = lasagne.layers.get_all_params(self.train_network,
                                                      trainable=True)
        if self.update_gp:
            # Optimize the GP hyper-parameters together with the network.
            update_params += gp_hyperparams

        # The GP data variables are treated as constants by the gradient.
        grad_loss = theano.grad(loss, update_params,
                                consider_constant=input_vars)
        updates = self.optimizer(grad_loss, update_params,
                                 **self.optimizer_kwargs)
        self.train_fn = theano.function(input_vars + [label],
                                        loss, updates=updates)

        # Reuse the training network when both modes agree; otherwise build a
        # separate test network and flag that parameters must be copied.
        if stochastic_train == stochastic_predict:
            self.test_network = self.train_network
            self.copy_params = False
        else:
            self.test_network = self.extend_network(mu, stochastic_predict)
            self.copy_params = True

        # deterministic=True disables dropout at prediction time, if used.
        test_predict = lasagne.layers.get_output(self.test_network,
                                                 deterministic=True)
        if stochastic_predict and self.n_samples > 1:
            test_predict = test_predict.mean(axis=0, keepdims=True)

        self.predict_fn = theano.function(input_vars, test_predict)
Ejemplo n.º 19
0
def train_setup():
    """Build the encoder/decoder network and compile train/validation functions.

    Network sizes, the optional initial model file, penalty weights and
    Adadelta settings are read from the module-level ``config``.

    Returns:
        Tuple ``(encoding, decoding, train_fn, val_fn)``.  ``train_fn(x, y)``
        returns ``[error, l1_norm, l2_norm]`` and applies Adadelta updates to
        the mean squared error plus both penalties; ``val_fn(x, y)`` returns
        the mean squared error of the deterministic pass.
    """
    x = T.tensor3('input')
    y = T.matrix('output')

    encoding, decoding = cnn(x, config.input_length, config.output_length,
                             config.encoding_length)

    # Call form of print: identical output on Python 2, valid on Python 3.
    print('Number of Parameters {0}'.format(count_params(decoding)))

    if config.init_model is not None:

        with np.load(config.init_model) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]

        set_all_param_values(decoding, param_values)

    # Training graph.
    prediction = get_output(decoding)

    error = squared_error(y, prediction)
    error = error.mean()

    l1_norm = config.l1_weight * regularize_network_params(decoding, l1)
    l2_norm = config.l2_weight * regularize_network_params(decoding, l2)

    total_error = error + l1_norm + l2_norm

    params = get_all_params(decoding, trainable=True)

    updates = adadelta(total_error, params, config.learning_rate,
                       config.rho,
                       config.eps)

    train_fn = function([x, y], [error, l1_norm, l2_norm],
                        updates=updates,
                        allow_input_downcast=True)

    # Validation graph: deterministic pass, no penalties, no updates.
    val_prediction = get_output(decoding, deterministic=True)
    val_error = squared_error(y, val_prediction)
    val_error = val_error.mean()

    val_fn = function([x, y], val_error, allow_input_downcast=True)

    return encoding, decoding, train_fn, val_fn
Ejemplo n.º 20
0
def train_setup():
    """Build the classifier network and compile train/validation functions.

    Network sizes, the optional initial model file, penalty weights and
    Adadelta settings are read from the module-level ``config``.

    Returns:
        Tuple ``(network, train_fn, val_fn)``.  ``train_fn(x, y)`` returns
        ``[ent, l1_norm, l2_norm, prediction]`` and applies Adadelta updates
        to the mean cross-entropy plus both penalties; ``val_fn(x, y)``
        returns ``[val_ent, val_prediction]`` from the deterministic pass.
    """
    x = T.tensor3('input')
    y = T.lvector('output')

    network = cnn(x, config.input_length, config.output_length)

    # Call form of print: identical output on Python 2, valid on Python 3.
    print('Number of Parameters {0}'.format(count_params(network)))

    if config.init_model is not None:

        with np.load(config.init_model) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]

        # The values belong to ``network``; the previous code referenced an
        # undefined name ``decoding`` and raised NameError on this path.
        set_all_param_values(network, param_values)

    # Training graph.
    prediction = get_output(network)

    ent = categorical_crossentropy(prediction, y)
    ent = ent.mean()

    l1_norm = config.l1_weight * regularize_network_params(network, l1)
    l2_norm = config.l2_weight * regularize_network_params(network, l2)

    total_error = ent + l1_norm + l2_norm

    params = get_all_params(network, trainable=True)

    updates = adadelta(total_error, params, config.learning_rate,
                       config.rho,
                       config.eps)

    train_fn = function([x, y], [ent, l1_norm, l2_norm, prediction],
                        updates=updates,
                        allow_input_downcast=True)

    # Validation graph: deterministic pass, no penalties, no updates.
    val_prediction = get_output(network, deterministic=True)
    val_ent = categorical_crossentropy(val_prediction, y)
    val_ent = val_ent.mean()

    val_fn = function([x, y], [val_ent, val_prediction],
                      allow_input_downcast=True)

    return network, train_fn, val_fn
Ejemplo n.º 21
0
def get_functions(cfg):
    """Build the network described by ``cfg`` and compile its functions.

    The training cost is ``loss(prediction, targets, weights)`` plus
    ``5e-6`` times the sum of the L1 and L2 penalties.

    Args:
        cfg: Configuration passed to ``build_network``; ``cfg['optimiser']``
            selects the optimizer given to ``get_updates``.

    Returns:
        Tuple ``(train_fn, test_predict_fn, train_predict_fn, save_params,
        load_params, output_shape, set_lr)``.
    """
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    weights_var = T.vector('weights')
    # Shared learning rate, initially 0; changed through ``set_lr``.
    lr = theano.shared(np.float32(0.0000))

    network = build_network(cfg, input_var)
    prediction = lasagne.layers.get_output(network)
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    output_shape = lasagne.layers.get_output_shape(network)
    l2_penalty = regularize_network_params(network, l2)
    l1_penalty = regularize_network_params(network, l1)
    cost = loss(prediction, target_var, weights_var) + 5*1e-6*(l1_penalty + l2_penalty)
    params = lasagne.layers.get_all_params(network, trainable=True)

    def save_params(path):
        # Saves the list of parameter variables itself (not their values);
        # ``load_params`` relies on that layout.
        np.savez(path, params)
        return

    def load_params(path):
        data = np.load(path)
        param_values = [x.get_value() for x in data['arr_0']]
        lasagne.layers.set_all_param_values(network, param_values, trainable=True)
        return

    def set_lr(value):
        lr.set_value(value)
        return

    optimiser = cfg['optimiser']
    updates = get_updates(cost, params, optimiser, lr)

    def acc(yp, yt):
        # Compare against the targets passed in, not the enclosing variable.
        output = T.argmax(yp, axis=1)
        return T.mean(T.eq(output, yt))

    # Validation accuracy uses the deterministic forward pass.
    accuracy = acc(test_prediction, target_var)

    train_fn = theano.function([input_var, target_var, weights_var], cost, updates=updates)
    # NOTE(review): val_fn is compiled but not part of the returned tuple.
    val_fn = theano.function([input_var, target_var], accuracy)
    train_predict_fn = theano.function([input_var], prediction)
    test_predict_fn = theano.function([input_var], test_prediction)

    return train_fn, test_predict_fn, train_predict_fn, save_params, load_params, output_shape, set_lr
Ejemplo n.º 22
0
    def __init__(self,
                 steps=1,
                 num_layers=2,
                 num_units=32,
                 eps=1e-2,
                 recurrent=False,
                 nonlinearity=tanh,
                 ):
        """Build a regression network and compile its Theano functions.

        Sets ``self.f`` (network output), ``self.error`` (mean squared error)
        and ``self.train`` (regularized loss with Adam updates).

        Args:
            steps: Width of the input layer, shape ``(None, steps)``.
            num_layers: Number of hidden dense layers.
            num_units: Units per hidden dense layer (and per LSTM layer).
            eps: Multiplier of the L2 penalty in the training loss.
            recurrent: When truthy, reshape the input to ``(-1, steps, 1)``
                and insert an LSTM layer before the dense layers.
            nonlinearity: Nonlinearity of the hidden dense layers.
        """
        self.steps = steps

        self.X = T.fmatrix()
        self.Y = T.fmatrix()

        def network(l):
            # Optional recurrent front-end.
            if recurrent:
                l = LSTMLayer(ReshapeLayer(l, shape=(-1, steps, 1)),
                              num_units)

            # Hidden stack followed by a single linear output unit.
            for _ in range(num_layers):
                l = DenseLayer(l,
                               num_units=num_units,
                               nonlinearity=nonlinearity)
            return DenseLayer(l, num_units=1, nonlinearity=linear)

        self.network = network

        out_layer = self.network(
            InputLayer(input_var=self.X, shape=(None, steps)))

        self.l_ = out_layer
        self.x_ = get_output(self.l_)

        self.f = theano.function([self.X],
                                 self.x_,
                                 allow_input_downcast=True)

        l2_penalty = regularize_network_params(out_layer, L2)
        error = squared_error(self.x_, self.Y).mean()
        loss = error + eps * l2_penalty
        step = adam(loss, get_all_params(out_layer))

        self.error = theano.function([self.X, self.Y],
                                     error,
                                     allow_input_downcast=True)

        self.train = theano.function([self.X, self.Y],
                                     loss,
                                     updates=step,
                                     allow_input_downcast=True)
Ejemplo n.º 23
0
def set_network_trainer(input_data,
                        input_mask,
                        target_data,
                        target_mask,
                        network,
                        updater,
                        learning_rate,
                        grad_max_norm=10.,
                        load_updater_params=None,
                        l2_lambda=0.):
    """Compile the training function for a masked sequence classifier.

    Args:
        input_data: Symbolic input variable fed to the compiled function.
        input_mask: Symbolic input mask fed to the compiled function.
        target_data: Symbolic target labels; flattened before the
            crossentropy is computed.
        target_mask: Symbolic target mask; masks the per-element cost and
            normalizes its sum.
        network: Output layer of the Lasagne network.
        updater: Callable invoked as ``updater(loss_or_grads=..., params=...,
            learning_rate=..., load_params_dict=...)`` returning
            ``(updates, updater_params)``.
        learning_rate: Initial value of the shared learning-rate variable.
        grad_max_norm: Maximum total gradient norm used for clipping.
        load_updater_params: Forwarded to ``updater`` as ``load_params_dict``.
        l2_lambda: Weight of the L2 penalty in the differentiated cost.  The
            default ``0.`` differentiates the prediction cost only.

    Returns:
        ``(training_fn, trainer_params)``: the compiled Theano function and
        whatever second value ``updater`` returned.  ``training_fn`` outputs
        the predictions, their argmax, the masked prediction cost, the L2
        regularizer value and the gradient norm.
    """
    # get network output data
    predict_data = get_output(network, deterministic=False)
    predict_idx = T.argmax(predict_data, axis=-1)

    # get prediction cost
    # NOTE(review): `eps` is not defined in this function; presumably a
    # module-level constant - confirm it is in scope.
    train_predict_cost = categorical_crossentropy(
        predictions=T.reshape(predict_data, (-1, predict_data.shape[-1])) + eps,
        targets=T.flatten(target_data, 1))
    train_predict_cost = train_predict_cost*T.flatten(target_mask, 1)
    train_predict_cost = train_predict_cost.sum()/target_mask.sum()

    # get regularizer cost
    train_regularizer_cost = regularize_network_params(network, penalty=l2)

    # get network parameters
    network_params = get_all_params(network, trainable=True)

    # get network gradients with clipping; the regularizer only enters the
    # differentiated cost when a non-zero weight is requested
    train_cost = train_predict_cost
    if l2_lambda:
        train_cost = train_cost + train_regularizer_cost*l2_lambda
    network_grads = theano.grad(cost=train_cost,
                                wrt=network_params)
    network_grads, network_grads_norm = total_norm_constraint(tensor_vars=network_grads,
                                                              max_norm=grad_max_norm,
                                                              return_norm=True)

    # set updater
    train_lr = theano.shared(lasagne.utils.floatX(learning_rate))
    train_updates, trainer_params = updater(loss_or_grads=network_grads,
                                            params=network_params,
                                            learning_rate=train_lr,
                                            load_params_dict=load_updater_params)

    # get training (update) function
    training_fn = theano.function(inputs=[input_data,
                                          input_mask,
                                          target_data,
                                          target_mask],
                                  outputs=[predict_data,
                                           predict_idx,
                                           train_predict_cost,
                                           train_regularizer_cost,
                                           network_grads_norm],
                                  updates=train_updates, allow_input_downcast=True)

    return training_fn, trainer_params
Ejemplo n.º 24
0
    def build_treatment_model(self, n_vars, **kwargs):
        """Build the treatment network and compile its Theano functions.

        Args:
            n_vars: Second dimension of the input layer's shape
                ``(None, n_vars)``.
            **kwargs: Must contain ``'dense_size'`` (hidden width; the first
                hidden layer uses twice this value) and ``'n_dense_layers'``
                (total number of hidden dense layers).

        Returns:
            The initial parameter values of the network, as returned by
            ``layers.get_all_param_values``.

        Side effects:
            Sets ``self.treatment_output``, ``self._train_fn``,
            ``self._loss_fn`` and ``self._output_fn``.
        """
        input_vars = TT.matrix()
        instrument_vars = TT.matrix()
        targets = TT.vector()

        inputs = layers.InputLayer((None, n_vars), input_vars)
        inputs = layers.DropoutLayer(inputs, p=0.2)

        dense_layer = layers.DenseLayer(inputs, 2 * kwargs['dense_size'], nonlinearity=nonlinearities.rectify)
        dense_layer = layers.batch_norm(dense_layer)
        dense_layer = layers.DropoutLayer(dense_layer, p=0.2)

        # `range` instead of `xrange` so the method also runs on Python 3.
        for _ in range(kwargs['n_dense_layers'] - 1):
            dense_layer = layers.DenseLayer(dense_layer, kwargs['dense_size'], nonlinearity=nonlinearities.rectify)
            dense_layer = layers.batch_norm(dense_layer)

        self.treatment_output = layers.DenseLayer(dense_layer, 1, nonlinearity=nonlinearities.linear)
        init_params = layers.get_all_param_values(self.treatment_output)

        prediction = layers.get_output(self.treatment_output, deterministic=False)
        test_prediction = layers.get_output(self.treatment_output, deterministic=True)

        l2_cost = regularization.regularize_network_params(self.treatment_output, regularization.l2)
        loss = gmm_loss(prediction, targets, instrument_vars) + 1e-4 * l2_cost

        params = layers.get_all_params(self.treatment_output, trainable=True)
        param_updates = updates.adadelta(loss, params)

        self._train_fn = theano.function(
            [
                input_vars,
                targets,
                instrument_vars,
            ],
            loss,
            updates=param_updates
        )

        # NOTE(review): this evaluates the same non-deterministic `loss` used
        # for training (dropout active) - confirm that is intended.
        self._loss_fn = theano.function(
            [
                input_vars,
                targets,
                instrument_vars,
            ],
            loss,
        )

        self._output_fn = theano.function(
            [
                input_vars,
            ],
            test_prediction,
        )

        return init_params
    def test_regularize_network_params(self, layers):
        """The penalty is applied exactly twice: to ``l_2.W`` and ``l_3.W``."""
        from lasagne.regularization import regularize_network_params

        _, second, third = layers
        penalty_fn = Mock(return_value=0)

        regularize_network_params(third, penalty_fn)

        assert penalty_fn.call_count == 2
        for layer in (second, third):
            penalty_fn.assert_any_call(layer.W)
    def test_regularize_network_params(self, layers):
        """Check which parameters ``regularize_network_params`` penalizes.

        Starting from the last of the three fixture layers, the mocked
        penalty must be invoked two times, once with the weight matrix of the
        second layer and once with that of the third.
        """
        from lasagne.regularization import regularize_network_params

        first_layer, mid_layer, last_layer = layers
        mock_penalty = Mock(return_value=0)

        regularize_network_params(last_layer, mock_penalty)

        expected_weights = [mid_layer.W, last_layer.W]
        assert mock_penalty.call_count == 2
        for weights in expected_weights:
            mock_penalty.assert_any_call(weights)
Ejemplo n.º 27
0
    def build_instrument_model(self, n_vars, **kwargs):
        """Build the instrument network and compile its Theano functions.

        Args:
            n_vars: Second dimension of the input layer's shape
                ``(None, n_vars)``.
            **kwargs: Must contain ``'dense_size'`` (hidden width) and
                ``'n_dense_layers'`` (total number of hidden dense layers).

        Returns:
            The initial parameter values of the network, as returned by
            ``layers.get_all_param_values``.

        Side effects:
            Sets ``self.instrument_output``, ``self._instrument_train_fn``,
            ``self._instrument_loss_fn`` and ``self._instrument_output_fn``.
        """
        targets = TT.vector()
        instrument_vars = TT.matrix()

        instruments = layers.InputLayer((None, n_vars), instrument_vars)
        instruments = layers.DropoutLayer(instruments, p=0.2)

        dense_layer = layers.DenseLayer(instruments,
                                        kwargs['dense_size'],
                                        nonlinearity=nonlinearities.tanh)
        dense_layer = layers.DropoutLayer(dense_layer, p=0.2)

        # `range` instead of `xrange` so the method also runs on Python 3.
        for _ in range(kwargs['n_dense_layers'] - 1):
            dense_layer = layers.DenseLayer(dense_layer,
                                            kwargs['dense_size'],
                                            nonlinearity=nonlinearities.tanh)
            dense_layer = layers.DropoutLayer(dense_layer, p=0.5)

        self.instrument_output = layers.DenseLayer(
            dense_layer, 1, nonlinearity=nonlinearities.linear)
        init_params = layers.get_all_param_values(self.instrument_output)
        prediction = layers.get_output(self.instrument_output,
                                       deterministic=False)
        test_prediction = layers.get_output(self.instrument_output,
                                            deterministic=True)

        # flexible here, endog variable can be categorical, continuous, etc.
        l2_cost = regularization.regularize_network_params(
            self.instrument_output, regularization.l2)
        # The mean squared error is built once: regularized for training,
        # unregularized for the reported loss.
        mse = objectives.squared_error(
            prediction.flatten(), targets.flatten()).mean()
        loss = mse + 1e-4 * l2_cost
        # NOTE(review): loss_total is based on the non-deterministic
        # prediction (dropout active) - confirm that is intended.
        loss_total = mse

        params = layers.get_all_params(self.instrument_output, trainable=True)
        param_updates = updates.adadelta(loss, params)

        self._instrument_train_fn = theano.function(
            [
                targets,
                instrument_vars,
            ],
            loss,
            updates=param_updates)

        self._instrument_loss_fn = theano.function(
            [
                targets,
                instrument_vars,
            ],
            loss_total)

        self._instrument_output_fn = theano.function([instrument_vars],
                                                     test_prediction)

        return init_params
Ejemplo n.º 28
0
def triplet_loss_iter(embedder, update_params={}):
    """Compile training and validation functions for a triplet embedding loss.

    ``embedder`` is indexed with ``[-1]`` to obtain the final embedding layer,
    so it is expected to be a sequence of output layers.  ``update_params`` is
    accepted but not used by the active code path.

    Returns a dict with the compiled ``'train'`` and ``'valid'`` functions and
    ``'gradnames'``, the names of the gradient variables that ``train``
    returns after the loss.
    """
    # One symbolic image batch per triplet role.
    X_triplets = {
        'anchor': T.tensor4(),
        'positive': T.tensor4(),
        'negative': T.tensor4(),
    }
    symbolic_inputs = [X_triplets[role]
                       for role in ('anchor', 'positive', 'negative')]

    final_emb_layer = embedder[-1]
    imwrite_architecture(ll.get_all_layers(embedder), './layer_rep.png')

    # The network may expose several prediction layers; the last one is the
    # embedding of interest.  Training passes go through the whole list,
    # validation passes only through the final layer, deterministically.
    predicted_embeds_train = {}
    for role, batch in X_triplets.items():
        predicted_embeds_train[role] = ll.get_output(embedder, batch)[-1]
    predicted_embeds_valid = {}
    for role, batch in X_triplets.items():
        predicted_embeds_valid[role] = ll.get_output(
            final_emb_layer, batch, deterministic=True)

    alpha = 0.2  # FaceNet alpha

    def anchor_distance(pred, role):
        # L2 distance between the anchor embedding and the given role's.
        return (pred['anchor'] - pred[role]).norm(2, axis=1)

    def triplet_distances(pred):
        hinge = (anchor_distance(pred, 'positive')
                 - anchor_distance(pred, 'negative') + alpha)
        return hinge.clip(0, np.inf)

    def triplet_failed(pred):
        return T.mean(triplet_distances(pred) > alpha)

    def triplet_loss(pred):
        return T.sum(triplet_distances(pred))

    decay = 0.001
    reg = regularize_network_params(final_emb_layer, l2) * decay

    def losses_reg(pred):
        return triplet_loss(pred) + reg

    loss_train = losses_reg(predicted_embeds_train)
    loss_train.name = 'TL'  # prefix used when naming the gradients
    all_params = ll.get_all_params(embedder, trainable=True)  # handles multiple 'roots'
    grads = T.grad(loss_train, all_params, add_names=True)
    updates = adam(grads, all_params)

    print("Compiling network for training")
    tic = time.time()
    train_iter = theano.function(symbolic_inputs,
                                 [loss_train] + grads,
                                 updates=updates)
    toc = time.time() - tic
    print("Took %0.2f seconds" % toc)

    print("Compiling network for validation")
    tic = time.time()
    valid_outputs = [
        triplet_loss(predicted_embeds_valid),
        losses_reg(predicted_embeds_valid),
        triplet_failed(predicted_embeds_valid),
    ]
    valid_iter = theano.function(symbolic_inputs, valid_outputs)
    toc = time.time() - tic
    print("Took %0.2f seconds" % toc)

    gradnames = [g.name for g in grads]
    return {'train': train_iter, 'valid': valid_iter, 'gradnames': gradnames}
Ejemplo n.º 29
0
def contrastive_loss_iter(embedder, update_params={}):
    """Compile training and validation functions for a contrastive loss.

    ``embedder`` is indexed with ``[-1]`` to obtain the final embedding layer,
    so it is expected to be a sequence of output layers.  ``update_params`` is
    accepted but not used by the active code path.

    The compiled functions take ``(img1, img2, y)`` where ``y`` is an integer
    vector; the loss treats ``y == 1`` as a matching pair and ``y == 0`` as a
    non-matching pair.

    Returns a dict with the compiled ``'train'`` and ``'valid'`` functions and
    ``'gradnames'``, the names of the gradient variables that ``train``
    returns after the loss.  ``valid`` returns the plain loss, the
    regularized loss and the balanced failure rate.
    """
    X_pairs = {
            'img1':T.tensor4(),
            'img2':T.tensor4(),
            }
    y = T.ivector() # basically class labels

    final_emb_layer = embedder[-1]
    all_layers = ll.get_all_layers(embedder)
    imwrite_architecture(all_layers, './layer_rep.png')
    # assume we get a list of predictions (e.g. for jet architecture, but should work w/just one pred)
    # another assumption (which must hold when the network is being made)
    # the last prediction layer is a) the end of the network and b) what we ultimately care about
    # however the other prediction layers will be incorporated into the training loss
    predicted_embeds_train = {k:ll.get_output(embedder, X)[-1] for k, X in X_pairs.items()}
    predicted_embeds_valid = {k:ll.get_output(final_emb_layer, X, deterministic=True) for k, X in X_pairs.items()}

    margin = 1

    # the 1e-7 offset keeps the distance away from exactly 0
    distance = lambda pred: (pred['img1'] - pred['img2'] + 1e-7).norm(2, axis=1)
    contrastive_loss = lambda pred: T.mean(y*(distance(pred)) + (1 - y)*(margin - distance(pred)).clip(0,np.inf))
    # a matching pair fails when its distance exceeds the margin
    failed_matches = lambda pred: T.switch(T.eq(T.sum(y),0), 0, T.sum((y*distance(pred)) > margin) / T.sum(y))
    # a non-matching pair fails when its distance is below the margin; the
    # (1 - y) mask restricts the count to non-matching pairs (the previous
    # `1-y*distance(pred)` bound the multiplication to `y` only and ended up
    # counting matching pairs instead)
    failed_nonmatches = lambda pred: T.switch(T.eq(T.sum(1-y),0), 0, T.sum((1-y)*(distance(pred) < margin)) / T.sum(1-y))
    failed_pairs = lambda pred: 0.5*failed_matches(pred) + 0.5*failed_nonmatches(pred)

    decay = 0.0001
    reg = regularize_network_params(final_emb_layer, l2) * decay
    losses_reg = lambda pred: contrastive_loss(pred) + reg
    loss_train = losses_reg(predicted_embeds_train)
    loss_train.name = 'CL' # for the names
    all_params = ll.get_all_params(embedder, trainable=True) # this should work with multiple 'roots'
    grads = T.grad(loss_train, all_params, add_names=True)
    updates = adam(grads, all_params)

    print("Compiling network for training")
    tic = time.time()
    train_iter = theano.function([X_pairs['img1'], X_pairs['img2'], y], [loss_train] + grads, updates=updates)
    toc = time.time() - tic
    print("Took %0.2f seconds" % toc)
    print("Compiling network for validation")
    tic = time.time()
    valid_iter = theano.function([X_pairs['img1'], X_pairs['img2'], y], [
                                    contrastive_loss(predicted_embeds_valid),
                                    losses_reg(predicted_embeds_valid),
                                    failed_pairs(predicted_embeds_valid)])
    toc = time.time() - tic
    print("Took %0.2f seconds" % toc)

    return {'train':train_iter, 'valid':valid_iter, 'gradnames':[g.name for g in grads]}
Ejemplo n.º 30
0
def loss_iter(segmenter, update_params={}):
    """Compile training and validation functions for a segmentation network.

    ``segmenter`` is indexed with ``[-1]`` to obtain the final prediction
    layer, so it is expected to be a sequence of output layers; every
    prediction contributes to the training loss.  ``update_params`` is
    accepted but not used by the active code path.

    The compiled functions take ``(X, y, pixel_weights)``.

    Returns a dict with the compiled ``'train'`` and ``'valid'`` functions and
    ``'gradnames'``, the names of the gradient variables that ``train``
    returns after the loss.  ``valid`` returns the plain loss, the
    regularized loss and the precision of channel 0 at threshold 0.5.
    """
    X = T.tensor4()
    y = T.tensor4()
    pixel_weights = T.tensor3()

    final_pred_layer = segmenter[-1]
    all_layers = ll.get_all_layers(segmenter)
    imwrite_architecture(all_layers, './layer_rep.png')
    # assume we get a list of predictions (e.g. for jet architecture, but should work w/just one pred)
    # another assumption (which must hold when the network is being made)
    # the last prediction layer is a) the end of the network and b) what we ultimately care about
    # however the other prediction layers will be incorporated into the training loss
    predicted_masks_train = ll.get_output(segmenter, X)
    predicted_mask_valid = ll.get_output(final_pred_layer, X, deterministic=True)

    thresh = 0.5
    true_pos = lambda pred: T.sum((pred[:,0,:,:] > thresh) * (y[:,0,:,:] > thresh))
    # A false positive is predicted-positive AND labelled-negative.  The
    # previous difference of the two masks summed to (FP - FN), not FP.
    false_pos = lambda pred: T.sum((pred[:,0,:,:] > thresh) * (y[:,0,:,:] <= thresh))
    precision = lambda pred: (true_pos(pred) / (true_pos(pred) + false_pos(pred)))

    pixel_weights_1d = pixel_weights.flatten(ndim=1)
    losses = lambda pred: T.mean(crossentropy_flat(pred + 1e-7, y + 1e-7) * pixel_weights_1d)

    decay = 0.0001
    reg = regularize_network_params(final_pred_layer, l2) * decay
    losses_reg = lambda pred: losses(pred) + reg
    loss_train = T.sum([losses_reg(mask) for mask in predicted_masks_train])
    loss_train.name = 'CE' # for the names
    all_params = ll.get_all_params(segmenter, trainable=True) # this should work with multiple 'roots'
    grads = T.grad(loss_train, all_params, add_names=True)
    updates = adam(grads, all_params)
    prec_valid = precision(predicted_mask_valid)

    print("Compiling network for training")
    tic = time.time()
    train_iter = theano.function([X, y, pixel_weights], [loss_train] + grads, updates=updates)
    toc = time.time() - tic
    print("Took %0.2f seconds" % toc)
    print("Compiling network for validation")
    tic = time.time()
    valid_iter = theano.function([X, y, pixel_weights], [losses(predicted_mask_valid), losses_reg(predicted_mask_valid), prec_valid])
    toc = time.time() - tic
    print("Took %0.2f seconds" % toc)

    return {'train':train_iter, 'valid':valid_iter, 'gradnames':[g.name for g in grads]}
Ejemplo n.º 31
0
    def initialize(self):
        """Build the symbolic graphs and compile train/predict/validation functions.

        Sets ``prediction``, ``loss``, ``params``, ``updates``, ``train_fn``,
        ``predict_values``, ``test_prediction``, ``test_loss``, ``test_acc``
        and ``val_fn`` on the instance.
        """
        crossentropy = lasagne.objectives.categorical_crossentropy

        # Training graph: default (non-deterministic) forward pass.
        self.prediction = lasagne.layers.get_output(self.network)
        unreduced_loss = crossentropy(self.prediction, self.target)
        self.loss = unreduced_loss.mean()

        self.params = lasagne.layers.get_all_params(
            self.network, trainable=True)
        self.updates = lasagne.updates.nesterov_momentum(
            self.loss, self.params,
            learning_rate=self.learning_rate, momentum=0.9)

        # The training function returns the un-averaged crossentropy
        # expression, while the updates are derived from its mean.
        self.train_fn = theano.function(
            [self.input, self.target],
            unreduced_loss,
            updates=self.updates,
            allow_input_downcast=True)

        # Class index with the highest predicted value per row.
        predicted_classes = T.argmax(self.prediction, axis=1)
        self.predict_values = theano.function(
            [self.input],
            predicted_classes,
            allow_input_downcast=True)

        # Validation graph: deterministic forward pass plus L1/L2 penalties.
        self.test_prediction = lasagne.layers.get_output(
            self.network, deterministic=True)
        eval_crossentropy = crossentropy(self.test_prediction, self.target)
        l1_term = regularize_network_params(
            self.network, lasagne.regularization.l1)
        l2_term = regularize_network_params(
            self.network, lasagne.regularization.l2)
        self.test_loss = eval_crossentropy.mean() + (l1_term * 1e-4) + l2_term

        is_correct = T.eq(T.argmax(self.test_prediction, axis=1), self.target)
        self.test_acc = T.mean(is_correct, dtype=theano.config.floatX)

        self.val_fn = theano.function(
            [self.input, self.target],
            [self.test_loss, self.test_acc],
            allow_input_downcast=True)
Ejemplo n.º 32
0
    def _create_network(self):
        """Build the network and compile its training and prediction functions.

        Returns:
            ``(net, train, predict)`` where ``train`` takes
            ``(input, target_values, actions)`` and returns
            ``[loss, network_output, target_values, mask]`` while applying the
            optimizer updates, and ``predict`` maps an input to the network
            output.
        """
        logger.info("Building network ...")
        net, input_var = self._build_network()
        target_values = T.matrix('target_output')
        actions = T.icol('actions')

        # One-hot mask selecting, per row, the column given by `actions`.
        row_idx = T.arange(self.batch_size)
        col_idx = actions.reshape((-1, ))
        mask = T.set_subtensor(
            T.zeros_like(target_values)[row_idx, col_idx], 1)

        # Feed-forward path; inputs are divided by 255 before the network.
        network_output = lasagne.layers.get_output(net, input_var / 255.0)

        # Squared error on the masked outputs, plus optional L2 weight decay.
        loss = squared_error(network_output * mask, target_values).mean()
        if self.weight_decay > 0.0:
            loss = loss + regularize_network_params(net, l2) * self.weight_decay

        all_params = lasagne.layers.get_all_params(net, trainable=True)

        # With clipping enabled the optimizer receives norm-constrained
        # gradients instead of the loss expression.
        if self.clip_error:
            raw_grads = theano.gradient.grad(loss, all_params)
            loss_or_grads = []
            for grad in raw_grads:
                clipped = lasagne.updates.norm_constraint(
                    grad, self.clip_error, range(grad.ndim))
                loss_or_grads.append(clipped)
        else:
            loss_or_grads = loss

        updates = self.optimizer(loss_or_grads,
                                 all_params,
                                 learning_rate=self.learning_rate,
                                 rho=self.decay_rate)

        logger.info("Compiling functions ...")
        train_inputs = [input_var, target_values, actions]
        train_outputs = [loss, network_output, target_values, mask]
        train = theano.function(train_inputs, train_outputs, updates=updates)
        predict = theano.function([input_var], network_output)

        return net, train, predict
Ejemplo n.º 33
0
    def initialize(self):
        """Create the loss expressions and compile the Theano functions.

        After this call the instance provides ``train_fn`` (one
        Nesterov-momentum step), ``predict_values`` (argmax of the
        non-deterministic prediction) and ``val_fn`` (regularized
        deterministic loss and accuracy), together with the symbolic
        attributes they are built from.
        """
        net = self.network
        objective = lasagne.objectives.categorical_crossentropy

        # --- training ---------------------------------------------------
        self.prediction = lasagne.layers.get_output(net)
        train_xent = objective(self.prediction, self.target)
        self.loss = train_xent.mean()

        self.params = lasagne.layers.get_all_params(net, trainable=True)
        self.updates = lasagne.updates.nesterov_momentum(
            self.loss,
            self.params,
            learning_rate=self.learning_rate,
            momentum=0.9,
        )

        # Note: the compiled function outputs the un-averaged crossentropy.
        self.train_fn = theano.function(
            [self.input, self.target],
            train_xent,
            updates=self.updates,
            allow_input_downcast=True,
        )
        self.predict_values = theano.function(
            [self.input],
            T.argmax(self.prediction, axis=1),
            allow_input_downcast=True,
        )

        # --- validation -------------------------------------------------
        self.test_prediction = lasagne.layers.get_output(net, deterministic=True)
        val_xent = objective(self.test_prediction, self.target)
        l1_penalty = regularize_network_params(net, lasagne.regularization.l1)
        l2_penalty = regularize_network_params(net, lasagne.regularization.l2)
        self.test_loss = val_xent.mean() + (l1_penalty * 1e-4) + l2_penalty

        hits = T.eq(T.argmax(self.test_prediction, axis=1), self.target)
        self.test_acc = T.mean(hits, dtype=theano.config.floatX)

        self.val_fn = theano.function(
            [self.input, self.target],
            [self.test_loss, self.test_acc],
            allow_input_downcast=True,
        )
Ejemplo n.º 34
0
def build_network(args, network):
    """Compile the Theano functions for a sample-weighted classifier.

    Args:
        args: Mapping providing ``'weight_decay'`` and ``'learning_rate'``.
        network: Output layer of the Lasagne network.

    Returns:
        A pair ``({"net": network}, fns)`` where ``fns`` maps ``'tr'``,
        ``'val'``, ``'test'``, ``'acc'``, ``'out'`` and ``"score"`` to the
        compiled functions (``'val'`` and ``'test'`` are the same function).
    """
    X = T.tensor4('X')
    Y = T.ivector('Y')
    # physics weights, one per sample
    W = T.dvector('W')

    # Training loss: crossentropy weighted by W, plus L2 weight decay.
    prediction = get_output(network, X)
    weighted_xent = T.dot(categorical_crossentropy(prediction, Y).T, W)
    weightsl2 = regularize_network_params(network, l2)
    loss = weighted_xent + args['weight_decay'] * weightsl2

    # Test loss: weighted crossentropy of the deterministic pass, with no
    # regularization term.
    test_prediction = get_output(network, X, deterministic=True)
    test_loss = T.dot(categorical_crossentropy(test_prediction, Y).T, W)

    # Weighted classification accuracy, normalized by the total weight.
    per_sample_acc = categorical_accuracy(test_prediction, Y)
    test_acc = T.dot(per_sample_acc.T, W) / T.sum(W)

    params = get_all_params(network, trainable=True)
    updates = adam(loss, learning_rate=args['learning_rate'], params=params)

    labelled_inputs = [X, Y, W]

    # train_fn: (inputs, labels, weights) -> regularized loss, applies updates
    train_fn = theano.function(labelled_inputs, loss, updates=updates)
    # val_fn / acc_fn: (inputs, labels, weights) -> unregularized loss / accuracy
    val_fn = theano.function(labelled_inputs, test_loss)
    acc_fn = theano.function(labelled_inputs, test_acc)
    # out_fn: inputs -> deterministic prediction; score_fn: its column 1
    out_fn = theano.function([X], test_prediction)
    score_fn = theano.function([X], test_prediction[:, 1].T)

    functions = {
        'tr': train_fn,
        'val': val_fn,
        'test': val_fn,
        'acc': acc_fn,
        'out': out_fn,
        "score": score_fn,
    }
    return {"net": network}, functions
Ejemplo n.º 35
0
    def build_instrument_model(self, n_vars, **kwargs):
        """Build the instrument network and compile its Theano functions.

        Args:
            n_vars: Second dimension of the input layer's shape
                ``(None, n_vars)``.
            **kwargs: Must contain ``'dense_size'`` (hidden width) and
                ``'n_dense_layers'`` (total number of hidden dense layers).

        Returns:
            The initial parameter values of the network, as returned by
            ``layers.get_all_param_values``.

        Side effects:
            Sets ``self.instrument_output``, ``self._instrument_train_fn``,
            ``self._instrument_loss_fn`` and ``self._instrument_output_fn``.
        """
        targets = TT.vector()
        instrument_vars = TT.matrix()

        instruments = layers.InputLayer((None, n_vars), instrument_vars)
        instruments = layers.DropoutLayer(instruments, p=0.2)

        dense_layer = layers.DenseLayer(instruments, kwargs['dense_size'], nonlinearity=nonlinearities.tanh)
        dense_layer = layers.DropoutLayer(dense_layer, p=0.2)

        # `range` instead of `xrange` so the method also runs on Python 3.
        for _ in range(kwargs['n_dense_layers'] - 1):
            dense_layer = layers.DenseLayer(dense_layer, kwargs['dense_size'], nonlinearity=nonlinearities.tanh)
            dense_layer = layers.DropoutLayer(dense_layer, p=0.5)

        self.instrument_output = layers.DenseLayer(dense_layer, 1, nonlinearity=nonlinearities.linear)
        init_params = layers.get_all_param_values(self.instrument_output)
        prediction = layers.get_output(self.instrument_output, deterministic=False)
        test_prediction = layers.get_output(self.instrument_output, deterministic=True)

        # flexible here, endog variable can be categorical, continuous, etc.
        l2_cost = regularization.regularize_network_params(self.instrument_output, regularization.l2)
        # `loss` (regularized) drives the updates; `loss_total` is the plain
        # mean squared error reported by the loss function.
        # NOTE(review): both use the non-deterministic prediction (dropout
        # active) - confirm that is intended for the reported loss.
        loss = objectives.squared_error(prediction.flatten(), targets.flatten()).mean() + 1e-4 * l2_cost
        loss_total = objectives.squared_error(prediction.flatten(), targets.flatten()).mean()

        params = layers.get_all_params(self.instrument_output, trainable=True)
        param_updates = updates.adadelta(loss, params)

        self._instrument_train_fn = theano.function(
            [
                targets,
                instrument_vars,
            ],
            loss,
            updates=param_updates
        )

        self._instrument_loss_fn = theano.function(
            [
                targets,
                instrument_vars,
            ],
            loss_total
        )

        self._instrument_output_fn = theano.function([instrument_vars], test_prediction)

        return init_params
Ejemplo n.º 36
0
def build(layer_heads, params):
    """Compile the per-target Theano functions of a multi-head model.

    Args:
        layer_heads: Mapping from target name to that target's output layer.
        params: Mapping providing ``'targets'`` (an iterable of dicts with a
            ``'name'`` key), ``'beta'`` (L2 weight), ``'learning_rate'``,
            ``'epsilon'`` and optionally ``'class_weight'``.

    Returns:
        ``(fns, layer_heads)`` where ``fns[name]`` holds the compiled
        ``'train'``, ``'predict'``, ``'cost'``, ``'acc'`` and ``'transform'``
        functions of the target called ``name``.
    """
    fns = {}  # model methods
    x = T.tensor4('input')

    for target in params['targets']:
        name = target['name']
        fns[name] = {}
        head_fns = fns[name]
        out_layer = layer_heads[name]

        y = T.matrix('target')
        o = L.get_output(out_layer, inputs=x)
        o_vl = L.get_output(out_layer, inputs=x, deterministic=True)

        # Use the class-weighted crossentropy only when weights are given.
        loss_fn = obj.categorical_crossentropy
        if 'class_weight' in params and params['class_weight']:
            loss_fn = partial(weighted_cce, weights=params['class_weight'])

        loss = loss_fn(o, y).mean()
        loss_vl = loss_fn(o_vl, y).mean()
        wd_l2 = reg.regularize_network_params(out_layer, reg.l2)
        wd_l2 = wd_l2 * params['beta']

        acc_vl = obj.categorical_accuracy(o_vl, y).mean()

        trainable = L.get_all_params(out_layer, trainable=True)
        updates_ = updates.adam(loss + wd_l2,
                                trainable,
                                learning_rate=params['learning_rate'],
                                epsilon=params['epsilon'])

        # The training function has no outputs; it only applies the updates.
        head_fns['train'] = theano.function(
            [x, y], updates=updates_, allow_input_downcast=True)
        head_fns['predict'] = theano.function(
            [x], o_vl, allow_input_downcast=True)
        head_fns['cost'] = theano.function(
            [x, y], loss_vl, allow_input_downcast=True)
        head_fns['acc'] = theano.function(
            [x, y], acc_vl, allow_input_downcast=True)

        # 'transform' exposes the deterministic output of the layer just
        # before the head's output layer.
        penultimate = L.get_all_layers(out_layer)[-2]
        features = L.get_output(penultimate, inputs=x, deterministic=True)
        head_fns['transform'] = theano.function(
            [x], features, allow_input_downcast=True)

    return fns, layer_heads
Ejemplo n.º 37
0
def build_network(args, network):
    """Build losses for a weighted classifier and compile its functions.

    ``args`` supplies ``'weight_decay'`` and ``'learning_rate'``; ``network``
    is the output layer.  Returns ``({"net": network}, functions)`` with the
    compiled functions stored under ``'tr'``, ``'val'``, ``'test'`` (same
    function as ``'val'``), ``'acc'``, ``'out'`` and ``"score"``.
    """
    X = T.tensor4('X')
    Y = T.ivector('Y')
    W = T.dvector('W')  # physics weights

    def weighted(per_sample):
        # Weighted sum of a per-sample quantity, using the weights W.
        return T.dot(per_sample.T, W)

    # Training objective: weighted crossentropy + L2 weight decay.
    prediction = get_output(network, X)
    loss = weighted(categorical_crossentropy(prediction, Y))
    weightsl2 = regularize_network_params(network, l2)
    loss = loss + args['weight_decay'] * weightsl2

    # Evaluation: deterministic pass, no regularization term.
    test_prediction = get_output(network, X, deterministic=True)
    test_loss = weighted(categorical_crossentropy(test_prediction, Y))

    # Weighted accuracy; the criterion could be changed to reflect
    # false-positive / false-negative requirements.
    test_acc = weighted(categorical_accuracy(test_prediction, Y)) / T.sum(W)

    params = get_all_params(network, trainable=True)
    updates = adam(loss, learning_rate=args['learning_rate'], params=params)

    # Functions taking (X, Y, W): training step, loss and accuracy.
    train_fn = theano.function([X, Y, W], loss, updates=updates)
    val_fn = theano.function([X, Y, W], test_loss)
    acc_fn = theano.function([X, Y, W], test_acc)
    # Functions taking X only: full prediction and its column 1.
    out_fn = theano.function([X], test_prediction)
    score_fn = theano.function([X], test_prediction[:, 1].T)

    model = {"net": network}
    functions = dict([
        ('tr', train_fn),
        ('val', val_fn),
        ('test', val_fn),
        ('acc', acc_fn),
        ('out', out_fn),
        ("score", score_fn),
    ])
    return model, functions
Ejemplo n.º 38
0
def build_network(args, network):
    """Compile Theano train/eval functions for a reconstruction network.

    The network is trained to reproduce its own input: the objective is the
    mean squared error between the output and ``X`` plus an L2 penalty on the
    parameters scaled by ``args['weight_decay']``, optimised with Adam at
    ``args['learning_rate']``.

    Args:
        args: mapping providing ``'weight_decay'`` and ``'learning_rate'``.
        network: output layer of the network to train.

    Returns:
        A pair of dicts: ``{"net": network}`` and the compiled functions
        under the keys ``'tr'``, ``'val'``, ``'acc'``, ``'out'`` and
        ``'score'``.  ``'acc'`` returns the same summed squared error as
        ``'val'``; ``'score'`` returns, per sample, one minus the mean
        squared reconstruction error saturated at 1.
    """
    X = T.tensor4('X')
    thresh = 1.0

    # Training objective: mean reconstruction error plus L2 weight decay.
    prediction = get_output(network, X)
    loss = squared_error(prediction, X).mean()
    weightsl2 = regularize_network_params(network, l2)
    loss += args['weight_decay'] * weightsl2

    # Evaluation: deterministic pass, summed squared error, no penalty.
    test_prediction = get_output(network, X, deterministic=True)
    test_loss = squared_error(test_prediction, X).sum()

    # There is no classification accuracy here; the reconstruction error is
    # reported in its place.
    test_acc = test_loss

    # Per-sample score from the mean squared reconstruction error.
    test_score = T.sum(squared_error(test_prediction, X), axis=(1, 2, 3))
    with T.autocast_float_as("float64"):
        test_score = test_score / (T.prod(X.shape[1:]))
        # Saturate errors above the threshold at 1.  The previous form,
        # test_score[test_score > thresh].nonzero(), produced positions
        # within the filtered array rather than within test_score, so the
        # wrong entries were overwritten.  An element-wise switch selects
        # exactly the offending samples.
        test_score = T.switch(T.gt(test_score, thresh),
                              T.ones_like(test_score),
                              test_score)
        test_score = 1 - test_score

    params = get_all_params(network, trainable=True)
    updates = adam(loss, learning_rate=args['learning_rate'], params=params)

    # input -> training loss, applying the Adam updates
    train_fn = theano.function([X], loss, updates=updates)

    # input -> non-regularized reconstruction error / prediction / score
    val_fn = theano.function([X], test_loss)
    acc_fn = theano.function([X], test_acc)
    out_fn = theano.function([X], test_prediction)
    score_fn = theano.function([X], test_score)
    return {"net": network}, {'tr': train_fn,
                              'val': val_fn,
                              'acc': acc_fn,
                              'out': out_fn,
                              "score": score_fn}
Ejemplo n.º 39
0
 def objective(layers, loss_function, target, aggregate=aggregate,
               deterministic=False, get_output_kw=None):
     """Return the aggregated loss of the network described by ``layers``.

     In non-deterministic (training) mode the loss is augmented with an L2
     penalty over the whole network, scaled by ``l2``, and an L1 penalty
     over ``layers[1]``, scaled by ``l1`` (both taken from the enclosing
     scope).  In deterministic mode only ``loss_function`` is used.

     Args:
         layers: sequence of layers; the last entry is the output layer.
         loss_function: callable ``(network_output, target) -> losses``.
         target: target expression passed to ``loss_function``.
         aggregate: callable reducing the losses to the returned value.
         deterministic: forwarded to ``lasagne.layers.get_output``.
         get_output_kw: optional extra keyword arguments for ``get_output``.
     """
     extra_kw = {} if get_output_kw is None else get_output_kw
     output_layer = layers[-1]
     first_layer = layers[1]
     network_output = lasagne.layers.get_output(
         output_layer, deterministic=deterministic, **extra_kw)

     losses = loss_function(network_output, target)
     if deterministic:
         return aggregate(losses)

     # Training mode: add both weight penalties to the data term.
     l2_term = l2 * regularization.regularize_network_params(
         output_layer, regularization.l2)
     l1_term = l1 * regularization.regularize_layer_params(
         first_layer, regularization.l1)
     return aggregate(losses + l2_term + l1_term)
 def __build_loss_train__fn__(self):
     """Compile the training, prediction and validation Theano functions.

     Sets ``self.eta`` (shared learning rate, initialised to 0.05),
     ``self.__train_fn__`` (inputs, targets -> training loss, with Nesterov
     momentum updates), ``self.__predict_fn__`` (inputs -> deterministic
     network output) and ``self.__val_fn__`` (inputs, targets ->
     [loss, accuracy]).
     """
     # L2 weight penalty shared by the training and validation losses
     l2_penalty = 1e-4 * regularization.regularize_network_params(self.net, regularization.l2)

     # training loss (stochastic layers active)
     prediction = layers.get_output(self.net)
     loss = objectives.categorical_crossentropy(prediction, self.__target_var__)
     loss = loss.mean() + l2_penalty

     # Validation must run the network deterministically; it previously
     # reused the training-mode prediction and loss, so stochastic layers
     # such as dropout stayed active while validating.
     val_prediction = layers.get_output(self.net, deterministic=True)
     val_loss = objectives.categorical_crossentropy(val_prediction, self.__target_var__)
     val_loss = val_loss.mean() + l2_penalty
     val_acc = T.mean(T.eq(T.argmax(val_prediction, axis=1), self.__target_var__),
                      dtype=theano.config.floatX)

     # create parameter update expressions
     params = layers.get_all_params(self.net, trainable=True)
     self.eta = theano.shared(sp.array(sp.float32(0.05), dtype=sp.float32))
     update_rule = updates.nesterov_momentum(loss, params, learning_rate=self.eta,
                                             momentum=0.9)

     # compile training function that updates parameters and returns training loss
     self.__train_fn__ = theano.function([self.__input_var__, self.__target_var__],
                                         loss, updates=update_rule)
     self.__predict_fn__ = theano.function([self.__input_var__], val_prediction)
     self.__val_fn__ = theano.function([self.__input_var__, self.__target_var__],
                                       [val_loss, val_acc])
Ejemplo n.º 41
0
def similarity_iter(output_layer, match_layer, update_params, match_layer_w=0):
    """Compile train/validation functions for a two-input similarity network.

    The loss mixes a contrastive term on the descriptor distance with a
    binary cross-entropy term on the match probability, weighted by
    ``1 - match_layer_w`` and ``match_layer_w`` respectively.

    Args:
        output_layer: layer producing the descriptors; its output is indexed
            as ``x[:, 0, :]`` and ``x[:, 1, :]`` for the two inputs.
        match_layer: layer producing the match probability; the input layers
            named ``'input1'`` and ``'input2'`` are looked up among its
            ancestors.
        update_params: keyword arguments forwarded to ``nesterov_momentum``.
        match_layer_w: weight of the matching loss.

    Returns:
        dict with ``'train'`` (returns the regularized loss, the plain loss
        and every gradient, and applies the updates), ``'valid'`` (returns
        the plain loss on the deterministic pass) and ``'gradnames'``.

    Raises:
        IndexError: if no layer is named ``'input1'`` or ``'input2'``.
    """
    X1 = T.tensor4()
    X2 = T.tensor4()
    y = T.ivector()

    all_layers = ll.get_all_layers(match_layer)
    # make image of all layers
    imwrite_architecture(all_layers, './layer_rep.png')

    # TODO: find the input layers in a better way.
    # List comprehensions rather than filter(...)[0]: on Python 3 filter()
    # returns a lazy iterator that cannot be subscripted.
    input_1 = [layer for layer in all_layers if layer.name == 'input1'][0]
    input_2 = [layer for layer in all_layers if layer.name == 'input2'][0]

    descriptors_train, match_prob_train = ll.get_output(
        [output_layer, match_layer], {input_1: X1, input_2: X2})
    descriptors_eval, match_prob_eval = ll.get_output(
        [output_layer, match_layer], {input_1: X1, input_2: X2}, deterministic=True)

    margin = 1
    decay = 0
    reg = regularize_network_params(match_layer, l2) * decay

    def distance(x):
        # L2 distance between the two descriptors of each pair; the small
        # offset is added to the difference before the norm is taken.
        return (x[:, 0, :] - x[:, 1, :] + 1e-7).norm(2, axis=1)

    def loss(x, z):
        # 9/21: squaring the loss seemed to prevent it from getting to 0.5
        # really quickly (i.e. within 3 epochs).
        contrastive = T.mean(y * (distance(x))
                             + (1 - y) * (T.maximum(0, margin - distance(x)))) / 2
        matching = T.mean(binary_crossentropy(z.T + 1e-7, y))
        return (1 - match_layer_w) * contrastive + match_layer_w * matching

    def loss_reg(x, z):
        return loss(x, z) + reg

    # unsure how this would be done if there were truly two trainable branches
    all_params = ll.get_all_params(match_layer)
    loss_train = loss_reg(descriptors_train, match_prob_train)
    loss_train.name = 'combined_loss'  # the gradient names derive from this
    grads = T.grad(loss_train, all_params, add_names=True)
    updates = nesterov_momentum(grads, all_params, **update_params)

    train_iter = theano.function(
        [X1, X2, y],
        [loss_train, loss(descriptors_train, match_prob_train)] + grads,
        updates=updates)
    valid_iter = theano.function([X1, X2, y], loss(descriptors_eval, match_prob_eval))

    return {'train': train_iter, 'valid': valid_iter, 'gradnames': [g.name for g in grads]}
Ejemplo n.º 42
0
 def get_train_fn(self, last_only=False):
     """Compile the training function for ``self.output_layer``.

     The compiled function takes (inputs, integer targets), applies one
     Nesterov momentum step on the mean cross-entropy plus the scaled L2
     penalty, and returns ``(loss, error)`` where ``loss`` is the mean
     cross-entropy without the penalty and ``error`` the misclassification
     rate.

     Args:
         last_only: when true, only the trainable parameters of the output
             layer itself are updated instead of the whole network's.
     """
     targets = T.ivector('targets')
     inputs = self.net['input'].input_var

     net_out = lasagne.layers.get_output(self.output_layer)
     mean_loss = categorical_crossentropy(net_out, targets).mean()
     predicted_class = T.argmax(net_out, axis=1)
     error_rate = T.mean(T.neq(predicted_class, targets),
                         dtype=theano.config.floatX)

     l2_term = self.regularizer_amount * regularize_network_params(
         self.output_layer, l2)

     if not last_only:
         trainable = lasagne.layers.get_all_params(self.output_layer,
                                                   trainable=True)
     else:
         trainable = self.output_layer.get_params(trainable=True)

     step = nesterov_momentum(mean_loss + l2_term,
                              trainable,
                              learning_rate=self.lr)
     return theano.function([inputs, targets], (mean_loss, error_rate),
                            updates=step)
Ejemplo n.º 43
0
 def __init__(self, lambda1 = 1e-5, lambda2 = 1e-6):
     """Build the ARE network, its loss and its Adadelta training function.

     The loss is 1000 times the mean squared reconstruction error, minus
     ``lambda1`` times the trace of the encoded feature's Gram matrix, plus
     ``lambda2`` times the L2 penalty over the network parameters.

     Args:
         lambda1: weight of the (subtracted) Gram-matrix trace term.
         lambda2: weight of the L2 penalty.
     """
     # symbolic inputs
     self.input_var = T.tensor4('inputs')
     self.target_var = T.matrix('targets')

     # network and the symbolic outputs of its named layers
     self.are_net = build_ARE(self.input_var, ENCODE_SIZE)
     self.reconstructed = lasagne.layers.get_output(self.are_net)
     self.encode_layer, _ = get_layer_by_name(self.are_net, 'encode')
     self.action_layer, _ = get_layer_by_name(self.are_net, 'action')
     self.encoded_feature = lasagne.layers.get_output(self.encode_layer)
     self.transformed_feature = lasagne.layers.get_output(self.action_layer)

     # loss terms
     self.XXT = T.dot(self.encoded_feature, self.encoded_feature.transpose())
     self.l2_penalty = regularize_network_params(self.are_net, l2)
     reconstruction_error = lasagne.objectives.squared_error(
         self.reconstructed, self.target_var)
     self.loss = (1000 * reconstruction_error.mean()
                  - lambda1 * self.XXT.trace()
                  + lambda2 * self.l2_penalty)

     # optimisation
     self.params = lasagne.layers.get_all_params(self.are_net, trainable=True)
     self.updates = lasagne.updates.adadelta(self.loss, self.params)
     self.train_fn = theano.function(
         [self.input_var, self.target_var],
         self.loss,
         updates=self.updates,
         on_unused_input='warn')

     # bookkeeping: best error so far and identity/zero action parameters
     self.best_err = 999
     self.action1_w = np.eye(ENCODE_SIZE, dtype=np.float32)
     self.action1_b = np.zeros(ENCODE_SIZE, dtype=np.float32)
     self.action2_w = np.eye(ENCODE_SIZE, dtype=np.float32)
     self.action2_b = np.zeros(ENCODE_SIZE, dtype=np.float32)
def make_training_functions(network_layers, input_var, target_var, stack_params, weight_decay):
    """Compile the validation and the two training functions of the network.

    The loss is the mean squared error of the deterministic network output
    against ``target_var`` plus ``weight_decay`` times the L2 penalty over
    the regularizable parameters.

    Args:
        network_layers: 4-tuple ``(encode_layer, hidden_layer,
            smth_act_layer, network)``.
        input_var: symbolic network input.
        target_var: symbolic regression target.
        stack_params: parameters updated by the stack-only training function.
        weight_decay: scale of the L2 penalty.

    Returns:
        ``(val_fn, train_fn, stack_train_fn)``.  ``val_fn`` returns
        ``[loss, encode, hidden, smth_act, output]``; ``train_fn`` updates
        all trainable parameters; ``stack_train_fn`` updates only
        ``stack_params``.  Both training functions return the loss.
    """
    encode_layer, hidden_layer, smth_act_layer, network = network_layers

    output = lasagne.layers.get_output(network, deterministic=True)
    reconstruction = lasagne.objectives.squared_error(output, target_var).mean()
    l2_term = regularization.regularize_network_params(
        layer=network, penalty=regularization.l2, tags={'regularizable': True})
    loss = reconstruction + weight_decay * l2_term

    # Same optimiser settings for both update sets; they differ only in the
    # parameters being updated.
    all_trainable = layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
        loss, all_trainable, learning_rate=0.00001, momentum=0.95)
    stack_updates = lasagne.updates.nesterov_momentum(
        loss, stack_params, learning_rate=0.00001, momentum=0.95)

    # intermediate activations exposed by the validation function
    encode = lasagne.layers.get_output(encode_layer, deterministic=True)
    hidden = lasagne.layers.get_output(hidden_layer, deterministic=True)
    smth_act = lasagne.layers.get_output(smth_act_layer, deterministic=True)

    val_fn = theano.function(
        [input_var, target_var], [loss, encode, hidden, smth_act, output])
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    stack_train_fn = theano.function(
        [input_var, target_var], loss, updates=stack_updates)

    return val_fn, train_fn, stack_train_fn
Ejemplo n.º 45
0
def loss_iter(segmenter, update_params=None):
    """Compile train/validation functions for a segmentation network.

    The loss is the mean of the pixel-weighted flat cross-entropy; the
    training objective adds an L2 penalty on the network parameters scaled
    by 0.0001 and is optimised with Adam.

    Args:
        segmenter: output layer of the segmentation network.
        update_params: optional dict of keyword arguments forwarded to
            ``adam``.  ``None`` (the default) means no extra arguments.

    Returns:
        dict with ``'train'`` (returns the regularized loss, the plain loss,
        the accuracy and every gradient, and applies the updates),
        ``'valid'`` (returns the plain loss and the accuracy on the
        deterministic pass) and ``'gradnames'``.
    """
    # Avoid a shared mutable default argument.
    if update_params is None:
        update_params = {}

    X = T.tensor4()
    y = T.tensor4()
    pixel_weights = T.tensor3()

    all_layers = ll.get_all_layers(segmenter)
    imwrite_architecture(all_layers, './layer_rep.png')
    predicted_mask_train = ll.get_output(segmenter, X)
    predicted_mask_valid = ll.get_output(segmenter, X, deterministic=True)

    pixel_weights_1d = pixel_weights.flatten(ndim=1)

    def accuracy(pred):
        # fraction of positions whose arg-max over axis 1 matches the target's
        return T.mean(T.eq(T.argmax(pred, axis=1), T.argmax(y, axis=1)))

    def losses(pred):
        # small offsets are added to both prediction and target
        return T.mean(crossentropy_flat(pred + 1e-7, y + 1e-7) * pixel_weights_1d)

    decay = 0.0001
    reg = regularize_network_params(segmenter, l2) * decay

    loss_train = losses(predicted_mask_train) + reg
    loss_train.name = 'combined_loss'  # the gradient names derive from this
    all_params = ll.get_all_params(segmenter)
    grads = T.grad(loss_train, all_params, add_names=True)
    updates = adam(grads, all_params, **update_params)
    acc_train = accuracy(predicted_mask_train)
    acc_valid = accuracy(predicted_mask_valid)

    print("Compiling network for training")
    tic = time.time()
    train_iter = theano.function(
        [X, y, pixel_weights],
        [loss_train, losses(predicted_mask_train), acc_train] + grads,
        updates=updates)
    toc = time.time() - tic
    print("Took %0.2f seconds" % toc)

    print("Compiling network for validation")
    tic = time.time()
    valid_iter = theano.function(
        [X, y, pixel_weights], [losses(predicted_mask_valid), acc_valid])
    toc = time.time() - tic
    print("Took %0.2f seconds" % toc)

    return {'train': train_iter, 'valid': valid_iter, 'gradnames': [g.name for g in grads]}
Ejemplo n.º 46
0
    def _create_network(self):
        """Build the network and compile its train and predict functions.

        For every row, the target is kept only at the column holding the
        row's maximum target value; every other column is replaced by the
        network's own output.  The cost is the mean squared error against
        that mixed target plus the L2 penalty scaled by ``DECAY``, optimised
        with Adadelta.

        Returns:
            ``(net, train, predict)``.  ``train`` takes (input, targets) and
            returns ``[cost, mixed targets, network output, per-row mean
            error, arg-max indices]``; ``predict`` takes the input and
            returns the network output.
        """
        print("Building network ...")
        net, input_var = self._build_network()
        target_values = T.matrix('target_output')
        maxQ_idx = target_values.argmax(1)

        # Masks: `net_mask` is 1 everywhere except at the arg-max column of
        # each row, `target_mask` is its complement.
        row_ids = np.arange(BATCH_SIZE)
        mask_shape = (BATCH_SIZE, self.actionsNum)
        net_mask = theano.shared(np.ones(mask_shape).astype(np.int32))
        target_mask = theano.shared(np.zeros(mask_shape).astype(np.int32))
        net_mask = T.set_subtensor(net_mask[row_ids, maxQ_idx], 0)
        target_mask = T.set_subtensor(target_mask[row_ids, maxQ_idx], 1)

        # symbolic network output and the mixed regression target
        network_output = lasagne.layers.get_output(net)
        new_target_values = target_values * target_mask + network_output * net_mask

        err = squared_error(network_output, new_target_values)

        # data term plus regularization penalty
        cost = err.mean() + regularize_network_params(net, l2) * DECAY

        # Adadelta updates over every trainable parameter
        all_params = lasagne.layers.get_all_params(net, trainable=True)
        updates = lasagne.updates.adadelta(cost, all_params)

        # Theano functions for training and computing cost
        print("Compiling functions ...")
        train_outputs = [cost, new_target_values, network_output,
                         err.mean(1), maxQ_idx]
        train = theano.function([input_var, target_values],
                                train_outputs,
                                updates=updates)
        predict = theano.function([input_var], lasagne.layers.get_output(net))

        return net, train, predict
Ejemplo n.º 47
0
 def __init__(self, lambda1 = 0, lambda2 = 0):
     """Build the ARE network, its loss and its Nesterov training function.

     The loss is the mean squared reconstruction error plus ``lambda1``
     times the L1 penalty over the network parameters plus ``lambda2`` times
     the trace of the summed Gram matrices of the encoded and transformed
     features.

     Args:
         lambda1: weight of the L1 penalty.
         lambda2: weight of the Gram-matrix trace term.
     """
     self.input_var = T.tensor4('inputs')
     self.target_var = T.matrix('targets')
     self.are_net = build_ARE(self.input_var, ENCODE_SIZE)
     self.reconstructed = lasagne.layers.get_output(self.are_net)
     self.encode_layer, _ = get_layer_by_name(self.are_net, 'encode')
     self.action_layer, _ = get_layer_by_name(self.are_net, 'action')
     self.encoded_feature = lasagne.layers.get_output(self.encode_layer)
     self.transformed_feature = lasagne.layers.get_output(self.action_layer)
     self.l1_penalty = regularize_network_params(self.are_net, l1)
     self.loss = lasagne.objectives.squared_error(self.reconstructed, self.target_var)
     self.XXT = (T.dot(self.encoded_feature, self.encoded_feature.transpose())
                 + T.dot(self.transformed_feature, self.transformed_feature.transpose()))
     # A leftover second assignment used to add `lambda1 * l1_penalty` to the
     # already complete loss, applying the L1 penalty twice; each term is now
     # added exactly once.
     self.loss = self.loss.mean() + lambda1 * self.l1_penalty + lambda2 * self.XXT.trace()
     self.params = lasagne.layers.get_all_params(self.are_net, trainable=True)
     # shared learning rate so it can be changed after compilation
     self.l_r = theano.shared(np.array(0.01, dtype=theano.config.floatX))
     self.updates = lasagne.updates.nesterov_momentum(
         self.loss, self.params, learning_rate=self.l_r, momentum=0.90)
     self.train_fn = theano.function([self.input_var, self.target_var], self.loss,
                                     updates=self.updates, on_unused_input='warn')
     # bookkeeping: best error so far and identity/zero action parameters
     self.best_err = 999
     self.action1_w = np.eye(ENCODE_SIZE, dtype = np.float32)
     self.action1_b = np.zeros(ENCODE_SIZE, dtype = np.float32)
     self.action2_w = np.eye(ENCODE_SIZE, dtype = np.float32)
     self.action2_b = np.zeros(ENCODE_SIZE, dtype = np.float32)
Ejemplo n.º 48
0
def define_updates(network, input_var, target_var, weight_var):
    """Compile the training and validation functions for ``network``.

    Args:
        network: output layer of the network.
        input_var: symbolic network input.
        target_var: symbolic target.
        weight_var: symbolic weights passed to ``score_metrics``.

    Returns:
        ``(train_fn, val_fn, l_r)``.  Both functions take (input, target,
        weight) and return ``[loss, l2_loss, acc, dice_score,
        target_prediction, prediction, prediction_binary]``; ``train_fn``
        additionally applies the parameter updates.  ``l_r`` is the shared
        learning-rate variable, initialised to ``P.LEARNING_RATE``.

    Raises:
        ValueError: if ``P.OPTIMIZATION`` is neither ``'nesterov'`` nor
            ``'adam'``.
    """
    params = lasagne.layers.get_all_params(network, trainable=True)

    out = lasagne.layers.get_output(network)
    test_out = lasagne.layers.get_output(network, deterministic=True)

    l2_loss = P.L2_LAMBDA * regularize_network_params(network, l2)

    train_metrics = score_metrics(out, target_var, weight_var, l2_loss)
    loss, acc, dice_score, target_prediction, prediction, prediction_binary = train_metrics

    # Validation metrics must come from the deterministic pass; they used to
    # be unpacked from `train_metrics`, which made val_fn report
    # training-mode numbers.
    val_metrics = score_metrics(test_out, target_var, weight_var, l2_loss)
    t_loss, t_acc, t_dice_score, t_target_prediction, t_prediction, t_prediction_binary = val_metrics

    # shared so the caller can adjust the learning rate after compilation
    l_r = theano.shared(np.array(P.LEARNING_RATE, dtype=theano.config.floatX))

    if P.OPTIMIZATION == 'nesterov':
        updates = lasagne.updates.nesterov_momentum(
                loss, params, learning_rate=l_r, momentum=P.MOMENTUM)
    elif P.OPTIMIZATION == 'adam':
        updates = lasagne.updates.adam(
                loss, params, learning_rate=l_r)
    else:
        # fail early instead of hitting an unbound `updates` further down
        raise ValueError(
            "Unsupported P.OPTIMIZATION: {!r}".format(P.OPTIMIZATION))

    logging.info("Defining train function")
    train_fn = theano.function([input_var, target_var, weight_var], [
                                loss, l2_loss, acc, dice_score, target_prediction, prediction, prediction_binary],
                                updates=updates)

    logging.info("Defining validation function")
    val_fn = theano.function([input_var, target_var, weight_var], [
                                t_loss, l2_loss, t_acc, t_dice_score, t_target_prediction, t_prediction, t_prediction_binary])

    return train_fn, val_fn, l_r
Ejemplo n.º 49
0
    def _create_network(self):
        """Build the network and compile its train and predict functions.

        The loss is the mean squared error between the network output,
        masked so that only the column selected by ``actions`` is kept in
        each row, and ``target_values``; an L2 penalty is added when
        ``self.weight_decay`` is positive.

        Returns:
            ``(net, train, predict)``.  ``train`` takes (input, targets,
            actions) and returns ``[loss, network output, targets, mask]``;
            ``predict`` takes the input and returns the network output.
        """
        logger.info("Building network ...")
        net, input_var = self._build_network()
        target_values = T.matrix('target_output')
        actions = T.icol('actions')

        # One-hot mask with a 1 at each row's action column.
        row_ids = T.arange(self.batch_size)
        action_ids = actions.reshape((-1,))
        mask = T.set_subtensor(T.zeros_like(target_values)[row_ids, action_ids], 1)

        # feed-forward path on the input divided by 255
        network_output = lasagne.layers.get_output(net, input_var / 255.0)

        loss = squared_error(network_output * mask, target_values).mean()
        if self.weight_decay > 0.0:
            # regularization penalty
            loss += regularize_network_params(net, l2) * self.weight_decay

        all_params = lasagne.layers.get_all_params(net, trainable=True)

        # Updates: from the raw loss, or from norm-constrained gradients when
        # error clipping is enabled.
        if not self.clip_error:
            updates = self.optimizer(loss, all_params, learning_rate=self.learning_rate, rho=self.decay_rate)
        else:
            raw_grads = theano.gradient.grad(loss, all_params)
            clipped = [lasagne.updates.norm_constraint(g, self.clip_error, range(g.ndim))
                       for g in raw_grads]
            updates = self.optimizer(clipped, all_params, learning_rate=self.learning_rate, rho=self.decay_rate)

        # Theano functions for training and computing cost
        logger.info("Compiling functions ...")
        train = theano.function(
            [input_var, target_values, actions],
            [loss, network_output, target_values, mask],
            updates=updates)
        predict = theano.function([input_var], network_output)

        return net, train, predict
Ejemplo n.º 50
0
def train(dataset, learn_step=0.005,
          weight_decay=1e-4, num_epochs=500,
          max_patience=100, data_augmentation=None,
          savepath=None, loadpath=None,
          early_stop_class=None,
          batch_size=None,
          resume=False,
          train_from_0_255=False):
    """Train a U-Net on the ISBI EM stacks dataset with early stopping.

    Args:
        dataset: name used as a sub-directory of ``savepath``.
        learn_step: Adam learning rate.
        weight_decay: scale of the L2 penalty; no penalty is added when it
            is not positive.
        num_epochs: maximum number of epochs.
        max_patience: number of consecutive epochs without improvement of
            the validation Jaccard score after which training stops.
        data_augmentation: keyword arguments for the training iterator's
            data augmentation; ``None`` means no augmentation.
        savepath: directory under which models, logs and the configuration
            are written.  Required.
        loadpath: when given and different from the effective save
            directory, the saved files are copied there at the end.
        early_stop_class: class index whose Jaccard score drives early
            stopping; ``None`` uses the mean over classes.
        batch_size: sequence indexed as ``[train, val, ...]``; defaults to
            ``[10, 1, 1]``.
        resume: accepted but not used by this function.
        train_from_0_255: accepted but not used by this function.

    Raises:
        ValueError: if ``savepath`` is ``None``.
    """
    # Avoid a shared mutable default argument.
    if data_augmentation is None:
        data_augmentation = {}

    #
    # Prepare load/save directories
    #
    # Parenthesised so the 'unet_' prefix is kept without augmentation; the
    # unparenthesised conditional used to evaluate to '' in that case.
    exp_name = 'unet_' + ('data_aug' if data_augmentation else '')

    if savepath is None:
        raise ValueError('A saving directory must be specified')

    savepath = os.path.join(savepath, dataset, exp_name)
    print(savepath)

    if not os.path.exists(savepath):
        os.makedirs(savepath)
    else:
        print('\033[93m The following folder already exists {}. '
              'It will be overwritten in a few seconds...\033[0m'.format(
                  savepath))

    print('Saving directory : ' + savepath)
    # Dump the arguments (and the locals defined so far) for reference.
    with open(os.path.join(savepath, "config.txt"), "w") as f:
        for key, value in locals().items():
            f.write('{} = {}\n'.format(key, value))

    #
    # Define symbolic variables
    #
    input_var = T.tensor4('input_var')
    target_var = T.ivector('target_var')

    #
    # Build dataset iterator
    #
    # `bs` carries the fallback; indexing `batch_size` directly crashed when
    # the argument was left at None.
    if batch_size is not None:
        bs = batch_size
    else:
        bs = [10, 1, 1]

    train_iter = IsbiEmStacksDataset(which_set='train',
                                     batch_size=bs[0],
                                     seq_per_subset=0,
                                     seq_length=0,
                                     data_augm_kwargs=data_augmentation,
                                     return_one_hot=False,
                                     return_01c=False,
                                     overlap=0,
                                     use_threads=True,
                                     shuffle_at_each_epoch=True,
                                     return_list=True,
                                     return_0_255=False)

    val_iter = IsbiEmStacksDataset(which_set='val',
                                   batch_size=bs[1],
                                   seq_per_subset=0,
                                   seq_length=0,
                                   return_one_hot=False,
                                   return_01c=False,
                                   use_threads=True,
                                   shuffle_at_each_epoch=False,
                                   return_list=True,
                                   return_0_255=False)
    test_iter = None

    batch = train_iter.next()
    input_dim = (np.shape(batch[0])[2], np.shape(batch[0])[3])  # (x, y) image shape

    n_batches_train = train_iter.nbatches
    n_batches_val = val_iter.nbatches
    n_batches_test = test_iter.nbatches if test_iter is not None else 0
    n_classes = train_iter.non_void_nclasses
    void_labels = train_iter.void_labels
    nb_in_channels = train_iter.data_shape[0]

    print("Batch. train: %d, val %d, test %d" % (n_batches_train, n_batches_val, n_batches_test))
    print("Nb of classes: %d" % (n_classes))
    print("Nb. of input channels: %d" % (nb_in_channels))

    #
    # Build network
    #
    net = build_UNet(n_input_channels=nb_in_channels,
                     num_output_classes=n_classes, base_n_filters=64, do_dropout=False,
                     input_dim=(None, None))

    output_layer = net["output_flattened"]

    #
    # Define and compile theano functions
    #
    print("Defining and compiling training functions")
    prediction = lasagne.layers.get_output(output_layer, input_var)
    loss = crossentropy_metric(prediction, target_var, void_labels)

    if weight_decay > 0:
        weightsl2 = regularize_network_params(output_layer, lasagne.regularization.l2)
        loss += weight_decay * weightsl2

    params = lasagne.layers.get_all_params(output_layer, trainable=True)
    updates = lasagne.updates.adam(loss, params, learning_rate=learn_step)

    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    print("Defining and compiling test functions")
    test_prediction = lasagne.layers.get_output(output_layer, input_var, deterministic=True)
    test_loss = crossentropy_metric(test_prediction, target_var, void_labels)
    test_acc = accuracy_metric(test_prediction, target_var, void_labels)
    test_jacc = jaccard_metric(test_prediction, target_var, n_classes)

    val_fn = theano.function([input_var, target_var], [test_loss, test_acc, test_jacc])

    #
    # Train
    #
    err_train = []
    err_valid = []
    acc_valid = []
    jacc_valid = []
    patience = 0

    # Training main loop
    print("Start training")
    for epoch in range(num_epochs):
        # Single epoch training and validation
        start_time = time.time()
        cost_train_tot = 0
        # Train
        print('Training steps ')
        for i in range(n_batches_train):
            print(i)
            # Get minibatch; labels are flattened to a 1-D vector
            X_train_batch, L_train_batch = train_iter.next()
            L_train_batch = np.reshape(L_train_batch, np.prod(L_train_batch.shape))

            # Training step
            cost_train = train_fn(X_train_batch, L_train_batch)
            cost_train_tot += cost_train

        err_train += [cost_train_tot/n_batches_train]

        # Validation
        cost_val_tot = 0
        acc_val_tot = 0
        # row 0 and row 1 are accumulated separately and divided below
        jacc_val_tot = np.zeros((2, n_classes))

        print('Validation steps')
        for i in range(n_batches_val):
            print(i)
            # Get minibatch
            X_val_batch, L_val_batch = val_iter.next()
            L_val_batch = np.reshape(L_val_batch, np.prod(L_val_batch.shape))

            # Validation step
            cost_val, acc_val, jacc_val = val_fn(X_val_batch, L_val_batch)

            acc_val_tot += acc_val
            cost_val_tot += cost_val
            jacc_val_tot += jacc_val

        err_valid += [cost_val_tot/n_batches_val]
        acc_valid += [acc_val_tot/n_batches_val]
        jacc_perclass_valid = jacc_val_tot[0, :] / jacc_val_tot[1, :]
        if early_stop_class is None:
            jacc_valid += [np.mean(jacc_perclass_valid)]
        else:
            jacc_valid += [jacc_perclass_valid[early_stop_class]]

        out_str = "EPOCH %i: Avg epoch training cost train %f, cost val %f" +\
            ", acc val %f, jacc val class 0 % f, jacc val class 1 %f, jacc val %f took %f s"
        out_str = out_str % (epoch, err_train[epoch],
                             err_valid[epoch],
                             acc_valid[epoch],
                             jacc_perclass_valid[0],
                             jacc_perclass_valid[1],
                             jacc_valid[epoch],
                             time.time()-start_time)
        print(out_str)

        with open(os.path.join(savepath, "unet_output.log"), "a") as f:
            f.write(out_str + "\n")

        # Early stopping and saving stuff.
        # The first epoch always becomes the initial best model and is saved,
        # so a "best" checkpoint exists even if no later epoch improves.
        # Previously epoch 0 was never saved and epoch 1 could never be
        # selected as best.
        if epoch == 0 or jacc_valid[epoch] > best_jacc_val:
            best_jacc_val = jacc_valid[epoch]
            patience = 0
            np.savez(os.path.join(savepath, 'new_unet_model_best.npz'),  *lasagne.layers.get_all_param_values(output_layer))
            np.savez(os.path.join(savepath, 'unet_errors_best.npz'), err_valid, err_train, acc_valid, jacc_valid)
        else:
            patience += 1

        np.savez(os.path.join(savepath, 'new_unet_model_last.npz'), *lasagne.layers.get_all_param_values(output_layer))
        np.savez(os.path.join(savepath, 'unet_errors_last.npz'),  err_valid, err_train, acc_valid, jacc_valid)
        # Finish training if patience has expired or max nber of epochs
        # reached
        if patience == max_patience or epoch == num_epochs-1:
            if test_iter is not None:
                # Load best model weights
                with np.load(os.path.join(savepath, 'new_unet_model_best.npz')) as f:
                    param_values = [f['arr_%d' % i] for i in range(len(f.files))]
                nlayers = len(lasagne.layers.get_all_params(output_layer))
                lasagne.layers.set_all_param_values(output_layer, param_values[:nlayers])
                # Test
                cost_test_tot = 0
                acc_test_tot = 0
                jacc_test_tot = np.zeros((2, n_classes))
                for i in range(n_batches_test):
                    # Get minibatch
                    X_test_batch, L_test_batch = test_iter.next()
                    L_test_batch = np.reshape(L_test_batch, np.prod(L_test_batch.shape))

                    # Test step
                    cost_test, acc_test, jacc_test = val_fn(X_test_batch, L_test_batch)

                    acc_test_tot += acc_test
                    cost_test_tot += cost_test
                    jacc_test_tot += jacc_test

                err_test = cost_test_tot/n_batches_test
                acc_test = acc_test_tot/n_batches_test
                jacc_test_perclass = jacc_test_tot[0, :] / jacc_test_tot[1, :]
                jacc_test = np.mean(jacc_test_perclass)

                out_str = "FINAL MODEL: err test % f, acc test %f, " +\
                    "jacc test class 0 %f, jacc test class 1 %f, jacc test %f"
                out_str = out_str % (err_test, acc_test, jacc_test_perclass[0],
                                     jacc_test_perclass[1], jacc_test)
                print(out_str)
            # Only copy when a destination was actually given; copying to
            # None used to raise.
            if loadpath is not None and savepath != loadpath:
                print('Copying model and other training files to {}'.format(loadpath))
                copy_tree(savepath, loadpath)

            # End
            return
Ejemplo n.º 51
0
def train(cf):
    """Train ``cf.net`` with early stopping on the validation Jaccard score.

    Reads from ``cf``: ``dataset``, ``train_crop_size``, ``batch_size``,
    ``net``, ``learning_rate``, ``lr_sched_decay``, ``weight_decay``,
    ``optimizer``, ``num_epochs``, ``max_patience``, ``savepath`` and,
    optionally, ``pretrained_model``.

    The best model is written to ``model.npz`` and the metric history to
    ``errors.npz`` inside ``cf.savepath``.  When training ends, the best
    model is restored and evaluated on the test set if it is not empty.
    """

    ###############
    #  load data  #
    ###############

    print('-' * 75)
    print('Loading data')
    # TODO: prepare a public version of the data loader
    train_iter, val_iter, test_iter = load_data(cf.dataset,
                                                train_crop_size=cf.train_crop_size,
                                                batch_size=cf.batch_size,
                                                horizontal_flip=True,
                                                )

    n_classes = train_iter.get_n_classes()
    void_labels = train_iter.get_void_labels()

    print('Number of images : train : {}, val : {}, test : {}'.format(
        train_iter.get_n_samples(), val_iter.get_n_samples(), test_iter.get_n_samples()))

    ###################
    #   Build model   #
    ###################

    # Build model and display summary
    net = cf.net
    net.summary()

    # Restore
    if hasattr(cf, 'pretrained_model'):
        print('Using a pretrained model : {}'.format(cf.pretrained_model))
        net.restore(cf.pretrained_model)

    # Compile functions
    print('Compilation starts at ' + str(datetime.now()).split('.')[0])
    params = lasagne.layers.get_all_params(net.output_layer, trainable=True)
    # shared so the scheduler below can decay it between epochs
    lr_shared = theano.shared(np.array(cf.learning_rate, dtype='float32'))
    lr_decay = np.array(cf.lr_sched_decay, dtype='float32')

    # Create loss and metrics
    for key in ['train', 'valid']:

        # LOSS
        pred = get_output(net.output_layer, deterministic=key == 'valid',
                          batch_norm_update_averages=False, batch_norm_use_averages=False)
        loss = crossentropy(pred, net.target_var, void_labels)

        if cf.weight_decay:
            weightsl2 = regularize_network_params(net.output_layer, lasagne.regularization.l2)
            loss += cf.weight_decay * weightsl2

        # METRICS
        I, U, acc = theano_metrics(pred, net.target_var, n_classes, void_labels)

        # COMPILE
        start_time_compilation = time.time()
        if key == 'train':
            updates = cf.optimizer(loss, params, learning_rate=lr_shared)
            train_fn = theano.function([net.input_var, net.target_var], [loss, I, U, acc], updates=updates)
        else:
            val_fn = theano.function([net.input_var, net.target_var], [loss, I, U, acc])

        print('{} compilation took {:.3f} seconds'.format(key, time.time() - start_time_compilation))

    ###################
    #    Main loops   #
    ###################

    # metric's sauce
    init_history = lambda: {'loss': [], 'jaccard': [], 'accuracy': []}
    history = {'train': init_history(), 'val': init_history(), 'test': init_history()}
    patience = 0
    best_jacc_val = 0
    best_epoch = 0

    if hasattr(cf, 'pretrained_model'):
        print('Validation score before training')
        # print() call form: valid on Python 3 and, with a single argument,
        # equivalent to the print statement on Python 2.
        print(batch_loop(val_iter, val_fn, 0, 'val', {'val': init_history()}))

    # Training main loop
    print('-' * 30)
    print('Training starts at ' + str(datetime.now()).split('.')[0])
    print('-' * 30)

    for epoch in range(cf.num_epochs):

        # Train
        start_time_train = time.time()
        history = batch_loop(train_iter, train_fn, epoch, 'train', history)
        # Validation
        start_time_valid = time.time()
        history = batch_loop(val_iter, val_fn, epoch, 'val', history)

        # Print
        out_str = \
            '\r\x1b[2 Epoch {} took {}+{} sec. ' \
            'loss = {:.5f} | jacc = {:.5f} | acc = {:.5f} || ' \
            'loss = {:.5f} | jacc = {:.5f} | acc = {:.5f}'.format(
                epoch, int(start_time_valid - start_time_train), int(time.time() - start_time_valid),
                history['train']['loss'][-1], history['train']['jaccard'][-1], history['train']['accuracy'][-1],
                history['val']['loss'][-1], history['val']['jaccard'][-1], history['val']['accuracy'][-1])

        # Monitoring jaccard
        if history['val']['jaccard'][-1] > best_jacc_val:
            out_str += ' (BEST)'
            best_jacc_val = history['val']['jaccard'][-1]
            best_epoch = epoch
            patience = 0
            net.save(os.path.join(cf.savepath, 'model.npz'))
        else:
            patience += 1

        print(out_str)

        np.savez(os.path.join(cf.savepath, 'errors.npz'), metrics=history, best_epoch=best_epoch)

        # Learning rate scheduler
        lr_shared.set_value(lr_shared.get_value() * lr_decay)

        # Finish training if patience has expired or max nber of epochs reached
        if patience == cf.max_patience or epoch == cf.num_epochs - 1:
            # Load best model weights
            net.restore(os.path.join(cf.savepath, 'model.npz'))

            # Test
            print('Training ends\nTest')
            if test_iter.get_n_samples() == 0:
                print('No test set')
            else:
                history = batch_loop(test_iter, val_fn, epoch, 'test', history)

                print('Average cost test = {:.5f} | jacc test = {:.5f} | acc_test = {:.5f} '.format(
                    history['test']['loss'][-1],
                    history['test']['jaccard'][-1],
                    history['test']['accuracy'][-1]))

                np.savez(os.path.join(cf.savepath, 'errors.npz'), metrics=history, best_epoch=best_epoch)

            # Exit
            return
OUTPUT = open(progress_filename, 'w')
OUTPUT.write("NUM_PARAMS,"+str(lasagne.layers.count_params(cnn_model['output']))+'\n')
OUTPUT.write("EPOCH,RMSE,MSE\n")
OUTPUT.close()

# Build the symbolic training-mode outputs (deterministic=False).
# Both outputs are multiplied by 1.0: this leaves the values unchanged but makes
# them fresh nodes in the theano graph, so theano does not complain about
# unused variables.
# NOTE(review): indexing [0] / [1] assumes get_output returns a pair
#   (prediction, visualisation) for cnn_model['output'] -- confirm against the model builder.
context_output_train = lasagne.layers.get_output(cnn_model['output'],deterministic=False)
train_prediction = context_output_train[0] * 1.0
visual_predictions_train = context_output_train[1] * 1.0
# flatten the prediction before comparing it element-wise with target_vals
train_prediction = train_prediction.flatten()
train_loss = lasagne.objectives.squared_error(target_vals,train_prediction)

# training cost = mean squared error + weighted L2 penalty over the network parameters
l2_loss = regularize_network_params(cnn_model['output'],l2)
train_cost = T.mean(train_loss) + l2_loss*l2_regularization_lambda

# collect the trainable parameters and derive the Adam update rules from the training cost
params = lasagne.layers.get_all_params(cnn_model['output'], trainable=True)
updates = lasagne.updates.adam(train_cost, params, learning_rate=learning_rate)

# Same construction for test mode (deterministic=True), again multiplying by 1.0 as above.
# Note that test_cost is the element-wise squared error: it is not averaged here
# and carries no L2 term.
context_output_test = lasagne.layers.get_output(cnn_model['output'],deterministic=True)
test_predicition = context_output_test[0] * 1.0
visual_predictions_test = context_output_test[1] * 1.0
test_predicition = test_predicition.flatten()
test_cost = lasagne.objectives.squared_error(target_vals,test_predicition)

#then define my theano functions for train and test
train_func = theano.function([input_atom,input_bonds,input_atom_index,\
Ejemplo n.º 53
0
    def initialize_network(self):
        """
        :description: builds the q network and its target twin, expresses the clipped td-error
            loss symbolically and compiles the two theano functions this object uses
            (stored as self._train and self._get_q_values). The work is done in this order:

            1. build the q network and target q network
            2. declare the symbolic variables the graph is written in
            3. allocate the shared variables that feed those symbols at call time
            4. formulate the symbolic loss
            5. formulate the symbolic updates
            6. compile the training function and the q-value function
        """
        batch_size = self.batch_size
        input_shape = self.input_shape
        lasagne.random.set_rng(self.rng)

        # 1. both networks come from the same builder with the same arguments
        self.l_out = self.build_network(input_shape, self.num_actions, batch_size)
        self.next_l_out = self.build_network(input_shape, self.num_actions, batch_size)
        self.reset_target_network()

        # 2. symbolic placeholders; terminals flag the end of an episode and are used
        #    below to mask out the future q values, i.e. Q(s',a')
        states = T.tensor4('states')
        actions = T.icol('actions')
        rewards = T.col('rewards')
        next_states = T.tensor4('next_states')
        terminals = T.icol('terminals')

        # 3. zero-filled shared storage, one slot per symbolic input
        float_type = theano.config.floatX
        column_pattern = (False, True)
        self.states_shape = (batch_size, 1) + input_shape
        self.states_shared = theano.shared(np.zeros(self.states_shape, dtype=float_type))
        self.next_states_shared = theano.shared(np.zeros(self.states_shape, dtype=float_type))
        self.rewards_shared = theano.shared(
            np.zeros((batch_size, 1), dtype=float_type), broadcastable=column_pattern)
        self.actions_shared = theano.shared(
            np.zeros((batch_size, 1), dtype='int32'), broadcastable=column_pattern)
        self.terminals_shared = theano.shared(
            np.zeros((batch_size, 1), dtype='int32'), broadcastable=column_pattern)

        # 4. td error: target comes from the target network, estimate from the q network
        q_vals = lasagne.layers.get_output(self.l_out, states)
        next_q_vals = lasagne.layers.get_output(self.next_l_out, next_states)
        not_terminal = T.ones_like(terminals) - terminals
        best_next_q = T.max(next_q_vals, axis=1, keepdims=True)
        target = rewards + not_terminal * self.discount * best_next_q
        # pick Q(s, a) for the action taken in each row, then restore the column shape
        taken_actions = actions.reshape((-1,))
        chosen_q = q_vals[T.arange(batch_size), taken_actions].reshape((-1, 1))
        diff = target - chosen_q

        # Clipping the td error at 1 with a plain minimum would zero the gradient
        # whenever |diff| exceeds 1.0. Instead the loss is quadratic up to 1.0 and the
        # excess magnitude is added linearly, so gradient still flows for large errors.
        magnitude = abs(diff)
        quadratic_part = T.minimum(magnitude, 1.0)
        linear_part = magnitude - quadratic_part
        elementwise_loss = 0.5 * quadratic_part ** 2 + linear_part
        penalty = regularize_network_params(self.l_out, l2)
        loss = T.mean(elementwise_loss) + self.regularization * penalty

        # 5. parameter updates according to the configured rule
        params = lasagne.layers.helper.get_all_params(self.l_out)
        updates = self.initialize_updates(self.update_rule, loss, params, self.learning_rate)

        # 6. the compiled functions take no arguments; data comes from the shared variables
        train_givens = {
            states: self.states_shared,
            next_states: self.next_states_shared,
            rewards: self.rewards_shared,
            actions: self.actions_shared,
            terminals: self.terminals_shared,
        }
        q_value_givens = {states: self.states_shared}
        self._train = theano.function([], [loss, q_vals], updates=updates, givens=train_givens)
        self._get_q_values = theano.function([], q_vals, givens=q_value_givens)
Ejemplo n.º 54
0
def test_space_invaders(game_title='SpaceInvaders-v0',
                        n_parallel_games=3,
                        replay_seq_len=2,
                        ):
    """
    Build a small agent for an Atari game, combine several objectives into one loss
    and run ten training iterations on freshly recorded sessions.

    The function makes no assertions; it only prints progress, so it passes as long
    as graph construction, compilation and the training iterations do not raise.

    :param game_title: name of atari game in Gym
    :param n_parallel_games: how many games we run in parallel
    :param replay_seq_len: how long is one replay session from a batch
    """

    # the gym environment is only needed here to read the action/observation metadata
    atari = gym.make(game_title)
    atari.reset()

    # Game Parameters
    n_actions = atari.action_space.n
    observation_shape = (None,) + atari.observation_space.shape
    action_names = atari.get_action_meanings()
    del atari
    # ##### Agent observations

    # image observation at current tick goes here
    observation_layer = InputLayer(observation_shape, name="images input")

    # move the last axis to position 1, i.e. reshape to [batch, color, x, y],
    # to allow for convolutional layers to work correctly
    observation_reshape = DimshuffleLayer(observation_layer, (0, 3, 1, 2))

    # Agent memory states
    window_size = 3

    # prev state input
    prev_window = InputLayer((None, window_size) + tuple(observation_reshape.output_shape[1:]),
                             name="previous window state")

    # our window
    window = WindowAugmentation(observation_reshape,
                                prev_window,
                                name="new window state")

    # maps each new memory layer to the input layer that receives its previous value
    memory_dict = {window: prev_window}

    # ##### Neural network body
    # you may use any other lasagne layers, including convolutions, batch_norms, maxout, etc

    # pixel-wise maximum over the temporal window (to avoid flickering)
    window_max = ExpressionLayer(window,
                                 lambda a: a.max(axis=1),
                                 output_shape=(None,) + window.output_shape[2:])

    # a simple lasagne network (try replacing with any other lasagne network and see what works best)
    nn = DenseLayer(window_max, num_units=50, name='dense0')

    # Agent policy and action picking
    q_eval = DenseLayer(nn,
                        num_units=n_actions,
                        nonlinearity=lasagne.nonlinearities.linear,
                        name="QEvaluator")

    # extra heads so the a2c objective below has a policy and a state value to work with
    policy_eval = DenseLayer(nn,
                        num_units=n_actions,
                        nonlinearity=lasagne.nonlinearities.softmax,
                        name="a2c action probas")
    state_value_eval = DenseLayer(nn,
                        num_units=1,
                        nonlinearity=None,
                        name="a2c state values")
    # resolver: picks actions from the policy head
    resolver = ProbabilisticResolver(policy_eval,  name="resolver")

    # agent
    agent = Agent(observation_layer,
                  memory_dict,
                  (q_eval,policy_eval,state_value_eval), resolver)

    # Since it's a single lasagne network, one can get its weights, output, etc
    # NOTE(review): the parameters are collected starting from `resolver`, which is fed
    #   by policy_eval only -- the q_eval and state_value_eval heads' own parameters look
    #   like they are left out of `weights` (and of the l2 term below). Confirm this is intended.
    weights = lasagne.layers.get_all_params(resolver, trainable=True)

    # Agent step function
    print('compiling react')
    applier_fun = agent.get_react_function()

    # a nice pythonic interface
    def step(observation, prev_memories='zeros', batch_size=n_parallel_games):
        """ returns actions and new states given observation and prev state
        Prev state in default setup should be [prev window,]
        Passing the string 'zeros' (the default) starts from all-zero float32 memories."""
        # default to zeros
        if prev_memories == 'zeros':
            prev_memories = [np.zeros((batch_size,) + tuple(mem.output_shape[1:]),
                                      dtype='float32')
                             for mem in agent.agent_states]
        res = applier_fun(np.array(observation), *prev_memories)
        # first output is the action, the remaining ones are the updated memories
        action = res[0]
        memories = res[1:]
        return action, memories

    # # Create and manage a pool of atari sessions to play with

    pool = GamePool(game_title, n_parallel_games)

    # play 50 steps with the untrained agent
    observation_log, action_log, reward_log, _, _, _ = pool.interact(step, 50)

    # show the names of the first few logged actions
    print(np.array(action_names)[np.array(action_log)[:3, :5]])

    # # experience replay pool
    # Create an environment with all default parameters
    env = SessionPoolEnvironment(observations=observation_layer,
                                 actions=resolver,
                                 agent_memories=agent.agent_states)

    def update_pool(env, pool, n_steps=100):
        """ a function that creates new sessions and adds them into the pool
        throwing the old ones away entirely for simplicity"""

        # remember the memories the pool had before this round of interaction
        preceding_memory_states = list(pool.prev_memory_states)

        # get interaction sessions
        observation_tensor, action_tensor, reward_tensor, _, is_alive_tensor, _ = pool.interact(step, n_steps=n_steps)

        # load them into experience replay environment
        env.load_sessions(observation_tensor, action_tensor, reward_tensor, is_alive_tensor, preceding_memory_states)

    # load first  sessions
    update_pool(env, pool, replay_seq_len)

    # A more sophisticated way of training is to store a large pool of sessions and train on random batches of them.
    # ### Training via experience replay

    # get agent's Q-values, policy, etc obtained via experience replay
    _env_states, _observations, _memories, _imagined_actions, estimators = agent.get_sessions(
        env,
        session_length=replay_seq_len,
        batch_size=env.batch_size,
        optimize_experience_replay=True,
    )
    # same order as the estimator tuple handed to Agent above
    (q_values_sequence,policy_sequence,value_sequence) = estimators

    # Evaluating loss function

    scaled_reward_seq = env.rewards
    # For SpaceInvaders, however, not scaling rewards is at least working

    # the objectives of all algorithms below are simply summed into one element-wise loss
    elwise_mse_loss = 0.
    
    #1-step algos
    for algo in qlearning,sarsa:
        elwise_mse_loss += algo.get_elementwise_objective(q_values_sequence,
                                                              env.actions[0],
                                                              scaled_reward_seq,
                                                              env.is_alive,
                                                              gamma_or_gammas=0.99, )
    #qlearning_n_step, tried with n_steps below, equal to and above the replay length, and with None
    for n in (1,3,replay_seq_len-1, replay_seq_len, replay_seq_len+1,None):
        elwise_mse_loss += qlearning_n_step.get_elementwise_objective(q_values_sequence,
                                                              env.actions[0],
                                                              scaled_reward_seq,
                                                              env.is_alive,
                                                              gamma_or_gammas=0.99,
                                                              n_steps=n)
        
    #a2c n_step; value_sequence[:,:,0] drops the trailing size-1 axis of the state-value head
    
    elwise_mse_loss += a2c_n_step.get_elementwise_objective(policy_sequence,
                                                            value_sequence[:,:,0],
                                                            env.actions[0],
                                                            scaled_reward_seq,
                                                            env.is_alive,
                                                            gamma_or_gammas=0.99,
                                                            n_steps=3)
    
    

    # compute mean over "alive" fragments
    mse_loss = elwise_mse_loss.sum() / env.is_alive.sum()

    # regularize network weights
    reg_l2 = regularize_network_params(resolver, l2) * 10 ** -4

    loss = mse_loss + reg_l2

    # Compute weight updates
    updates = lasagne.updates.adadelta(loss, weights, learning_rate=0.01)

    # mean session reward
    mean_session_reward = env.rewards.sum(axis=1).mean()

    # # Compile train and evaluation functions

    print('compiling')
    train_fun = theano.function([], [loss, mean_session_reward], updates=updates)
    evaluation_fun = theano.function([], [loss, mse_loss, reg_l2, mean_session_reward])
    print("I've compiled!")

    # # Training loop

    for epoch_counter in range(10):
        # record fresh sessions, take one training step, then re-evaluate for reporting
        update_pool(env, pool, replay_seq_len)
        # NOTE: this rebinds `loss` from the symbolic expression to the value returned
        # by train_fun; harmless here because both functions are already compiled
        loss, avg_reward = train_fun()
        full_loss, q_loss, l2_penalty, avg_reward_current = evaluation_fun()

        print("epoch %i,loss %.5f, rewards: %.5f " % (
            epoch_counter, full_loss, avg_reward_current))
        print("rec %.3f reg %.3f" % (q_loss, l2_penalty))
Ejemplo n.º 55
0
def test_memory(game_title='SpaceInvaders-v0',
                        n_parallel_games=3,
                        replay_seq_len=2,
                        ):
    """
    Build an agent that carries one memory of every kind used below (window, stack,
    RNN cell, GRU cell, GRU memory layer, LSTM cells with and without peepholes) and
    run ten q-learning training iterations on freshly recorded Atari sessions.

    The function makes no assertions; it only prints progress, so it passes as long
    as graph construction, compilation and the training iterations do not raise.

    :param game_title: name of atari game in Gym
    :param n_parallel_games: how many games we run in parallel
    :param replay_seq_len: how long is one replay session from a batch
    """

    # the gym environment is only needed here to read the action/observation metadata
    atari = gym.make(game_title)
    atari.reset()

    # Game Parameters
    n_actions = atari.action_space.n
    observation_shape = (None,) + atari.observation_space.shape
    action_names = atari.get_action_meanings()
    del atari
    # ##### Agent observations

    # image observation at current tick goes here
    observation_layer = InputLayer(observation_shape, name="images input")

    # move the last axis to position 1, i.e. reshape to [batch, color, x, y],
    # to allow for convolutional layers to work correctly
    observation_reshape = DimshuffleLayer(observation_layer, (0, 3, 1, 2))

    # Agent memory states
    
    # maps each new memory layer to the input layer that receives its previous value;
    # ordered, so the memories keep the order in which they are registered below
    memory_dict = OrderedDict([])
    
    
    ### Window
    window_size = 3

    # prev state input
    prev_window = InputLayer((None, window_size) + tuple(observation_reshape.output_shape[1:]),
                             name="previous window state")
    

    # our window
    window = WindowAugmentation(observation_reshape,
                                prev_window,
                                name="new window state")
    
    # pixel-wise maximum over the temporal window (to avoid flickering)
    window_max = ExpressionLayer(window,
                                 lambda a: a.max(axis=1),
                                 output_shape=(None,) + window.output_shape[2:])

    
    memory_dict[window] = prev_window
    
    ### Stack
    # stack state is (batch, stack_h, stack_w); inputs have stack_w units, controls have 3
    # NOTE(review): stack_inputs and stack_controls are both named "prev_stack", although
    #   neither is the previous stack state -- looks like a copy-paste leftover.
    stack_w,stack_h = 4, 5
    stack_inputs = DenseLayer(observation_reshape,stack_w,name="prev_stack")
    stack_controls = DenseLayer(observation_reshape,3,
                              nonlinearity=lasagne.nonlinearities.softmax,
                              name="prev_stack")
    prev_stack = InputLayer((None,stack_h,stack_w),
                             name="previous stack state")
    stack = StackAugmentation(stack_inputs,prev_stack, stack_controls)
    memory_dict[stack] = prev_stack
    
    # slice index 0 along axis 1 of the stack state
    stack_top = lasagne.layers.SliceLayer(stack,0,1)

    
    ### RNN preset
    
    prev_rnn = InputLayer((None,16),
                             name="previous RNN state")
    new_rnn = RNNCell(prev_rnn,observation_reshape)
    memory_dict[new_rnn] = prev_rnn
    
    ### GRU preset
    prev_gru = InputLayer((None,16),
                             name="previous GRUcell state")
    new_gru = GRUCell(prev_gru,observation_reshape)
    memory_dict[new_gru] = prev_gru
    
    ### GRUMemoryLayer (15 units)
    # NOTE(review): this input layer reuses the name "previous GRUcell state" from the GRU preset above.
    prev_gru1 = InputLayer((None,15),
                             name="previous GRUcell state")
    new_gru1 = GRUMemoryLayer(15,observation_reshape,prev_gru1)
    memory_dict[new_gru1] = prev_gru1
    
    # LSTM with peepholes (13 units); both the cell and the output are carried as memory
    # NOTE(review): this cell is named "newLSTM1 [...]" although its variables are lstm0.
    prev_lstm0_cell = InputLayer((None,13),
                             name="previous LSTMCell hidden state [with peepholes]")
    
    prev_lstm0_out = InputLayer((None,13),
                             name="previous LSTMCell output state [with peepholes]")

    new_lstm0_cell,new_lstm0_out = LSTMCell(prev_lstm0_cell,prev_lstm0_out,
                                            input_or_inputs = observation_reshape,
                                            peepholes=True,name="newLSTM1 [with peepholes]")
    
    memory_dict[new_lstm0_cell] = prev_lstm0_cell
    memory_dict[new_lstm0_out] = prev_lstm0_out


    # LSTM without peepholes (14 units)
    prev_lstm1_cell = InputLayer((None,14),
                             name="previous LSTMCell hidden state [no peepholes]")
    
    prev_lstm1_out = InputLayer((None,14),
                             name="previous LSTMCell output state [no peepholes]")

    new_lstm1_cell,new_lstm1_out = LSTMCell(prev_lstm1_cell,prev_lstm1_out,
                                            input_or_inputs = observation_reshape,
                                            peepholes=False,name="newLSTM1 [no peepholes]")
    
    memory_dict[new_lstm1_cell] = prev_lstm1_cell
    memory_dict[new_lstm1_out] = prev_lstm1_out
    
    ## concat everything
    
    # debug output: shapes of the first five layers that get concatenated
    for i in [flatten(window_max),stack_top,new_rnn,new_gru,new_gru1]:
        print(i.output_shape)
    # only the LSTM outputs (not their cells) are fed forward
    all_memory = concat([flatten(window_max),stack_top,new_rnn,new_gru,new_gru1,new_lstm0_out,new_lstm1_out,])
    
    
    

    # ##### Neural network body
    # you may use any other lasagne layers, including convolutions, batch_norms, maxout, etc


    # a simple lasagne network (try replacing with any other lasagne network and see what works best)
    nn = DenseLayer(all_memory, num_units=50, name='dense0')

    # Agent policy and action picking
    q_eval = DenseLayer(nn,
                        num_units=n_actions,
                        nonlinearity=lasagne.nonlinearities.linear,
                        name="QEvaluator")

    # resolver
    resolver = EpsilonGreedyResolver(q_eval, epsilon=0.1, name="resolver")

    # agent
    agent = Agent(observation_layer,
                  memory_dict,
                  q_eval, resolver)

    # Since it's a single lasagne network, one can get its weights, output, etc
    weights = lasagne.layers.get_all_params(resolver, trainable=True)

    # Agent step function
    print('compiling react')
    applier_fun = agent.get_react_function()

    # a nice pythonic interface
    def step(observation, prev_memories='zeros', batch_size=n_parallel_games):
        """ returns actions and new states given observation and prev state
        Prev state should hold one array per entry of agent.agent_states.
        Passing the string 'zeros' (the default) starts from all-zero float32 memories."""
        # default to zeros
        if prev_memories == 'zeros':
            prev_memories = [np.zeros((batch_size,) + tuple(mem.output_shape[1:]),
                                      dtype='float32')
                             for mem in agent.agent_states]
        res = applier_fun(np.array(observation), *prev_memories)
        # first output is the action, the remaining ones are the updated memories
        action = res[0]
        memories = res[1:]
        return action, memories

    # # Create and manage a pool of atari sessions to play with

    pool = GamePool(game_title, n_parallel_games)

    # play 50 steps with the untrained agent
    observation_log, action_log, reward_log, _, _, _ = pool.interact(step, 50)

    # show the names of the first few logged actions
    print(np.array(action_names)[np.array(action_log)[:3, :5]])

    # # experience replay pool
    # Create an environment with all default parameters
    env = SessionPoolEnvironment(observations=observation_layer,
                                 actions=resolver,
                                 agent_memories=agent.agent_states)

    def update_pool(env, pool, n_steps=100):
        """ a function that creates new sessions and adds them into the pool
        throwing the old ones away entirely for simplicity"""

        # remember the memories the pool had before this round of interaction
        preceding_memory_states = list(pool.prev_memory_states)

        # get interaction sessions
        observation_tensor, action_tensor, reward_tensor, _, is_alive_tensor, _ = pool.interact(step, n_steps=n_steps)

        # load them into experience replay environment
        env.load_sessions(observation_tensor, action_tensor, reward_tensor, is_alive_tensor, preceding_memory_states)

    # load first  sessions
    update_pool(env, pool, replay_seq_len)

    # A more sophisticated way of training is to store a large pool of sessions and train on random batches of them.
    # ### Training via experience replay

    # get agent's Q-values obtained via experience replay
    _env_states, _observations, _memories, _imagined_actions, q_values_sequence = agent.get_sessions(
        env,
        session_length=replay_seq_len,
        batch_size=env.batch_size,
        optimize_experience_replay=True,
    )

    # Evaluating loss function

    scaled_reward_seq = env.rewards
    # For SpaceInvaders, however, not scaling rewards is at least working


    elwise_mse_loss = qlearning.get_elementwise_objective(q_values_sequence,
                                                          env.actions[0],
                                                          scaled_reward_seq,
                                                          env.is_alive,
                                                          gamma_or_gammas=0.99, )

    # compute mean over "alive" fragments
    mse_loss = elwise_mse_loss.sum() / env.is_alive.sum()

    # regularize network weights
    reg_l2 = regularize_network_params(resolver, l2) * 10 ** -4

    loss = mse_loss + reg_l2

    # Compute weight updates
    updates = lasagne.updates.adadelta(loss, weights, learning_rate=0.01)

    # mean session reward
    mean_session_reward = env.rewards.sum(axis=1).mean()

    # # Compile train and evaluation functions

    print('compiling')
    train_fun = theano.function([], [loss, mean_session_reward], updates=updates)
    evaluation_fun = theano.function([], [loss, mse_loss, reg_l2, mean_session_reward])
    print("I've compiled!")

    # # Training loop

    for epoch_counter in range(10):
        # record fresh sessions, take one training step, then re-evaluate for reporting
        update_pool(env, pool, replay_seq_len)
        # NOTE: this rebinds `loss` from the symbolic expression to the value returned
        # by train_fun; harmless here because both functions are already compiled
        loss, avg_reward = train_fun()
        full_loss, q_loss, l2_penalty, avg_reward_current = evaluation_fun()

        print("epoch %i,loss %.5f, rewards: %.5f " % (
            epoch_counter, full_loss, avg_reward_current))
        print("rec %.3f reg %.3f" % (q_loss, l2_penalty))
Ejemplo n.º 56
0
def test_reasoning_value_based(n_parallel_games=25, algo=qlearning, n_steps=1):
    """
    Train a single-RNN-cell agent on the boolean reasoning experiment until the
    greedy policy's mean session reward exceeds 2.

    :param n_parallel_games: how many games are generated for every training batch
    :param algo: training algorithm to use (module providing get_elementwise_objective)
    :param n_steps: accepted for signature compatibility; not used by this function
    :raises ValueError: if the reward threshold is never reached within 5000 epochs
    """
    # number of hidden neurons in the memory cell
    hidden_size = 64

    # experiment environment with default parameters
    env = experiment.BooleanReasoningEnvironment()

    # --- network inputs: current observation and previous memory state
    obs_input = lasagne.layers.InputLayer((None,) + tuple(env.observation_shapes),
                                          name="observation_input")
    prev_memory_input = lasagne.layers.InputLayer([None, hidden_size],
                                                  name="prev_state_input")

    # --- memory cell (this isn't the same as lasagne recurrent units)
    memory_cell = RNNCell(prev_memory_input, obs_input, name="rnn0")

    # --- one linear q-value per action on top of the memory
    q_layer = lasagne.layers.DenseLayer(
        memory_cell,
        num_units=env.n_actions,
        nonlinearity=lasagne.nonlinearities.linear,
        name="QEvaluator",
    )

    # --- epsilon-greedy action picking; epsilon is shared so the loop below can anneal it
    epsilon = theano.shared(np.float32(0.1), name="e-greedy.epsilon")
    resolver = EpsilonGreedyResolver(q_layer, epsilon=epsilon, name="resolver")

    agent = Agent(
        obs_input,
        agent_states={memory_cell: prev_memory_input},
        policy_estimators=q_layer,
        action_layers=resolver,
    )

    # trainable parameters of the whole network ending in the resolver
    weights = lasagne.layers.get_all_params(resolver, trainable=True)

    # --- symbolic interaction sequences of length <= 10
    sessions = agent.get_sessions(env, session_length=10, batch_size=env.batch_size)
    (state_seq,), observation_seq, agent_state, action_seq, qvalues_seq = sessions
    hidden_seq = agent_state[memory_cell]

    # rewards for all actions, and whether each session is still active at each tick
    rewards_seq = env.get_reward_sequences(state_seq, action_seq)
    is_alive_seq = env.get_whether_alive(observation_seq)

    # gamma: what fraction of a reward is retained if it is obtained one tick later
    gamma = theano.shared(np.float32(0.99), name='q_learning_gamma')

    elementwise_error = algo.get_elementwise_objective(
        qvalues_seq, action_seq, rewards_seq, is_alive_seq, gamma_or_gammas=gamma)

    # sum over steps, average over sessions, plus l2 penalty on the network weights
    mse_Qloss = elementwise_error.sum(axis=1).mean()
    reg_l2 = regularize_network_params(resolver, l2) * 10 ** -3
    loss = mse_Qloss + reg_l2

    updates = lasagne.updates.adadelta(loss, weights, learning_rate=0.1)

    # reward summed over steps, averaged over sessions
    mean_session_reward = rewards_seq.sum(axis=1).mean()

    train_fun = theano.function([], [loss, mean_session_reward], updates=updates)
    compute_mean_session_reward = theano.function([], mean_session_reward)

    score_log = Metrics()

    for epoch in range(5000):
        # anneal the chance of a random action from 1.0 towards 0.05
        annealed = 0.05 + 0.95 * np.exp(-epoch / 2500.)
        resolver.epsilon.set_value(np.float32(annealed))

        # one training step on a freshly generated batch
        env.generate_new_data_batch(n_parallel_games)
        batch_loss, batch_reward = train_fun()

        # progress is only reported (and convergence only checked) every 100th epoch
        if epoch % 100 != 0:
            continue

        print(epoch),

        # reward under the current epsilon-greedy strategy
        reward_eps_greedy = compute_mean_session_reward()
        score_log["expected epsilon-greedy reward"][epoch] = reward_eps_greedy

        # reward under a purely greedy strategy (epsilon is restored next epoch)
        resolver.epsilon.set_value(0)
        reward_greedy = compute_mean_session_reward()
        score_log["expected greedy reward"][epoch] = reward_greedy

        if reward_greedy > 2:
            print("converged")
            break
    else:
        # the loop ran out of epochs without ever hitting the break above
        print("diverged")
        raise ValueError("Algorithm diverged")
            
Ejemplo n.º 57
0
def build_update_functions(train_set_x, train_set_y,
                           valid_set_x, valid_set_y,
                           network,
                           y, X,
                           train_MASK, val_MASK,
                           batch_size=32,
                           l2_reg=.0001,
                           learning_rate=.005,
                           momentum=.9):
    """
    Compile the minibatch training and validation functions for `network`.

    The error is a masked RMSE: y must not contain NaN (replace missing targets
    with any value, e.g. 0 or -1) and the mask must have the same shape as y,
    with 0 where the target is missing and 1 everywhere else. Masked-out entries
    contribute neither to the reported error nor to the parameter updates.

    :param train_set_x: training inputs; a theano shared variable (get_value() is called on it)
    :param train_set_y: training targets, sliced per minibatch alongside train_set_x
    :param valid_set_x: validation inputs; a theano shared variable
    :param valid_set_y: validation targets, sliced per minibatch alongside valid_set_x
    :param network: output layer of the lasagne network
    :param y: symbolic target variable, substituted per minibatch via `givens`
    :param X: symbolic input variable, substituted per minibatch via `givens`
    :param train_MASK: mask for the training targets, sliced like train_set_y
    :param val_MASK: mask for the validation targets, sliced like valid_set_y
    :param batch_size: number of rows per minibatch (the last batch may be shorter)
    :param l2_reg: weight of the l2 penalty; the RMSE is weighted by (1 - l2_reg)
    :param learning_rate: learning rate handed to rmsprop
    :param momentum: unused (rmsprop takes no momentum); kept for backward compatibility
    :return: (train_fn, val_fn). train_fn(index) applies one rmsprop update and returns
        [regularized loss, masked RMSE]; val_fn(index) returns
        [masked RMSE, deterministic network prediction] for validation minibatch `index`.
    """
    def _masked_rmse(output, mask):
        # squared error -> mask-weighted sum divided by the sum of the mask -> square root
        masked_mse = aggregate(squared_error(output, y), weights=mask, mode='normalized_sum')
        return T.sqrt(masked_mse)

    # --- training expressions (stochastic forward pass)
    prediction = get_output(network)
    trainMASK = T.matrix('trainMASK')
    loss_RMSE = _masked_rmse(prediction, trainMASK)

    # convex combination of the data term and the l2 penalty
    l2_penalty = regularize_network_params(network, l2)
    loss = (1 - l2_reg) * loss_RMSE + l2_reg * l2_penalty

    params = get_all_params(network, trainable = True)

    print('RMSPROP \n')
    updates = rmsprop(loss, params, learning_rate=learning_rate)

    # --- validation/test expressions (deterministic forward pass, no l2 term)
    test_prediction = get_output(network, deterministic=True)
    valMASK = T.matrix('valMASK')
    test_loss = _masked_rmse(test_prediction, valMASK)

    # index of the minibatch to slice out of the data sets
    index = T.lscalar()

    train_set_x_size = train_set_x.get_value().shape[0]
    val_set_x_size = valid_set_x.get_value().shape[0]

    def _minibatch(data, n_rows):
        # rows [index * batch_size, (index + 1) * batch_size), clipped to the data set size
        return data[index * batch_size: T.minimum((index + 1) * batch_size, n_rows)]

    # training function
    train_fn = theano.function(inputs=[index],
                               outputs=[loss, loss_RMSE],
                               updates=updates,
                               givens={X: _minibatch(train_set_x, train_set_x_size),
                                       y: _minibatch(train_set_y, train_set_x_size),
                                       trainMASK: _minibatch(train_MASK, train_set_x_size)})

    # validation function: return the deterministic prediction, i.e. the same output
    # test_loss was computed from, not the stochastic training-mode `prediction`
    val_fn = theano.function(inputs=[index],
                             outputs=[test_loss, test_prediction],
                             givens={X: _minibatch(valid_set_x, val_set_x_size),
                                     y: _minibatch(valid_set_y, val_set_x_size),
                                     valMASK: _minibatch(val_MASK, val_set_x_size)})
    return train_fn, val_fn
Ejemplo n.º 58
0
def train(model, batch_size=200, learning_rate=0.1):
    """Train a Lasagne classifier with momentum SGD and early stopping.

    The network is created by calling ``model()`` and trained on the data
    returned by ``load_cifar_whitened()``.  The test split is also used as
    the validation split.  The training objective is the mean categorical
    cross-entropy plus an L2 penalty (weight 0.001) on the parameters
    reachable from ``net['output']``.

    Side effects:

    * ``save_model`` is called with ``"<model name>_<epoch>.pklz"`` after
      every 50 completed epochs (including before the first one).
    * Each time the validation error improves, a row is appended to
      ``"<model name>_results.csv"``.
    * Progress is printed to stderr, the final summary to stdout.

    :type model: callable
    :param model: zero-argument callable returning a mapping of layers with
        at least the keys ``'input'`` and ``'output'``; its ``__name__`` is
        used to build the output file names

    :type batch_size: int
    :param batch_size: number of examples per mini-batch; examples that do
        not fill a complete batch are never visited

    :type learning_rate: float
    :param learning_rate: initial learning rate; it is multiplied by 0.1
        after 200, 250 and 300 completed epochs

    :return: None
    """
    np.random.seed(5468)
    net = model()

    x = net['input'].input_var
    y = T.ivector('y')

    print("........ building model")
    # Stochastic graph for the updates, deterministic graph for evaluation.
    train_prediction = lasagne.layers.get_output(net['output'], x, deterministic=False)
    test_prediction = lasagne.layers.get_output(net['output'], x, deterministic=True)

    lamda = 0.001
    l2_penalty = regularize_network_params(net['output'], l2)

    loss = lasagne.objectives.categorical_crossentropy(train_prediction, y)
    loss_train = lasagne.objectives.aggregate(loss, mode='mean') + lamda * l2_penalty

    params = lasagne.layers.get_all_params(net['output'], trainable=True)

    # The learning rate is a function input so it can be decayed from Python
    # without recompiling the training function.
    lr_theano = T.fscalar()
    updates = lasagne.updates.momentum(loss_train, params, momentum=np.float32(0.9), learning_rate=lr_theano)

    # Zero-one error of the deterministic prediction.
    y_pred = T.argmax(test_prediction, axis=1)
    errors = T.mean(T.neq(y_pred, y))

    # Symbolic mini-batch index and the slice it selects.
    index = T.iscalar()
    batch = slice(index * batch_size, (index + 1) * batch_size)

    # Load Dataset
    train_x, train_y, test_x, test_y = load_cifar_whitened()
    valid_x, valid_y = test_x, test_y

    n_train_batches = train_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_x.get_value(borrow=True).shape[0] // batch_size

    train_model = theano.function(inputs=[index, lr_theano], outputs=[loss_train], updates=updates,
                                  givens={x: train_x[batch], y: train_y[batch]})

    validate_model = theano.function(inputs=[index], outputs=[errors],
                                     givens={x: valid_x[batch], y: valid_y[batch]})

    test_model = theano.function(inputs=[index], outputs=[errors],
                                 givens={x: test_x[batch], y: test_y[batch]})

    print("........ training")
    model_name = model.__name__
    n_epochs = 350
    lr_epochs = [200, 250, 300]  # epochs at which the learning rate is divided by 10
    verbose = True
    lr = learning_rate

    # early-stopping parameters
    patience = 100000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.9995  # a relative improvement of this much is
                                    # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    # Name of the function that called train(); read once here so no frame
    # objects are kept alive for the duration of the training run.
    caller_name = inspect.getouterframes(inspect.currentframe(), 2)[1][3]

    while (epoch < n_epochs) and (not done_looping):
        # Checkpoint / decay are keyed on the number of completed epochs.
        if epoch % 50 == 0 or epoch in lr_epochs:
            save_model(net['output'], "{0}_{1}.pklz".format(model_name, epoch))
        if epoch in lr_epochs:
            lr *= 0.1
        epoch = epoch + 1

        for minibatch_index in range(n_train_batches):

            # Global mini-batch counter across all epochs.
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration % 100 == 0) and verbose:
                print('training @ iter = ', iteration, file=sys.stderr)

            cost_ij = train_model(minibatch_index, lr)

            if (iteration + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in range(n_valid_batches)]

                this_validation_loss = np.mean(validation_losses)

                if verbose:
                    print('epoch %i, loss %f, minibatch %i/%i, validation error %f %%' %
                          (epoch,
                           cost_ij[0],
                           minibatch_index + 1,
                           n_train_batches,
                           this_validation_loss * 100.), file=sys.stderr)

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                       improvement_threshold:
                        patience = max(patience, iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in range(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)

                    if verbose:
                        print(('     epoch %i, minibatch %i/%i, test error of '
                               'best model %f %%') %
                              (epoch, minibatch_index + 1,
                               n_train_batches,
                               test_score * 100.), file=sys.stderr)
                        # The context manager closes the file even if the
                        # write fails.  The logged learning rate is the
                        # initial one, not the decayed value.
                        with open(model_name + '_results.csv', 'a') as csvfile:
                            csv.writer(csvfile).writerow(
                                [best_validation_loss, epoch, best_iter, n_train_batches,
                                 test_score, model_name, learning_rate])

            # Stop when patience is exhausted or both errors reached zero.
            if patience <= iteration or (best_validation_loss == 0.0 and test_score == 0.0):
                done_looping = True
                break

    end_time = timeit.default_timer()

    # Print out summary
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The training process for function ' +
           caller_name +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)))
Ejemplo n.º 59
0
def train(dataset, learning_rate=0.0005,
          weight_decay=0.001, num_epochs=500,
          max_patience=25, data_augmentation={},
          savepath=None, loadpath=None,
          batch_size=None, resume=False):
    """Train the 'fcn1D' network with Adam and patience-based early stopping.

    Every epoch runs ``train_iter.nbatches`` training steps followed by
    ``val_iter.nbatches`` validation steps, appends a summary line to
    ``fcn1D_output.log`` and stores the latest parameters and learning curves
    as ``.npz`` files in the experiment directory.  Training stops after
    ``max_patience`` non-improving epochs or after ``num_epochs`` epochs.

    Args:
        dataset: Name used as a sub-directory of ``savepath`` / ``loadpath``.
            It is only used to build paths; the data itself always comes
            from ``Cortical6LayersDataset``.
        learning_rate: Initial Adam step size.  It is multiplied by 0.99 at
            the start of every epoch, including the first one.
        weight_decay: Coefficient of the L2 penalty added to the training
            loss; no penalty is added when it is not positive.
        num_epochs: Maximum number of epochs.
        max_patience: Number of epochs without a new best validation Jaccard
            score after which training stops.
        data_augmentation: Passed to the training iterator as
            ``data_augm_kwargs``.
        savepath: Root directory for the experiment outputs (required).
        loadpath: Root directory the outputs are copied to when training
            ends.  ``None`` means "same as ``savepath``", i.e. no copy.
        batch_size: Sequence whose first two items are the training and
            validation batch sizes; defaults to ``[1024, 1024, 1]``.
        resume: Accepted but not used by this function.

    Raises:
        ValueError: If ``savepath`` is None.
    """
    if savepath is None:
        raise ValueError('A saving directory must be specified')

    if batch_size is None:
        batch_size = [1024, 1024, 1]

    # Model hyperparameters
    n_filters = 64
    filter_size = 25
    depth = 8
    block = 'bn_relu_conv'

    # Hyperparameters for the dataset loader
    smooth_or_raw = 'both'  # use both input channels
    shuffle_at_each_epoch = True

    #
    # Prepare load/save directories
    #

    exp_name = 'fcn1D'
    exp_name += '_lrate=' + str(learning_rate)
    exp_name += '_fil=' + str(n_filters)
    exp_name += '_fsizes=' + str(filter_size)
    exp_name += '_depth=' + str(depth)
    exp_name += '_data=' + smooth_or_raw
    exp_name += '_decay=' + str(weight_decay)
    exp_name += '_pat=' + str(max_patience)

    savepath = os.path.join(savepath, dataset, exp_name)
    if loadpath is None:
        # os.path.join() rejects None, so the default must be resolved first.
        # Using the save directory makes the final copy step a no-op.
        loadpath = savepath
    else:
        loadpath = os.path.join(loadpath, dataset, exp_name)
    print('Savepath : ')
    print(savepath)
    print('Loadpath : ')
    print(loadpath)

    if not os.path.exists(savepath):
        os.makedirs(savepath)
    else:
        print('\033[93m The following folder already exists {}. '
              'It will be overwritten in a few seconds...\033[0m'.format(
            savepath))

    print('Saving directory : ' + savepath)
    with open(os.path.join(savepath, "config.txt"), "w") as f:
        # Iterate over a snapshot: the loop itself binds new local names, and
        # the mapping returned by locals() may be refreshed while iterating
        # (e.g. when a debugger or tracer is active).
        for key, value in list(locals().items()):
            f.write('{} = {}\n'.format(key, value))

    #
    # Define symbolic variables
    #
    input_var = T.tensor3('input_var')  # n_example*nb_in_channels*ray_size
    target_var = T.ivector('target_var')  # n_example*ray_size
    # The learning rate is a shared variable so it can be decayed between
    # epochs without recompiling the training function.
    learn_step = theano.shared(np.array(learning_rate, dtype=theano.config.floatX))

    #
    # Build dataset iterator
    #

    if smooth_or_raw == 'both':
        nb_in_channels = 2
        use_threads = False
    else:
        nb_in_channels = 1
        use_threads = True

    train_iter = Cortical6LayersDataset(
        which_set='train',
        smooth_or_raw=smooth_or_raw,
        batch_size=batch_size[0],
        data_augm_kwargs=data_augmentation,
        shuffle_at_each_epoch=shuffle_at_each_epoch,
        return_one_hot=False,
        return_01c=False,
        return_list=False,
        use_threads=use_threads,
        preload=True)

    val_iter = Cortical6LayersDataset(
        which_set='valid',
        smooth_or_raw=smooth_or_raw,
        batch_size=batch_size[1],
        shuffle_at_each_epoch=shuffle_at_each_epoch,
        return_one_hot=False,
        return_01c=False,
        return_list=False,
        use_threads=use_threads,
        preload=True)

    # No test iterator is built here, so the test batch count is 0.
    test_iter = None

    n_batches_train = train_iter.nbatches
    n_batches_val = val_iter.nbatches
    n_batches_test = test_iter.nbatches if test_iter is not None else 0
    n_classes = train_iter.non_void_nclasses
    void_labels = train_iter.void_labels

    #
    # Build network
    #
    simple_net_output, net = build_model(input_var,
                                         filter_size=filter_size,
                                         n_filters=n_filters,
                                         depth=depth,
                                         block=block,
                                         nb_in_channels=nb_in_channels,
                                         n_classes=n_classes)

    #
    # Define and compile theano functions
    #
    print("Defining and compiling training functions")

    prediction = lasagne.layers.get_output(simple_net_output[0])
    loss = categorical_crossentropy(prediction, target_var)
    loss = loss.mean()

    if weight_decay > 0:
        weightsl2 = regularize_network_params(
            simple_net_output, lasagne.regularization.l2)
        loss += weight_decay * weightsl2

    train_acc = accuracy_metric(prediction, target_var, void_labels)

    params = lasagne.layers.get_all_params(simple_net_output, trainable=True)
    updates = lasagne.updates.adam(loss, params, learning_rate=learn_step)

    train_fn = theano.function([input_var, target_var], [loss, train_acc], updates=updates)

    print("Done")

    print("Defining and compiling valid functions")
    # Deterministic forward pass for evaluation; no weight decay in this loss.
    valid_prediction = lasagne.layers.get_output(simple_net_output[0], deterministic=True)
    valid_loss = categorical_crossentropy(valid_prediction, target_var).mean()
    valid_acc = accuracy_metric(valid_prediction, target_var, void_labels)
    valid_jacc = jaccard(valid_prediction, target_var, n_classes)

    valid_fn = theano.function([input_var, target_var], [valid_loss, valid_acc, valid_jacc])
    print("Done")

    #
    # Train loop
    #
    # Per-epoch learning curves.
    err_train = []
    acc_train = []

    err_valid = []
    acc_valid = []
    jacc_valid = []
    patience = 0

    # Training main loop
    print("Start training")

    for epoch in range(num_epochs):
        # Exponential decay, applied before the epoch's first update.
        learn_step.set_value((learn_step.get_value() * 0.99).astype(theano.config.floatX))

        # Single epoch training and validation
        start_time = time.time()
        # Cost train and acc train for this epoch
        cost_train_epoch = 0
        acc_train_epoch = 0

        for i in range(n_batches_train):
            # Get minibatch (comment the next line if only 1 minibatch in training)
            train_batch = train_iter.next()
            X_train_batch, L_train_batch, idx_train_batch = train_batch['data'], train_batch['labels'], \
                                                            train_batch['filenames'][0]
            # Flatten the labels to match the ivector target variable.
            L_train_batch = np.reshape(L_train_batch, np.prod(L_train_batch.shape))

            # Training step
            cost_train_batch, acc_train_batch = train_fn(X_train_batch, L_train_batch)

            # Update epoch results
            cost_train_epoch += cost_train_batch
            acc_train_epoch += acc_train_batch

        # Add epoch results
        err_train += [cost_train_epoch / n_batches_train]
        acc_train += [acc_train_epoch / n_batches_train]

        # Validation
        cost_val_epoch = 0
        acc_val_epoch = 0
        # Two rows of per-class accumulators; row 0 is divided by row 1 below.
        jacc_val_epoch = np.zeros((2, n_classes))

        for i in range(n_batches_val):
            # Get minibatch (comment the next line if only 1 minibatch in training)
            val_batch = val_iter.next()
            X_val_batch, L_val_batch, idx_val_batch = val_batch['data'], val_batch['labels'], val_batch['filenames'][0]
            L_val_batch = np.reshape(L_val_batch, np.prod(L_val_batch.shape))

            # Validation step
            cost_val_batch, acc_val_batch, jacc_val_batch = valid_fn(X_val_batch, L_val_batch)

            # Update epoch results
            cost_val_epoch += cost_val_batch
            acc_val_epoch += acc_val_batch
            jacc_val_epoch += jacc_val_batch

        # Add epoch results
        err_valid += [cost_val_epoch / n_batches_val]
        acc_valid += [acc_val_epoch / n_batches_val]
        # NOTE(review): a class whose row-1 accumulator is 0 yields nan/inf
        # here and in the mean below - confirm this cannot happen.
        jacc_perclass_valid = jacc_val_epoch[0, :] / jacc_val_epoch[1, :]
        jacc_valid += [np.mean(jacc_perclass_valid)]
        # worse_indices_valid += [worse_indices_val_epoch]

        # Print results (once per epoch)

        out_str = ("EPOCH %i: Avg cost train %f, acc train %f" +
                   ", cost val %f, acc val %f, jacc val per class %s, "
                   "jacc val %f took %f s")
        out_str = out_str % (epoch, err_train[epoch],
                             acc_train[epoch],
                             err_valid[epoch],
                             acc_valid[epoch],
                             ['%d: %f' % (i, j)
                              for i, j in enumerate(jacc_perclass_valid)],
                             jacc_valid[epoch],
                             time.time() - start_time)
        print(out_str)

        # Early stopping and saving stuff

        with open(os.path.join(savepath, "fcn1D_output.log"), "a") as f:
            f.write(out_str + "\n")

        # NOTE(review): because of `epoch > 1`, epoch 1 can never become the
        # best epoch, and the epoch-0 weights are never written to the
        # "best" files - confirm that this is intended.
        if epoch == 0:
            best_jacc_val = jacc_valid[epoch]
        elif epoch > 1 and jacc_valid[epoch] > best_jacc_val:
            print('saving best (and last) model')
            best_jacc_val = jacc_valid[epoch]
            patience = 0
            np.savez(os.path.join(savepath, 'new_fcn1D_model_best.npz'),
                     *lasagne.layers.get_all_param_values(simple_net_output))
            np.savez(os.path.join(savepath, "fcn1D_errors_best.npz"),
                     err_train=err_train, acc_train=acc_train,
                     err_valid=err_valid, acc_valid=acc_valid, jacc_valid=jacc_valid)
        else:
            patience += 1
            print('saving last model')

        # The "last" snapshot is refreshed after every epoch.
        np.savez(os.path.join(savepath, 'new_fcn1D_model_last.npz'),
                 *lasagne.layers.get_all_param_values(simple_net_output))
        np.savez(os.path.join(savepath, "fcn1D_errors_last.npz"),
                 err_train=err_train, acc_train=acc_train,
                 err_valid=err_valid, acc_valid=acc_valid, jacc_valid=jacc_valid)
        # Finish training if patience has expired or max nber of epochs reached

        if patience == max_patience or epoch == num_epochs - 1:
            if savepath != loadpath:
                print('Copying model and other training files to {}'.format(loadpath))
                copy_tree(savepath, loadpath)
            break