Example #1
    def __init__(self, network, config=None):
        """
        Basic neural network trainer.
        :type network: deepy.NeuralNetwork
        :type config: deepy.conf.TrainerConfig
        :return:
        """
        super(NeuralTrainer, self).__init__()

        # Normalize the config argument into a TrainerConfig instance
        if isinstance(config, TrainerConfig):
            self.config = config
        elif isinstance(config, dict):
            self.config = TrainerConfig(config)
        else:
            self.config = TrainerConfig()
        # self.model and self.network both refer to the same computational graph
        self.model = self.network = network

        self.network.prepare_training()
        self._setup_costs()

        self.evaluation_func = None

        self.validation_frequency = self.config.validation_frequency
        self.min_improvement = self.config.min_improvement
        self.patience = self.config.patience
        self._iter_callbacks = []

        self.best_cost = 1e100
        self.best_iter = 0
        self.best_params = self.copy_params()
        self._skip_batches = 0
        self._progress = 0
        self.last_cost = 0
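
A minimal usage sketch (hypothetical: "MyNetwork" stands in for any deepy.NeuralNetwork subclass, and NeuralTrainer is normally used through subclasses such as SGDTrainer; the config may be a TrainerConfig, a plain dict, or omitted):

network = MyNetwork()
# Hypothetical sketch -- both config forms below are accepted by the constructor
trainer = NeuralTrainer(network, config={"learning_rate": 0.01, "patience": 10})
trainer = NeuralTrainer(network, config=TrainerConfig({"learning_rate": 0.01}))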
Example #2
    def __init__(self, network, method=None, config=None, annealer=None, validator=None):

        if method:
            logging.info("changing optimization method to '%s'" % method)
            if not config:
                config = TrainerConfig()
            elif isinstance(config, dict):
                config = TrainerConfig(config)
            config.method = method

        super(GeneralNeuralTrainer, self).__init__(network, config, annealer=annealer, validator=validator)

        self._learning_func = None
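
A usage sketch for the method shortcut (hypothetical; the string is stored on config.method and later resolved by the optimizer selection, so e.g. "adam" selects the Adam core):

# Hypothetical sketch: override the optimization method without building a config
trainer = GeneralNeuralTrainer(network, method="adam")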
Example #3
    def __init__(self, network, config=None):
        """
        Basic neural network trainer.
        :type network: deepy.NeuralNetwork
        :type config: deepy.conf.TrainerConfig
        :return:
        """
        super(NeuralTrainer, self).__init__()

        # Normalize the config argument into a TrainerConfig instance
        if isinstance(config, TrainerConfig):
            self.config = config
        elif isinstance(config, dict):
            self.config = TrainerConfig(config)
        else:
            self.config = TrainerConfig()
        self.network = network

        self.network.prepare_training()

        self._setup_costs()

        logging.info("compile evaluation function")
        self.evaluation_func = theano.function(
            network.input_variables + network.target_variables,
            self.evaluation_variables,
            updates=network.updates,
            allow_input_downcast=True,
            mode=self.config.get("theano_mode", None))
        self.learning_func = None

        self.validation_frequency = self.config.validation_frequency
        self.min_improvement = self.config.min_improvement
        self.patience = self.config.patience

        self.best_cost = 1e100
        self.best_iter = 0
        self.best_params = self._copy_network_params()
        self._skip_batches = 0
        self._progress = 0
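
Once constructed, evaluation_func can be called directly on a minibatch; a sketch, assuming the network has one input variable and one target variable and that x_batch / y_batch are numpy arrays of matching shapes:

# Hypothetical sketch: arguments follow the order of
# network.input_variables + network.target_variables
eval_outputs = trainer.evaluation_func(x_batch, y_batch)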
Example #4
    def __init__(self, network, config=None, method=None):

        if method:
            logging.info("changing optimization method to '%s'" % method)
            if not config:
                config = TrainerConfig()
            elif isinstance(config, dict):
                config = TrainerConfig(config)
            config.method = method

        super(GeneralNeuralTrainer, self).__init__(network, config)

        logging.info('compiling %s learning function', self.__class__.__name__)

        network_updates = list(network.updates) + list(
            network.training_updates)
        learning_updates = list(self.learning_updates())
        update_list = network_updates + learning_updates
        logging.info("network updates: %s" %
                     " ".join(map(str, [x[0] for x in network_updates])))
        logging.info("learning updates: %s" %
                     " ".join(map(str, [x[0] for x in learning_updates])))

        # NOTE: the data-transmitter branch is disabled ("if False"), so the input
        # and target variables are always passed explicitly as function arguments.
        if False and config.data_transmitter:
            variables = [config.data_transmitter.get_iterator()]
            givens = config.data_transmitter.get_givens()
        else:
            variables = network.input_variables + network.target_variables
            givens = None

        self.learning_func = theano.function(
            variables,
            [theano.Out(v, borrow=True) for v in self.training_variables],
            updates=update_list,
            allow_input_downcast=True,
            mode=self.config.get("theano_mode", None),
            givens=givens)
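
The compiled learning_func is invoked the same way; a sketch, again assuming one input and one target variable:

# Hypothetical sketch: one training step on a minibatch; the return values are
# the trainer's training_variables (typically cost terms) for monitoring.
outputs = trainer.learning_func(x_batch, y_batch)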
Example #5
    def __init__(self, state_num, action_num, experience_replay=True):
        self.state_num = state_num
        self.action_num = action_num
        self.experience_replay = experience_replay
        # Replay memory for past transitions
        self.experience_pool = []
        self.model = get_model(state_num, action_num)
        train_conf = TrainerConfig()
        train_conf.learning_rate = LEARNING_RATE
        train_conf.weight_l2 = 0
        self.trainer = SGDTrainer(self.model, train_conf)
        self.trainer.training_names = []
        self.trainer.training_variables = []
        # Lock for updating the model from multiple threads
        self.thread_lock = threading.Lock()
        # Exploration rate for epsilon-greedy action selection
        self.epsilon = EPSILON
        self.tick = 0
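
A construction sketch (the enclosing class name is not shown above, so "Agent" below is a placeholder; LEARNING_RATE and EPSILON are module-level constants assumed to be defined elsewhere):

# Hypothetical sketch: a 4-dimensional state space with 2 discrete actions
agent = Agent(state_num=4, action_num=2, experience_replay=True)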
Example #6
def optimize_updates(params, gradients, config=None, shapes=None):
    """
    General optimization function for Theano.
    Parameters:
        params - parameters
        gradients - gradients
        config - training config
    Returns:
        Theano updates
    :type config: deepy.TrainerConfig or dict
    """
    if config and isinstance(config, dict):
        config = TrainerConfig(config)

    # Clipping
    if config:
        clip_value = config.get("gradient_clipping", None)

        if clip_value:
            clip_constant = T.constant(clip_value, dtype=FLOATX)

            if config.avoid_compute_embed_norm:
                grad_norm = multiple_l2_norm([t[1] for t in zip(params, gradients) if not t[0].name.startswith("W_embed")])
            else:
                grad_norm = multiple_l2_norm(gradients)
            isnan = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
            multiplier = ifelse(grad_norm < clip_constant,
                                T.constant(1., dtype=FLOATX), clip_constant / (grad_norm + EPSILON))

            # Clip
            clipped_gradients = []
            for param, g in zip(params, gradients):
                g = multiplier * g
                if config.avoid_nan:
                    g = T.switch(isnan, np.float32(0.1) * param, g)
                if config.gradient_tolerance:
                    g = ifelse(grad_norm > config.gradient_tolerance, T.zeros_like(g) + EPSILON, g)
                clipped_gradients.append(g)

            gradients = clipped_gradients
    # Regularization
    if config and config.weight_l2:
        regularized_gradients = []
        for param, grad in zip(params, gradients):
            grad = grad + (2 * config.weight_l2 * param)
            regularized_gradients.append(grad)
        gradients = regularized_gradients

    # Avoid nan but not computing the norm
    # This is not recommended
    if config and config.avoid_nan and not config.gradient_clipping:
        logging.info("avoid NaN gradients")
        new_gradients = []
        for grad in gradients:
            new_grad = ifelse(T.isnan(grad).any(), T.zeros_like(grad) + EPSILON, grad)
            new_gradients.append(new_grad)
        gradients = new_gradients

    # Find method
    method = "SGD"
    if config:
        method = config.get("method", method).upper()
    # Get Function
    func = None
    if method in ["SGD", "ADAGRAD", "ADADELTA", "FINETUNING_ADAGRAD"]:
        from cores.ada_family import ada_family_core
        func = ada_family_core
    elif method == "ADAM":
        from cores.adam import adam_core
        func = adam_core
    elif method == "RMSPROP":
        from cores.rmsprop import rmsprop_core
        func = rmsprop_core
    elif method == "MOMENTUM":
        from cores.momentum import momentum_core
        func = momentum_core

    if not func:
        raise NotImplementedError("method '%s' is not supported" % method)

    logging.info("optimize method=%s parameters=%s" % (method, str(params)))

    free_parameters = []
    return_vals = wrap_core(func, config, params, gradients)
    if isinstance(return_vals, list) and isinstance(return_vals[0], list):
        updates, free_parameters = return_vals
    else:
        updates = return_vals

    # No free param recording
    if config and not config.record_free_params:
        free_parameters = []

    # Weight bound
    if config and config.weight_bound:
        logging.info("apply weight bound of %.2f" % config.weight_bound)
        new_updates = []
        for param, update_value in updates:
            bounded_value = (update_value * (T.abs_(update_value) <= config.weight_bound) +
                             config.weight_bound * (update_value > config.weight_bound) +
                             -config.weight_bound * (update_value < -config.weight_bound))
            new_updates.append((param, bounded_value))
        updates = new_updates
    return updates, free_parameters
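
A sketch of wiring optimize_updates into a training function (illustrative only: the toy softmax model below and the exact config keys are assumptions, not part of the function above):

import numpy as np
import theano
import theano.tensor as T

# Toy softmax classifier, used only to produce parameters and gradients
x = T.matrix("x")
y = T.ivector("y")
W = theano.shared(np.zeros((784, 10), dtype=theano.config.floatX), name="W")
b = theano.shared(np.zeros(10, dtype=theano.config.floatX), name="b")
prob = T.nnet.softmax(T.dot(x, W) + b)
cost = -T.mean(T.log(prob)[T.arange(y.shape[0]), y])

params = [W, b]
gradients = T.grad(cost, params)
# A dict config is converted to TrainerConfig inside optimize_updates
updates, free_params = optimize_updates(params, gradients,
                                        config={"method": "ADAM", "learning_rate": 1e-3})
train_fn = theano.function([x, y], cost, updates=updates,
                           allow_input_downcast=True)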
Example #7
        model.stack(
            HighwayLayerLRDiagDropoutBatchNorm(activation=activation,
                                               gate_bias=gate_bias,
                                               projection_dim=d,
                                               d_p_0=dropout_p_h_0,
                                               d_p_1=dropout_p_h_1,
                                               init=init,
                                               quasi_ortho_init=True))
    #model.stack(BatchNormalization(),Dropout(p=dropout_p_2), Dense(10, init=init))
    model.stack(Dropout(p=dropout_p_2), Dense(10, init=init))

    learning_rate_start = 3e-3
    #learning_rate_target = 3e-7
    #learning_rate_epochs = 100
    #learning_rate_decay  = (learning_rate_target / learning_rate_start) ** (1.0 / learning_rate_epochs)
    conf = TrainerConfig()
    conf.learning_rate = LearningRateAnnealer.learning_rate(
        learning_rate_start)
    #conf.gradient_clipping = 1
    conf.patience = 20
    #conf.gradient_tolerance = 5
    conf.avoid_nan = True
    conf.min_improvement = 1e-10

    #trainer = MomentumTrainer(model)
    trainer = AdamTrainer(model, conf)

    mnist = MiniBatches(MnistDataset(), batch_size=100)
    #mnist = MiniBatches(MnistDatasetSmallValid(), batch_size=100)

    #trainer.run(mnist, controllers=[IncrementalLearningRateAnnealer(trainer, 0, learning_rate_decay)])
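
The run call itself is commented out above; a minimal way to launch training with this setup (a sketch, assuming the controllers argument is optional, so an empty list is passed here):

    trainer.run(mnist, controllers=[])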