def __init__(self, network, config=None):
    """
    Basic neural network trainer.
    :type network: deepy.NeuralNetwork
    :type config: deepy.conf.TrainerConfig
    :return:
    """
    super(NeuralTrainer, self).__init__()

    self.config = None
    if isinstance(config, TrainerConfig):
        self.config = config
    elif isinstance(config, dict):
        self.config = TrainerConfig(config)
    else:
        self.config = TrainerConfig()

    # Model and network both refer to the computational graph
    self.model = self.network = network

    self.network.prepare_training()
    self._setup_costs()

    self.evaluation_func = None

    self.validation_frequency = self.config.validation_frequency
    self.min_improvement = self.config.min_improvement
    self.patience = self.config.patience

    self._iter_callbacks = []

    self.best_cost = 1e100
    self.best_iter = 0
    self.best_params = self.copy_params()
    self._skip_batches = 0
    self._progress = 0
    self.last_cost = 0
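# Usage sketch for the config handling above, not part of the original trainer code.
# Only TrainerConfig (imported from deepy.conf, the path named in the docstring) is
# assumed; the normalize_config helper below is hypothetical and simply mirrors the
# isinstance dispatch in the constructor.
from deepy.conf import TrainerConfig

def normalize_config(config=None):
    if isinstance(config, TrainerConfig):
        return config                    # already a TrainerConfig: used as-is
    elif isinstance(config, dict):
        return TrainerConfig(config)     # plain dict: wrapped into a TrainerConfig
    return TrainerConfig()               # None: fall back to default settings

conf_a = normalize_config(TrainerConfig())
conf_b = normalize_config({"learning_rate": 0.01, "patience": 10})
conf_c = normalize_config(None)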
def __init__(self, network, method=None, config=None, annealer=None, validator=None):
    if method:
        logging.info("changing optimization method to '%s'" % method)
        if not config:
            config = TrainerConfig()
        elif isinstance(config, dict):
            config = TrainerConfig(config)
        config.method = method

    super(GeneralNeuralTrainer, self).__init__(network, config, annealer=annealer, validator=validator)

    self._learning_func = None
def __init__(self, network, config=None):
    """
    Basic neural network trainer.
    :type network: deepy.NeuralNetwork
    :type config: deepy.conf.TrainerConfig
    :return:
    """
    super(NeuralTrainer, self).__init__()

    self.config = None
    if isinstance(config, TrainerConfig):
        self.config = config
    elif isinstance(config, dict):
        self.config = TrainerConfig(config)
    else:
        self.config = TrainerConfig()

    self.network = network

    self.network.prepare_training()
    self._setup_costs()

    logging.info("compile evaluation function")
    self.evaluation_func = theano.function(
        network.input_variables + network.target_variables,
        self.evaluation_variables,
        updates=network.updates,
        allow_input_downcast=True,
        mode=self.config.get("theano_mode", None))

    self.learning_func = None

    self.validation_frequency = self.config.validation_frequency
    self.min_improvement = self.config.min_improvement
    self.patience = self.config.patience

    self.best_cost = 1e100
    self.best_iter = 0
    self.best_params = self._copy_network_params()
    self._skip_batches = 0
    self._progress = 0
def __init__(self, network, config=None, method=None):
    if method:
        logging.info("changing optimization method to '%s'" % method)
        if not config:
            config = TrainerConfig()
        elif isinstance(config, dict):
            config = TrainerConfig(config)
        config.method = method

    super(GeneralNeuralTrainer, self).__init__(network, config)

    logging.info('compiling %s learning function', self.__class__.__name__)

    network_updates = list(network.updates) + list(network.training_updates)
    learning_updates = list(self.learning_updates())
    update_list = network_updates + learning_updates

    logging.info("network updates: %s" % " ".join(map(str, [x[0] for x in network_updates])))
    logging.info("learning updates: %s" % " ".join(map(str, [x[0] for x in learning_updates])))

    # The data_transmitter path is disabled here: the `False and` guard always
    # falls through to feeding input/target variables directly.
    if False and config.data_transmitter:
        variables = [config.data_transmitter.get_iterator()]
        givens = config.data_transmitter.get_givens()
    else:
        variables = network.input_variables + network.target_variables
        givens = None

    self.learning_func = theano.function(
        variables,
        [theano.Out(v, borrow=True) for v in self.training_variables],
        updates=update_list,
        allow_input_downcast=True,
        mode=self.config.get("theano_mode", None),
        givens=givens)
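# Standalone sketch of the theano.function(updates=...) mechanism that the constructor
# above relies on when it compiles self.learning_func. This is plain Theano rather than
# deepy; the names (x, w, lr) are illustrative only, and the single SGD pair stands in
# for the combined network/learning update_list.
import numpy as np
import theano
import theano.tensor as T

x = T.vector("x")
w = theano.shared(np.ones(3, dtype=theano.config.floatX), name="w")
cost = T.sum((w * x) ** 2)
grad = T.grad(cost, w)
lr = np.asarray(0.1, dtype=theano.config.floatX)

# Each (shared_variable, new_expression) pair is applied after every call,
# just like the pairs gathered into update_list above.
train_step = theano.function([x], cost,
                             updates=[(w, w - lr * grad)],
                             allow_input_downcast=True)

print(train_step([1.0, 2.0, 3.0]))  # repeated calls drive the cost down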
def __init__(self, state_num, action_num, experience_replay=True):
    self.state_num = state_num
    self.action_num = action_num
    self.experience_replay = experience_replay
    self.experience_pool = []

    self.model = get_model(state_num, action_num)

    train_conf = TrainerConfig()
    train_conf.learning_rate = LEARNING_RATE
    train_conf.weight_l2 = 0

    self.trainer = SGDTrainer(self.model, train_conf)
    self.trainer.training_names = []
    self.trainer.training_variables = []

    self.thread_lock = threading.Lock()
    self.epsilon = EPSILON
    self.tick = 0
def optimize_updates(params, gradients, config=None, shapes=None):
    """
    General optimization function for Theano.
    Parameters:
        params - list of parameters (Theano shared variables)
        gradients - list of gradients, one per parameter
        config - training config
    Returns:
        Theano updates
    :type config: deepy.TrainerConfig or dict
    """
    if config and isinstance(config, dict):
        config = TrainerConfig(config)

    # Clipping
    if config:
        clip_value = config.get("gradient_clipping", None)

        if clip_value:
            clip_constant = T.constant(clip_value, dtype=FLOATX)

            if config.avoid_compute_embed_norm:
                grad_norm = multiple_l2_norm([t[1] for t in zip(params, gradients)
                                              if not t[0].name.startswith("W_embed")])
            else:
                grad_norm = multiple_l2_norm(gradients)
            isnan = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))

            multiplier = ifelse(grad_norm < clip_constant,
                                T.constant(1., dtype=FLOATX),
                                clip_constant / (grad_norm + EPSILON))

            # Clip
            clipped_gradients = []
            for param, g in zip(params, gradients):
                g = multiplier * g
                if config.avoid_nan:
                    g = T.switch(isnan, np.float32(0.1) * param, g)
                if config.gradient_tolerance:
                    g = ifelse(grad_norm > config.gradient_tolerance, T.zeros_like(g) + EPSILON, g)
                clipped_gradients.append(g)

            gradients = clipped_gradients

    # Regularization
    if config and config.weight_l2:
        regularized_gradients = []
        for param, grad in zip(params, gradients):
            grad = grad + (2 * config.weight_l2 * param)
            regularized_gradients.append(grad)
        gradients = regularized_gradients

    # Avoid NaN without computing the norm
    # This is not recommended
    if config and config.avoid_nan and not config.gradient_clipping:
        logging.info("avoid NaN gradients")
        new_gradients = []
        for grad in gradients:
            new_grad = ifelse(T.isnan(grad).any(), T.zeros_like(grad) + EPSILON, grad)
            new_gradients.append(new_grad)
        gradients = new_gradients

    # Find method
    method = "SGD"
    if config:
        method = config.get("method", method).upper()

    # Get function
    func = None
    if method in ["SGD", "ADAGRAD", "ADADELTA", "FINETUNING_ADAGRAD"]:
        from cores.ada_family import ada_family_core
        func = ada_family_core
    elif method == "ADAM":
        from cores.adam import adam_core
        func = adam_core
    elif method == "RMSPROP":
        from cores.rmsprop import rmsprop_core
        func = rmsprop_core
    elif method == "MOMENTUM":
        from cores.momentum import momentum_core
        func = momentum_core

    if not func:
        raise NotImplementedError("method '%s' is not supported" % method)

    logging.info("optimize method=%s parameters=%s" % (method, str(params)))

    free_parameters = []
    return_vals = wrap_core(func, config, params, gradients)
    if type(return_vals) == list and type(return_vals[0]) == list:
        updates, free_parameters = return_vals
    else:
        updates = return_vals

    # No free param recording
    if config and not config.record_free_params:
        free_parameters = []

    # Weight bound
    if config and config.weight_bound:
        logging.info("apply weight bound of %.2f" % config.weight_bound)
        new_updates = []
        for param, update_value in updates:
            bounded_value = (update_value * (T.abs_(update_value) <= config.weight_bound) +
                             config.weight_bound * (update_value > config.weight_bound) +
                             -config.weight_bound * (update_value < -config.weight_bound))
            new_updates.append((param, bounded_value))
        updates = new_updates
    return updates, free_parameters
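# Sketch of how optimize_updates above might be wired into a compiled training step.
# It assumes a scalar Theano cost and a list of shared parameters already exist; the
# config keys shown are ones handled in the function body, and build_sgd_step itself
# is a hypothetical helper.
import theano
import theano.tensor as T

def build_sgd_step(inputs, cost, params):
    gradients = T.grad(cost, params)
    updates, _free_params = optimize_updates(
        params, gradients,
        config={"method": "SGD", "gradient_clipping": 3, "weight_l2": 1e-4})
    return theano.function(inputs, cost, updates=updates, allow_input_downcast=True)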
model.stack(HighwayLayerLRDiagDropoutBatchNorm(activation=activation, gate_bias=gate_bias,
                                               projection_dim=d, d_p_0=dropout_p_h_0,
                                               d_p_1=dropout_p_h_1, init=init,
                                               quasi_ortho_init=True))
#model.stack(BatchNormalization(), Dropout(p=dropout_p_2), Dense(10, init=init))
model.stack(Dropout(p=dropout_p_2),
            Dense(10, init=init))

learning_rate_start = 3e-3
#learning_rate_target = 3e-7
#learning_rate_epochs = 100
#learning_rate_decay = (learning_rate_target / learning_rate_start) ** (1.0 / learning_rate_epochs)

conf = TrainerConfig()
conf.learning_rate = LearningRateAnnealer.learning_rate(learning_rate_start)
#conf.gradient_clipping = 1
conf.patience = 20
#conf.gradient_tolerance = 5
conf.avoid_nan = True
conf.min_improvement = 1e-10

#trainer = MomentumTrainer(model)
trainer = AdamTrainer(model, conf)

mnist = MiniBatches(MnistDataset(), batch_size=100)
#mnist = MiniBatches(MnistDatasetSmallValid(), batch_size=100)

#trainer.run(mnist, controllers=[IncrementalLearningRateAnnealer(trainer, 0, learning_rate_decay)])
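# One possible way to finish the fragment above, following the commented-out
# trainer.run(...) call and deepy's usual MNIST examples. LearningRateAnnealer(trainer)
# and model.save_params(...) are assumptions here, not taken from the original snippet.
trainer.run(mnist, controllers=[LearningRateAnnealer(trainer)])
model.save_params("mnist_highway_model.gz")  # hypothetical output path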