def __init__(self, network, config=None, method=None):
    """
    Compile a learning function for a general (pluggable-method) trainer.

    :type network: deepy.NeuralNetwork
    :type config: deepy.conf.TrainerConfig
    :param method: optional optimization method name; when given it
        overrides ``config.method`` (creating a default config if needed).
    """
    if method:
        logging.info("changing optimization method to '%s'" % method)
        if not config:
            config = TrainerConfig()
        config.method = method
    # The base initializer normalizes `config` (None / dict / TrainerConfig)
    # into self.config, so self.config is always usable below.
    super(GeneralNeuralTrainer, self).__init__(network, config)

    logging.info('compiling %s learning function', self.__class__.__name__)

    # Apply both the network's own updates and the optimizer's updates
    # in a single compiled function.
    network_updates = list(network.updates) + list(network.training_updates)
    learning_updates = list(self.learning_updates())
    update_list = network_updates + learning_updates
    logging.info("network updates: %s" % " ".join(map(str, [x[0] for x in network_updates])))
    logging.info("learning updates: %s" % " ".join(map(str, [x[0] for x in learning_updates])))

    # BUG FIX: previously read `config.get(...)`, which raised
    # AttributeError when both `config` and `method` were None. Use the
    # normalized self.config set up by the base class instead.
    self.learning_func = theano.function(
        network.input_variables + network.target_variables,
        self.training_variables,
        updates=update_list,
        allow_input_downcast=True,
        mode=self.config.get("theano_mode", theano.Mode(linker=THEANO_LINKER)))
class NeuralTrainer(object):
    """
    A base class for all trainers.

    Subclasses implement :meth:`learn`; this class drives the train /
    validate / test loop, early stopping (patience), checkpointing and
    NaN rollback.
    """
    # Python 2 style abstract base class declaration.
    __metaclass__ = ABCMeta

    def __init__(self, network, config=None):
        """
        Basic neural network trainer.
        :type network: deepy.NeuralNetwork
        :type config: deepy.conf.TrainerConfig
        :return:
        """
        super(NeuralTrainer, self).__init__()
        # Normalize config: accept TrainerConfig, plain dict, or None.
        self.config = None
        if isinstance(config, TrainerConfig):
            self.config = config
        elif isinstance(config, dict):
            self.config = TrainerConfig(config)
        else:
            self.config = TrainerConfig()
        # Model and network all refer to the computational graph
        self.model = self.network = network

        self.network.prepare_training()
        self._setup_costs()

        # Evaluation function is compiled lazily (see _compile_evaluation_func).
        self.evaluation_func = None

        self.validation_frequency = self.config.validation_frequency
        self.min_improvement = self.config.min_improvement
        self.patience = self.config.patience
        self._iter_callbacks = []

        # Early-stopping / checkpoint state.
        self.best_cost = 1e100
        self.best_iter = 0
        self.best_params = self.copy_params()
        self._skip_batches = 0
        self._progress = 0
        self.last_cost = 0

    def _compile_evaluation_func(self):
        # Compile the Theano evaluation function on first use only.
        if not self.evaluation_func:
            logging.info("compile evaluation function")
            self.evaluation_func = theano.function(
                self.network.input_variables + self.network.target_variables,
                self.evaluation_variables,
                updates=self.network.updates,
                allow_input_downcast=True,
                mode=self.config.get("theano_mode", None))

    def skip(self, n_batches):
        """
        Skip N batches in the training.
        """
        logging.info("Skip %d batches" % n_batches)
        self._skip_batches = n_batches

    def _setup_costs(self):
        # Build the lists of outputs for training and evaluation:
        # the (regularized) cost 'J' followed by any configured monitors.
        self.cost = self._add_regularization(self.network.cost)
        self.test_cost = self._add_regularization(self.network.test_cost)
        self.training_variables = [self.cost]
        self.training_names = ['J']
        for name, monitor in self.network.training_monitors:
            self.training_names.append(name)
            self.training_variables.append(monitor)
        logging.info("monitor list: %s" % ",".join(self.training_names))
        self.evaluation_variables = [self.test_cost]
        self.evaluation_names = ['J']
        for name, monitor in self.network.testing_monitors:
            self.evaluation_names.append(name)
            self.evaluation_variables.append(monitor)

    def _add_regularization(self, cost):
        # Add the L1/L2 penalties enabled in the config to the cost expression.
        if self.config.weight_l1 > 0:
            logging.info("L1 weight regularization: %f" % self.config.weight_l1)
            cost += self.config.weight_l1 * sum(
                abs(w).sum() for w in self.network.parameters)
        if self.config.hidden_l1 > 0:
            logging.info("L1 hidden unit regularization: %f" % self.config.hidden_l1)
            cost += self.config.hidden_l1 * sum(
                abs(h).mean(axis=0).sum() for h in self.network._hidden_outputs)
        if self.config.hidden_l2 > 0:
            logging.info("L2 hidden unit regularization: %f" % self.config.hidden_l2)
            cost += self.config.hidden_l2 * sum(
                (h * h).mean(axis=0).sum() for h in self.network._hidden_outputs)
        return cost

    def set_params(self, targets, free_params=None):
        # Overwrite network parameter values (and optionally free parameters)
        # with the given snapshots.
        for param, target in zip(self.network.parameters, targets):
            param.set_value(target)
        if free_params:
            for param, param_value in zip(self.network.free_parameters, free_params):
                param.set_value(param_value)

    def save_params(self, path):
        # Restore the best-seen parameters before persisting to disk.
        self.set_params(*self.best_params)
        self.network.save_params(path)

    def load_params(self, path, exclude_free_params=False):
        """
        Load parameters for the training.
        This method can load free parameters and resume the training progress.
        """
        self.network.load_params(path, exclude_free_params=exclude_free_params)
        self.best_params = self.copy_params()
        # Resume the progress
        if self.network.train_logger.progress() > 0:
            self.skip(self.network.train_logger.progress())

    def copy_params(self):
        # Snapshot current parameter values as a (params, free_params) pair.
        # NOTE(review): relies on Python 2 `map` returning lists; under
        # Python 3 these would be one-shot iterators — confirm target version.
        checkpoint = (map(lambda p: p.get_value().copy(), self.network.parameters),
                      map(lambda p: p.get_value().copy(), self.network.free_parameters))
        return checkpoint

    def add_iter_callback(self, func):
        """
        Add a iteration callback function (receives an argument of the trainer).
        :return:
        """
        self._iter_callbacks.append(func)

    def train(self, train_set, valid_set=None, test_set=None, train_size=None):
        """
        Train the model and return costs.

        A generator: yields a dict of costs after each training iteration.
        Stops when patience is exhausted or on KeyboardInterrupt.
        """
        iteration = 0
        while True:
            # Test
            if not iteration % self.config.test_frequency and test_set:
                try:
                    self._run_test(iteration, test_set)
                except KeyboardInterrupt:
                    logging.info('interrupted!')
                    break
            # Validate
            if not iteration % self.validation_frequency and valid_set:
                try:
                    if not self._run_valid(iteration, valid_set):
                        logging.info('patience elapsed, bailing out')
                        break
                except KeyboardInterrupt:
                    logging.info('interrupted!')
                    break
            # Train one step
            try:
                costs = self._run_train(iteration, train_set, train_size)
            except KeyboardInterrupt:
                logging.info('interrupted!')
                break
            # Check costs
            if np.isnan(costs[0][1]):
                # Roll back to the checkpoint saved by the last validation run.
                logging.info("NaN detected in costs, rollback to last parameters")
                self.set_params(*self.checkpoint)
            else:
                iteration += 1
                self.network.epoch_callback()

            yield dict(costs)

        if valid_set and self.config.get("save_best_parameters", True):
            self.set_params(*self.best_params)
        if test_set:
            self._run_test(-1, test_set)

    @abstractmethod
    def learn(self, *variables):
        """
        Update the parameters and return the cost with given data points.
        :param variables:
        :return:
        """

    def _run_test(self, iteration, test_set):
        """
        Run on test iteration.
        """
        costs = self.test_step(test_set)
        info = ' '.join('%s=%.2f' % el for el in costs)
        message = "test (iter=%i) %s" % (iteration + 1, info)
        logging.info(message)
        self.network.train_logger.record(message)

    def _run_train(self, iteration, train_set, train_size=None):
        """
        Run one training iteration.
        """
        costs = self.train_step(train_set, train_size)
        if not iteration % self.config.monitor_frequency:
            info = " ".join("%s=%.2f" % item for item in costs)
            message = "monitor (iter=%i) %s" % (iteration + 1, info)
            logging.info(message)
            self.network.train_logger.record(message)
        return costs

    def _run_valid(self, iteration, valid_set, dry_run=False):
        """
        Run one valid iteration, return true if to continue training.
        """
        costs = self.valid_step(valid_set)
        # this is the same as: (J_i - J_f) / J_i > min improvement
        _, J = costs[0]
        marker = ""
        if self.best_cost - J > self.best_cost * self.min_improvement:
            # save the best cost and parameters
            self.best_params = self.copy_params()
            marker = ' *'
            if not dry_run:
                self.best_cost = J
                self.best_iter = iteration

            if self.config.auto_save:
                self.network.train_logger.record_progress(self._progress)
                self.network.save_params(self.config.auto_save, new_thread=True)

        info = ' '.join('%s=%.2f' % el for el in costs)
        iter_str = "iter=%d" % (iteration + 1)
        if dry_run:
            iter_str = "dryrun" + " " * (len(iter_str) - 6)
        message = "valid (%s) %s%s" % (iter_str, info, marker)
        logging.info(message)
        self.network.train_logger.record(message)
        # Checkpoint used for NaN rollback in train().
        self.checkpoint = self.copy_params()
        return iteration - self.best_iter < self.patience

    def test_step(self, test_set):
        # Average evaluation outputs over all test batches.
        self._compile_evaluation_func()
        costs = list(
            zip(self.evaluation_names,
                np.mean([self.evaluation_func(*x) for x in test_set], axis=0)))
        return costs

    def valid_step(self, valid_set):
        # Average evaluation outputs over all validation batches.
        self._compile_evaluation_func()
        costs = list(
            zip(self.evaluation_names,
                np.mean([self.evaluation_func(*x) for x in valid_set], axis=0)))
        return costs

    def train_step(self, train_set, train_size=None):
        # One full pass over train_set, calling learn() per batch.
        # On MemoryError, batches are split in half ("dirty trick") for the
        # next 30 iterations to reduce peak memory.
        dirty_trick_times = 0
        network_callback = bool(self.network.training_callbacks)
        trainer_callback = bool(self._iter_callbacks)
        cost_matrix = []
        self._progress = 0

        for x in train_set:
            if self._skip_batches == 0:
                if dirty_trick_times > 0:
                    cost_x = self.learn(*[t[:(t.shape[0] / 2)] for t in x])
                    cost_matrix.append(cost_x)
                    cost_x = self.learn(*[t[(t.shape[0] / 2):] for t in x])
                    dirty_trick_times -= 1
                else:
                    try:
                        cost_x = self.learn(*x)
                    except MemoryError:
                        logging.info(
                            "Memory error was detected, perform dirty trick 30 times"
                        )
                        dirty_trick_times = 30
                        # Dirty trick
                        cost_x = self.learn(*[t[:(t.shape[0] / 2)] for t in x])
                        cost_matrix.append(cost_x)
                        cost_x = self.learn(*[t[(t.shape[0] / 2):] for t in x])
                cost_matrix.append(cost_x)
                self.last_cost = cost_x[0]
                if network_callback:
                    self.network.training_callback()
                if trainer_callback:
                    for func in self._iter_callbacks:
                        func(self)
            else:
                # Fast-forward through batches requested via skip().
                self._skip_batches -= 1
            if train_size:
                self._progress += 1
                sys.stdout.write(
                    "\x1b[2K\r> %d%% | J=%.2f" %
                    (self._progress * 100 / train_size, self.last_cost))
                sys.stdout.flush()
        self._progress = 0

        if train_size:
            sys.stdout.write("\r")
            sys.stdout.flush()
        costs = list(zip(self.training_names, np.mean(cost_matrix, axis=0)))
        return costs

    def run(self, train_set, valid_set=None, test_set=None, train_size=None,
            controllers=None):
        """
        Run until the end.

        Accepts either raw iterables or a Dataset (from which the splits and
        size are extracted). Controllers with an `invoke()` returning True
        end the training early.
        """
        if isinstance(train_set, Dataset):
            dataset = train_set
            train_set = dataset.train_set()
            valid_set = dataset.valid_set()
            test_set = dataset.test_set()
            train_size = dataset.train_size()
        timer = Timer()
        for _ in self.train(train_set, valid_set=valid_set, test_set=test_set,
                            train_size=train_size):
            if controllers:
                ending = False
                for controller in controllers:
                    if hasattr(controller, 'invoke') and controller.invoke():
                        ending = True
                if ending:
                    break
        timer.report()
        return
def optimize_updates(params, gradients, config=None, shapes=None):
    """
    General optimization function for Theano.

    Builds the update list for the configured optimization method, after
    optionally clipping gradients, adding L2 weight decay, suppressing NaN
    gradients, and bounding updated weights.

    Parameters:
        params - parameters
        gradients - gradients
        config - training config (TrainerConfig, dict, or None for defaults)
        shapes - unused here; kept for interface compatibility
    Returns:
        (updates, free_parameters) tuple of Theano updates and any free
        parameters recorded by the optimizer core.
    :type config: deepy.TrainerConfig or dict
    """
    if config and isinstance(config, dict):
        config = TrainerConfig(config)

    # Clipping
    if config:
        clip_value = config.get("gradient_clipping", None)
        if clip_value:
            clip_constant = T.constant(clip_value, dtype=FLOATX)
            if config.avoid_compute_embed_norm:
                # Exclude embedding weights from the norm computation.
                grad_norm = multiple_l2_norm(
                    [t[1] for t in zip(params, gradients)
                     if not t[0].name.startswith("W_embed")])
            else:
                grad_norm = multiple_l2_norm(gradients)
            isnan = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
            multiplier = ifelse(grad_norm < clip_constant,
                                T.constant(1., dtype=FLOATX),
                                clip_constant / (grad_norm + EPSILON))
            # Clip
            clipped_gradients = []
            for param, g in zip(params, gradients):
                g = multiplier * g
                if config.avoid_nan:
                    # Replace non-finite gradients with a small multiple of
                    # the parameter itself.
                    g = T.switch(isnan, np.float32(0.1) * param, g)
                if config.gradient_tolerance:
                    # Drop the whole step if the norm exceeds the tolerance.
                    g = ifelse(grad_norm > config.gradient_tolerance,
                               T.zeros_like(g) + EPSILON, g)
                clipped_gradients.append(g)
            gradients = clipped_gradients

    # Regularization (L2 weight decay applied directly to gradients)
    if config and config.weight_l2:
        regularized_gradients = []
        for param, grad in zip(params, gradients):
            grad = grad + (2 * config.weight_l2 * param)
            regularized_gradients.append(grad)
        gradients = regularized_gradients

    # Avoid nan but not computing the norm
    # This is not recommended
    if config and config.avoid_nan and not config.gradient_clipping:
        logging.info("avoid NaN gradients")
        new_gradients = []
        for grad in gradients:
            new_grad = ifelse(T.isnan(grad).any(),
                              T.zeros_like(grad) + EPSILON, grad)
            new_gradients.append(new_grad)
        gradients = new_gradients

    # Find method
    method = "SGD"
    if config:
        method = config.get("method", method).upper()

    # Get Function
    func = None
    if method in ["SGD", "ADAGRAD", "ADADELTA", "FINETUNING_ADAGRAD"]:
        from cores.ada_family import ada_family_core
        func = ada_family_core
    elif method == "ADAM":
        from cores.adam import adam_core
        func = adam_core
    elif method == "RMSPROP":
        from cores.rmsprop import rmsprop_core
        func = rmsprop_core
    elif method == "MOMENTUM":
        from cores.momentum import momentum_core
        func = momentum_core

    if not func:
        raise NotImplementedError("method '%s' is not supported" % method)

    logging.info("optimize method=%s parameters=%s" % (method, str(params)))

    free_parameters = []
    return_vals = wrap_core(func, config, params, gradients)
    # Cores may return either just updates, or [updates, free_parameters].
    if isinstance(return_vals, list) and isinstance(return_vals[0], list):
        updates, free_parameters = return_vals
    else:
        updates = return_vals

    # No free param recording
    if config and not config.record_free_params:
        free_parameters = []

    # Weight bound
    # BUG FIX: guard on `config` like every other section above; previously
    # this raised AttributeError when config was None.
    if config and config.weight_bound:
        logging.info("apply weight bound of %.2f" % config.weight_bound)
        new_updates = []
        for param, update_value in updates:
            # Clamp each updated weight into [-weight_bound, weight_bound].
            bounded_value = (update_value *
                             (T.abs_(update_value) <= config.weight_bound) +
                             config.weight_bound *
                             (update_value > config.weight_bound) +
                             -config.weight_bound *
                             (update_value < -config.weight_bound))
            new_updates.append((param, bounded_value))
        updates = new_updates
    return updates, free_parameters
# NOTE(review): this file contains multiple definitions of `NeuralTrainer`;
# at import time later definitions shadow this one — confirm which version
# is intended to survive.
class NeuralTrainer(object):
    '''This is a base class for all trainers.

    Older variant: subclasses are expected to assign a compiled
    ``self.learning_func`` before calling :meth:`train`.
    '''

    def __init__(self, network, config=None):
        """
        Basic neural network trainer.
        :type network: deepy.NeuralNetwork
        :type config: deepy.conf.TrainerConfig
        :return:
        """
        super(NeuralTrainer, self).__init__()
        # Normalize config: accept TrainerConfig, plain dict, or None.
        self.config = None
        if isinstance(config, TrainerConfig):
            self.config = config
        elif isinstance(config, dict):
            self.config = TrainerConfig(config)
        else:
            self.config = TrainerConfig()
        self.network = network

        self.network.prepare_training()
        self._setup_costs()

        # Unlike later variants, the evaluation function is compiled eagerly.
        logging.info("compile evaluation function")
        self.evaluation_func = theano.function(
            network.input_variables + network.target_variables,
            self.evaluation_variables,
            updates=network.updates,
            allow_input_downcast=True,
            mode=self.config.get("theano_mode", None))

        # Must be set by a subclass before train() is called.
        self.learning_func = None

        self.validation_frequency = self.config.validation_frequency
        self.min_improvement = self.config.min_improvement
        self.patience = self.config.patience

        # Early-stopping / checkpoint state.
        self.best_cost = 1e100
        self.best_iter = 0
        self.best_params = self._copy_network_params()

    def _setup_costs(self):
        # Build the lists of outputs for training and evaluation:
        # the (regularized) cost 'J' followed by any configured monitors.
        self.cost = self._add_regularization(self.network.cost)
        self.test_cost = self._add_regularization(self.network.test_cost)
        self.training_variables = [self.cost]
        self.training_names = ['J']
        for name, monitor in self.network.training_monitors:
            self.training_names.append(name)
            self.training_variables.append(monitor)
        logging.info("monitor list: %s" % ",".join(self.training_names))
        self.evaluation_variables = [self.test_cost]
        self.evaluation_names = ['J']
        for name, monitor in self.network.testing_monitors:
            self.evaluation_names.append(name)
            self.evaluation_variables.append(monitor)

    def _add_regularization(self, cost):
        # Add the L1/L2 penalties enabled in the config to the cost expression.
        if self.config.weight_l1 > 0:
            logging.info("L1 weight regularization: %f" % self.config.weight_l1)
            cost += self.config.weight_l1 * sum(
                abs(w).sum() for w in self.network.parameters)
        if self.config.hidden_l1 > 0:
            logging.info("L1 hidden unit regularization: %f" % self.config.hidden_l1)
            cost += self.config.hidden_l1 * sum(
                abs(h).mean(axis=0).sum() for h in self.network._hidden_outputs)
        if self.config.hidden_l2 > 0:
            logging.info("L2 hidden unit regularization: %f" % self.config.hidden_l2)
            cost += self.config.hidden_l2 * sum(
                (h * h).mean(axis=0).sum() for h in self.network._hidden_outputs)
        return cost

    def set_params(self, targets, free_params=None):
        # Overwrite network parameter values (and optionally free parameters)
        # with the given snapshots.
        for param, target in zip(self.network.parameters, targets):
            param.set_value(target)
        if free_params:
            for param, param_value in zip(self.network.free_parameters, free_params):
                param.set_value(param_value)

    def save_params(self, path):
        # Restore the best-seen parameters before persisting to disk.
        self.set_params(*self.best_params)
        self.network.save_params(path)

    def load_params(self, path):
        # Load parameters from disk and treat them as the current best.
        self.network.load_params(path)
        self.best_params = self._copy_network_params()

    def _copy_network_params(self):
        # Snapshot current parameter values as a (params, free_params) pair.
        # NOTE(review): relies on Python 2 `map` returning lists; under
        # Python 3 these would be one-shot iterators — confirm target version.
        checkpoint = (map(lambda p: p.get_value().copy(), self.network.parameters),
                      map(lambda p: p.get_value().copy(), self.network.free_parameters))
        return checkpoint

    def train(self, train_set, valid_set=None, test_set=None, train_size=None):
        """
        Train the model and return costs.

        A generator: yields a dict of costs after each training iteration.
        Requires self.learning_func to have been compiled by a subclass.
        """
        if not self.learning_func:
            raise NotImplementedError
        iteration = 0
        while True:
            # Test
            if not iteration % self.config.test_frequency and test_set:
                try:
                    self._run_test(iteration, test_set)
                except KeyboardInterrupt:
                    logging.info('interrupted!')
                    break
            # Validate
            if not iteration % self.validation_frequency and valid_set:
                try:
                    if not self._run_valid(iteration, valid_set):
                        logging.info('patience elapsed, bailing out')
                        break
                except KeyboardInterrupt:
                    logging.info('interrupted!')
                    break
            # Train one step
            try:
                costs = self._run_train(iteration, train_set, train_size)
            except KeyboardInterrupt:
                logging.info('interrupted!')
                break
            # Check costs
            if np.isnan(costs[0][1]):
                # Roll back to the checkpoint saved by the last validation run.
                logging.info("NaN detected in costs, rollback to last parameters")
                self.set_params(*self.checkpoint)
            else:
                iteration += 1
                self.network.epoch_callback()

            yield dict(costs)

        if valid_set and self.config.get("save_best_parameters", True):
            self.set_params(*self.best_params)
        if test_set:
            self._run_test(-1, test_set)

    def _run_test(self, iteration, test_set):
        """
        Run on test iteration.
        """
        costs = self.test_step(test_set)
        info = ' '.join('%s=%.2f' % el for el in costs)
        message = "test (iter=%i) %s" % (iteration + 1, info)
        logging.info(message)
        self.network.train_logger.record(message)

    def _run_train(self, iteration, train_set, train_size=None):
        """
        Run one training iteration.
        """
        costs = self.train_step(train_set, train_size)
        if not iteration % self.config.monitor_frequency:
            info = " ".join("%s=%.2f" % item for item in costs)
            message = "monitor (iter=%i) %s" % (iteration + 1, info)
            logging.info(message)
            self.network.train_logger.record(message)
        return costs

    def _run_valid(self, iteration, valid_set):
        """
        Run one valid iteration, return true if to continue training.
        """
        costs = self.valid_step(valid_set)
        # this is the same as: (J_i - J_f) / J_i > min improvement
        _, J = costs[0]
        if self.best_cost - J > self.best_cost * self.min_improvement:
            self.best_cost = J
            self.best_iter = iteration
            self.best_params = self._copy_network_params()
            marker = ' *'
        else:
            marker = ""
        info = ' '.join('%s=%.2f' % el for el in costs)
        message = "valid (iter=%i) %s%s" % (iteration + 1, info, marker)
        logging.info(message)
        self.network.train_logger.record(message)
        # Checkpoint used for NaN rollback in train().
        self.checkpoint = self._copy_network_params()
        return iteration - self.best_iter < self.patience

    def test_step(self, test_set):
        # Average evaluation outputs over all test batches.
        costs = list(zip(
            self.evaluation_names,
            np.mean([self.evaluation_func(*x) for x in test_set], axis=0)))
        return costs

    def valid_step(self, valid_set):
        # Average evaluation outputs over all validation batches.
        costs = list(zip(
            self.evaluation_names,
            np.mean([self.evaluation_func(*x) for x in valid_set], axis=0)))
        return costs

    def train_step(self, train_set, train_size=None):
        # One full pass over train_set, calling the compiled learning_func
        # per batch; prints a percentage progress bar when train_size is known.
        training_callback = bool(self.network.training_callbacks)
        cost_matrix = []
        c = 0
        for x in train_set:
            cost_x = self.learning_func(*x)
            cost_matrix.append(cost_x)
            if training_callback:
                self.network.training_callback()
            if train_size:
                c += 1
                sys.stdout.write("\r> %d%%" % (c * 100 / train_size))
                sys.stdout.flush()
        if train_size:
            sys.stdout.write("\r")
            sys.stdout.flush()
        costs = list(zip(self.training_names, np.mean(cost_matrix, axis=0)))
        return costs

    def run(self, train_set, valid_set=None, test_set=None, train_size=None,
            controllers=None):
        """
        Run until the end.

        Accepts either raw iterables or a Dataset (from which the splits and
        size are extracted). Controllers with an `invoke()` returning True
        end the training early.
        """
        if isinstance(train_set, Dataset):
            dataset = train_set
            train_set = dataset.train_set()
            valid_set = dataset.valid_set()
            test_set = dataset.test_set()
            train_size = dataset.train_size()
        timer = Timer()
        for _ in self.train(train_set, valid_set=valid_set, test_set=test_set,
                            train_size=train_size):
            if controllers:
                ending = False
                for controller in controllers:
                    if hasattr(controller, 'invoke') and controller.invoke():
                        ending = True
                if ending:
                    break
        timer.report()
        return
class NeuralTrainer(object):
    """
    A base class for all trainers.

    Latest variant in this file: epoch-labelled log messages, records
    `last_run_costs`, and time reporting can be disabled via `_report_time`.
    """
    # Python 2 style abstract base class declaration.
    __metaclass__ = ABCMeta

    def __init__(self, network, config=None):
        """
        Basic neural network trainer.
        :type network: deepy.NeuralNetwork
        :type config: deepy.conf.TrainerConfig
        :return:
        """
        super(NeuralTrainer, self).__init__()
        # Normalize config: accept TrainerConfig, plain dict, or None.
        self.config = None
        if isinstance(config, TrainerConfig):
            self.config = config
        elif isinstance(config, dict):
            self.config = TrainerConfig(config)
        else:
            self.config = TrainerConfig()
        # Model and network all refer to the computational graph
        self.model = self.network = network

        self.network.prepare_training()
        self._setup_costs()

        # Evaluation function is compiled lazily (see _compile_evaluation_func).
        self.evaluation_func = None

        self.validation_frequency = self.config.validation_frequency
        self.min_improvement = self.config.min_improvement
        self.patience = self.config.patience
        self._iter_callbacks = []

        # Early-stopping / checkpoint state.
        self.best_cost = 1e100
        self.best_iter = 0
        self.best_params = self.copy_params()
        self._skip_batches = 0
        self._progress = 0
        self.last_cost = 0
        self.last_run_costs = None
        self._report_time = True

    def _compile_evaluation_func(self):
        # Compile the Theano evaluation function on first use only.
        if not self.evaluation_func:
            logging.info("compile evaluation function")
            self.evaluation_func = theano.function(
                self.network.input_variables + self.network.target_variables,
                self.evaluation_variables,
                updates=self.network.updates,
                allow_input_downcast=True,
                mode=self.config.get("theano_mode", None))

    def skip(self, n_batches):
        """
        Skip N batches in the training.
        """
        logging.info("Skip %d batches" % n_batches)
        self._skip_batches = n_batches

    def _setup_costs(self):
        # Build the lists of outputs for training and evaluation:
        # the (regularized) cost 'J' followed by any configured monitors.
        self.cost = self._add_regularization(self.network.cost)
        self.test_cost = self._add_regularization(self.network.test_cost)
        self.training_variables = [self.cost]
        self.training_names = ['J']
        for name, monitor in self.network.training_monitors:
            self.training_names.append(name)
            self.training_variables.append(monitor)
        logging.info("monitor list: %s" % ",".join(self.training_names))
        self.evaluation_variables = [self.test_cost]
        self.evaluation_names = ['J']
        for name, monitor in self.network.testing_monitors:
            self.evaluation_names.append(name)
            self.evaluation_variables.append(monitor)

    def _add_regularization(self, cost):
        # Add the L1/L2 penalties enabled in the config to the cost expression.
        if self.config.weight_l1 > 0:
            logging.info("L1 weight regularization: %f" % self.config.weight_l1)
            cost += self.config.weight_l1 * sum(
                abs(w).sum() for w in self.network.parameters)
        if self.config.hidden_l1 > 0:
            logging.info("L1 hidden unit regularization: %f" % self.config.hidden_l1)
            cost += self.config.hidden_l1 * sum(
                abs(h).mean(axis=0).sum() for h in self.network._hidden_outputs)
        if self.config.hidden_l2 > 0:
            logging.info("L2 hidden unit regularization: %f" % self.config.hidden_l2)
            cost += self.config.hidden_l2 * sum(
                (h * h).mean(axis=0).sum() for h in self.network._hidden_outputs)
        return cost

    def set_params(self, targets, free_params=None):
        # Overwrite network parameter values (and optionally free parameters)
        # with the given snapshots.
        for param, target in zip(self.network.parameters, targets):
            param.set_value(target)
        if free_params:
            for param, param_value in zip(self.network.free_parameters, free_params):
                param.set_value(param_value)

    def save_params(self, path):
        # Restore the best-seen parameters before persisting to disk.
        self.set_params(*self.best_params)
        self.network.save_params(path)

    def load_params(self, path, exclude_free_params=False):
        """
        Load parameters for the training.
        This method can load free parameters and resume the training progress.
        """
        self.network.load_params(path, exclude_free_params=exclude_free_params)
        self.best_params = self.copy_params()
        # Resume the progress
        if self.network.train_logger.progress() > 0:
            self.skip(self.network.train_logger.progress())

    def copy_params(self):
        # Snapshot current parameter values as a (params, free_params) pair.
        # NOTE(review): relies on Python 2 `map` returning lists; under
        # Python 3 these would be one-shot iterators — confirm target version.
        checkpoint = (map(lambda p: p.get_value().copy(), self.network.parameters),
                      map(lambda p: p.get_value().copy(), self.network.free_parameters))
        return checkpoint

    def add_iter_callback(self, func):
        """
        Add a iteration callback function (receives an argument of the trainer).
        :return:
        """
        self._iter_callbacks.append(func)

    def train(self, train_set, valid_set=None, test_set=None, train_size=None):
        """
        Train the model and return costs.

        A generator: yields a dict of costs after each epoch. Stops when
        patience is exhausted or on KeyboardInterrupt.
        """
        epoch = 0
        while True:
            # Test
            if not epoch % self.config.test_frequency and test_set:
                try:
                    self._run_test(epoch, test_set)
                except KeyboardInterrupt:
                    logging.info('interrupted!')
                    break
            # Validate
            if not epoch % self.validation_frequency and valid_set:
                try:
                    if not self._run_valid(epoch, valid_set):
                        logging.info('patience elapsed, bailing out')
                        break
                except KeyboardInterrupt:
                    logging.info('interrupted!')
                    break
            # Train one step
            try:
                costs = self._run_train(epoch, train_set, train_size)
            except KeyboardInterrupt:
                logging.info('interrupted!')
                break
            # Check costs
            if np.isnan(costs[0][1]):
                # Roll back to the checkpoint saved by the last validation run.
                logging.info("NaN detected in costs, rollback to last parameters")
                self.set_params(*self.checkpoint)
            else:
                epoch += 1
                self.network.epoch_callback()

            yield dict(costs)

        if valid_set and self.config.get("save_best_parameters", True):
            self.set_params(*self.best_params)
        if test_set:
            self._run_test(-1, test_set)

    @abstractmethod
    def learn(self, *variables):
        """
        Update the parameters and return the cost with given data points.
        :param variables:
        :return:
        """

    def _run_test(self, iteration, test_set):
        """
        Run on test iteration.
        """
        costs = self.test_step(test_set)
        info = ' '.join('%s=%.2f' % el for el in costs)
        message = "test (epoch=%i) %s" % (iteration + 1, info)
        logging.info(message)
        self.network.train_logger.record(message)
        self.last_run_costs = costs

    def _run_train(self, iteration, train_set, train_size=None):
        """
        Run one training iteration.
        """
        costs = self.train_step(train_set, train_size)
        if not iteration % self.config.monitor_frequency:
            info = " ".join("%s=%.2f" % item for item in costs)
            message = "monitor (epoch=%i) %s" % (iteration + 1, info)
            logging.info(message)
            self.network.train_logger.record(message)
        self.last_run_costs = costs
        return costs

    def _run_valid(self, iteration, valid_set, dry_run=False):
        """
        Run one valid iteration, return true if to continue training.
        """
        costs = self.valid_step(valid_set)
        # this is the same as: (J_i - J_f) / J_i > min improvement
        _, J = costs[0]
        marker = ""
        if self.best_cost - J > self.best_cost * self.min_improvement:
            # save the best cost and parameters
            self.best_params = self.copy_params()
            marker = ' *'
            if not dry_run:
                self.best_cost = J
                self.best_iter = iteration

            if self.config.auto_save:
                self.network.train_logger.record_progress(self._progress)
                self.network.save_params(self.config.auto_save, new_thread=True)

        info = ' '.join('%s=%.2f' % el for el in costs)
        epoch = "epoch=%d" % (iteration + 1)
        if dry_run:
            epoch = "dryrun" + " " * (len(epoch) - 6)
        message = "valid (%s) %s%s" % (epoch, info, marker)
        logging.info(message)
        self.last_run_costs = costs
        self.network.train_logger.record(message)
        # Checkpoint used for NaN rollback in train().
        self.checkpoint = self.copy_params()
        return iteration - self.best_iter < self.patience

    def test_step(self, test_set):
        # Average evaluation outputs over all test batches.
        self._compile_evaluation_func()
        costs = list(zip(
            self.evaluation_names,
            np.mean([self.evaluation_func(*x) for x in test_set], axis=0)))
        return costs

    def valid_step(self, valid_set):
        # Average evaluation outputs over all validation batches.
        self._compile_evaluation_func()
        costs = list(zip(
            self.evaluation_names,
            np.mean([self.evaluation_func(*x) for x in valid_set], axis=0)))
        return costs

    def train_step(self, train_set, train_size=None):
        # One full pass over train_set, calling learn() per batch.
        # On MemoryError, batches are split in half ("dirty trick") for the
        # next 30 iterations to reduce peak memory.
        dirty_trick_times = 0
        network_callback = bool(self.network.training_callbacks)
        trainer_callback = bool(self._iter_callbacks)
        cost_matrix = []
        self._progress = 0

        for x in train_set:
            if self._skip_batches == 0:
                if dirty_trick_times > 0:
                    cost_x = self.learn(*[t[:(t.shape[0]/2)] for t in x])
                    cost_matrix.append(cost_x)
                    cost_x = self.learn(*[t[(t.shape[0]/2):] for t in x])
                    dirty_trick_times -= 1
                else:
                    try:
                        cost_x = self.learn(*x)
                    except MemoryError:
                        logging.info("Memory error was detected, perform dirty trick 30 times")
                        dirty_trick_times = 30
                        # Dirty trick
                        cost_x = self.learn(*[t[:(t.shape[0]/2)] for t in x])
                        cost_matrix.append(cost_x)
                        cost_x = self.learn(*[t[(t.shape[0]/2):] for t in x])
                cost_matrix.append(cost_x)
                self.last_cost = cost_x[0]
                if network_callback:
                    self.network.training_callback()
                if trainer_callback:
                    for func in self._iter_callbacks:
                        func(self)
            else:
                # Fast-forward through batches requested via skip().
                self._skip_batches -= 1
            if train_size:
                self._progress += 1
                sys.stdout.write("\x1b[2K\r> %d%% | J=%.2f" % (self._progress * 100 / train_size, self.last_cost))
                sys.stdout.flush()
        self._progress = 0

        if train_size:
            sys.stdout.write("\r")
            sys.stdout.flush()
        costs = list(zip(self.training_names, np.mean(cost_matrix, axis=0)))
        return costs

    def run(self, train_set, valid_set=None, test_set=None, train_size=None,
            controllers=None):
        """
        Run until the end.

        Accepts either raw iterables or a Dataset (from which the splits and
        size are extracted). Controllers with an `invoke()` returning True
        end the training early.
        """
        if isinstance(train_set, Dataset):
            dataset = train_set
            train_set = dataset.train_set()
            valid_set = dataset.valid_set()
            test_set = dataset.test_set()
            train_size = dataset.train_size()
        timer = Timer()
        for _ in self.train(train_set, valid_set=valid_set, test_set=test_set,
                            train_size=train_size):
            if controllers:
                ending = False
                for controller in controllers:
                    if hasattr(controller, 'invoke') and controller.invoke():
                        ending = True
                if ending:
                    break
        if self._report_time:
            timer.report()