def init_optimizer(self, kvstore='local', optimizer='sgd',
                   optimizer_params=(('learning_rate', 0.01),),
                   force_init=False):
    """Install and initialize optimizers.

    Parameters
    ----------
    kvstore : str or KVStore
        Default `'local'`.
    optimizer : str or Optimizer
        Default `'sgd'`.
    optimizer_params : dict
        Default `(('learning_rate', 0.01),)`. The default value is not a
        dictionary, just to avoid pylint warning of dangerous default values.
    force_init : bool
        Default `False`, indicating whether we should force re-initializing
        the optimizer in the case an optimizer is already installed.
    """
    assert self.binded and self.params_initialized

    if self.optimizer_initialized and not force_init:
        self.logger.warning('optimizer already initialized, ignoring...')
        return

    (kvstore, update_on_kvstore) = \
        _create_kvstore(kvstore, len(self._context), self._arg_params)

    # normalize gradients to a per-example scale; under a synchronized
    # distributed kvstore the effective batch spans all workers
    batch_size = self._exec_group.batch_size
    if kvstore and 'dist' in kvstore.type and '_sync' in kvstore.type:
        batch_size *= kvstore.num_workers
    rescale_grad = 1.0 / batch_size

    if isinstance(optimizer, str):
        # map optimizer state indices to parameter names; without
        # update_on_kvstore every device holds its own copy of each weight
        idx2name = {}
        if update_on_kvstore:
            idx2name.update(enumerate(self._exec_group.param_names))
        else:
            for k in range(len(self._context)):
                idx2name.update({i*len(self._context)+k: n
                                 for i, n in enumerate(self._exec_group.param_names)})
        optimizer_params = dict(optimizer_params)
        if 'rescale_grad' not in optimizer_params:
            optimizer_params['rescale_grad'] = rescale_grad
        optimizer = opt.create(optimizer,
                               sym=self.symbol, param_idx2name=idx2name,
                               **optimizer_params)
    else:
        assert isinstance(optimizer, opt.Optimizer)
        if optimizer.rescale_grad != rescale_grad:
            # pylint: disable=no-member
            warnings.warn(
                "Optimizer created manually outside Module but rescale_grad " +
                "is not normalized to 1.0/batch_size/num_workers (%s vs. %s). " % (
                    optimizer.rescale_grad, rescale_grad) +
                "Is this intended?", stacklevel=2)

    self._optimizer = optimizer
    self._kvstore = kvstore
    self._update_on_kvstore = update_on_kvstore
    self._updater = None

    if kvstore:
        # copy initialized local parameters to kvstore
        _initialize_kvstore(kvstore=kvstore,
                            param_arrays=self._exec_group.param_arrays,
                            arg_params=self._arg_params,
                            param_names=self._param_names,
                            update_on_kvstore=update_on_kvstore)
    if update_on_kvstore:
        kvstore.set_optimizer(self._optimizer)
    else:
        self._updater = opt.get_updater(optimizer)

    self.optimizer_initialized = True

    if self._preload_opt_states is not None:
        self.load_optimizer_states(self._preload_opt_states)
        self._preload_opt_states = None
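# Usage sketch (illustrative, not from the original source): how
# `init_optimizer` fits into the Module lifecycle. It must run after
# `bind()` and `init_params()`, as the assert above enforces. The symbol,
# data shapes, and hyperparameters below are hypothetical.
#
#     import mxnet as mx
#
#     net = mx.sym.FullyConnected(data=mx.sym.Variable('data'), num_hidden=10)
#     mod = mx.mod.Module(symbol=net, data_names=['data'], label_names=None)
#     mod.bind(data_shapes=[('data', (32, 100))], for_training=True)
#     mod.init_params()
#     mod.init_optimizer(kvstore='local', optimizer='sgd',
#                        optimizer_params=(('learning_rate', 0.01),))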
def fit(self, X, marks, e_marks=None,
        y=None, eval_data=None, eval_metric='acc',
        epoch_end_callback=None, batch_end_callback=None,
        time_step_callback=None,
        kvstore='local', logger=None,
        work_load_list=None, monitor=None, eval_batch_end_callback=None):
    """Overwrite of ``fit`` that trains with marks via ``_train_rnn``."""

    data = self._init_iter(X, y, is_train=True)
    eval_data = self._init_eval_iter(eval_data)

    if self.sym_gen:
        self.symbol = self.sym_gen(data.default_bucket_key)  # pylint: disable=no-member
        self._check_arguments()
        self.kwargs["sym"] = self.symbol

    param_dict = dict(data.provide_data + data.provide_label)
    arg_names, param_names, aux_names = self._init_params(param_dict)

    # setup metric
    if not isinstance(eval_metric, metric.EvalMetric):
        eval_metric = metric.create(eval_metric)

    # create kvstore
    (kvstore, update_on_kvstore) = _create_kvstore(
        kvstore, len(self.ctx), self.arg_params)

    # map optimizer state indices to parameter names; without
    # update_on_kvstore each context holds its own copy of every weight
    param_idx2name = {}
    if update_on_kvstore:
        param_idx2name.update(enumerate(param_names))
    else:
        for i, n in enumerate(param_names):
            for k in range(len(self.ctx)):
                param_idx2name[i * len(self.ctx) + k] = n
    self.kwargs["param_idx2name"] = param_idx2name

    # init optimizer
    if isinstance(self.optimizer, str):
        batch_size = data.batch_size
        if kvstore and kvstore.type == 'dist_sync':
            batch_size *= kvstore.num_workers
        optimizer = opt.create(self.optimizer,
                               rescale_grad=(1.0 / batch_size),
                               **(self.kwargs))
    elif isinstance(self.optimizer, opt.Optimizer):
        optimizer = self.optimizer
    else:
        # guard against `optimizer` being unbound in the call below
        raise TypeError('optimizer must be a str or an opt.Optimizer instance')

    # do training
    _train_rnn(self.symbol, self.ctx,
               marks, arg_names, param_names, aux_names,
               self.arg_params, self.aux_params,
               begin_epoch=self.begin_epoch, end_epoch=self.num_epoch,
               epoch_size=self.epoch_size,
               optimizer=optimizer,
               train_data=data, eval_data=eval_data,
               eval_metric=eval_metric,
               epoch_end_callback=epoch_end_callback,
               batch_end_callback=batch_end_callback,
               time_step_callback=time_step_callback,
               kvstore=kvstore, update_on_kvstore=update_on_kvstore,
               logger=logger, work_load_list=work_load_list,
               monitor=monitor,
               eval_batch_end_callback=eval_batch_end_callback,
               sym_gen=self.sym_gen, e_marks=e_marks)
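# Worked example (illustrative, with hypothetical sizes) of the two
# bookkeeping steps in `fit` above. Under a 'dist_sync' kvstore the
# per-worker batch is scaled by the worker count, so the globally summed
# gradient is rescaled to a per-example average:
batch_size, num_workers = 128, 4
rescale_grad = 1.0 / (batch_size * num_workers)
assert rescale_grad == 1.0 / 512

# When updates happen locally rather than on the kvstore, each device's
# copy of a parameter gets its own optimizer index:
param_names, num_ctx = ['w', 'b'], 2
param_idx2name = {}
for i, n in enumerate(param_names):
    for k in range(num_ctx):
        param_idx2name[i * num_ctx + k] = n
assert param_idx2name == {0: 'w', 1: 'w', 2: 'b', 3: 'b'}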
def run(self):
    data = self.model._init_iter(self.data, None, is_train=True)

    arg_names, param_names, aux_names = \
        self.model._init_params(dict(data.provide_data + data.provide_label))

    # create kvstore
    (kvstore, update_on_kvstore) = _create_kvstore(
        self.kv, len(self.ctxs), self.model.arg_params)

    self.executor_manager = DataParallelExecutorManager(symbol=self.sym,
                                                        ctx=self.ctxs,
                                                        train_data=self.data,
                                                        param_names=param_names,
                                                        arg_names=arg_names,
                                                        aux_names=aux_names,
                                                        logger=logger)

    self.executor_manager.set_params(self.model.arg_params, self.model.aux_params)

    if not update_on_kvstore:
        updater = get_updater(self.optimizer)

    if kvstore:
        _initialize_kvstore(kvstore=kvstore,
                            param_arrays=self.executor_manager.param_arrays,
                            arg_params=self.model.arg_params,
                            param_names=self.executor_manager.param_names,
                            update_on_kvstore=update_on_kvstore)

    if update_on_kvstore:
        kvstore.set_optimizer(self.optimizer)

    for e in self.before_training_extensions:
        e(self)

    # note: this loop never breaks on its own; nothing in this method
    # terminates training
    while True:
        self.metric.reset()
        nbatch = 0
        self.data.reset()

        for data_batch in self.data:
            self.executor_manager.load_data_batch(data_batch)

            self.executor_manager.forward(is_train=True)
            self.executor_manager.backward()

            if update_on_kvstore:
                _update_params_on_kvstore(self.executor_manager.param_arrays,
                                          self.executor_manager.grad_arrays,
                                          kvstore)
            else:
                _update_params(self.executor_manager.param_arrays,
                               self.executor_manager.grad_arrays,
                               updater=updater,
                               num_device=len(self.ctxs),
                               kvstore=kvstore)

            # evaluate at end, so out_cpu_array can lazy copy
            self.metric.update(data_batch.label,
                               self.executor_manager.cpu_output_arrays)

            self.status['iterations'] += 1
            self.status['epoch_iterations'] += 1
            self.log[self.status['iterations']] = dict(
                iterations=self.status['iterations'])
            self.current_log = self.log[self.status['iterations']]

            for e in self.batch_extensions:
                e(self)

            nbatch += 1

        self.status['epochs'] += 1
        self.status['epoch_iterations'] = 0

        for e in self.epoch_extensions:
            e(self)
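# Illustrative sketch: extensions in `run` above are plain callables invoked
# with the trainer instance (`e(self)`). Since the epoch loop is `while True`,
# termination presumably comes from an extension; the class below is a
# hypothetical epoch extension (not from the original source) that logs the
# metric and halts after `max_epochs` by raising an exception the caller is
# expected to handle.
class StopAfterEpochs(object):
    """Hypothetical epoch extension: log the metric, stop at max_epochs."""

    def __init__(self, max_epochs):
        self.max_epochs = max_epochs

    def __call__(self, trainer):
        name, value = trainer.metric.get()
        print('epoch %d: %s=%f' % (trainer.status['epochs'], name, value))
        if trainer.status['epochs'] >= self.max_epochs:
            raise StopIteration('reached %d epochs' % self.max_epochs)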