def update_core(self):
    # When we pass one iterator and optimizer to StandardUpdater.__init__,
    # they are automatically named 'main'.
    train_iter = self.get_iterator('main')
    optimizer = self.get_optimizer('main')

    # Get the next batch (a list of json files)
    batch = train_iter.__next__()

    # read scp files
    # x: original json with loaded features
    #    will be converted to chainer variable later
    x = self.converter(batch)

    # Compute the loss at this time step and accumulate it
    loss = optimizer.target(x)
    optimizer.target.cleargrads()  # Clear the parameter gradients
    loss.backward()  # Backprop
    loss.unchain_backward()  # Truncate the graph

    # compute the gradient norm to check if it is normal or not
    grad_norm = np.sqrt(sum_sqnorm(
        [p.grad for p in optimizer.target.params(False)]))
    logging.info('grad norm={}'.format(grad_norm))
    if math.isnan(grad_norm):
        logging.warning('grad norm is nan. Do not update model.')
    else:
        optimizer.update()
    delete_feat(x)
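# The updater above relies on a sum_sqnorm helper that is defined elsewhere in
# the file. The sketch below is only an illustration of the gradient-norm check
# used there; it assumes the helper receives a list of gradient arrays (numpy
# or cupy), some of which may be None, and the real implementation may differ.
import numpy as np


def sum_sqnorm(arr):
    """Return the sum of squared elements over a list of gradient arrays."""
    sq_sum = 0.0
    for g in arr:
        if g is not None:
            # ravel()/sum() work for both numpy and cupy arrays
            sq_sum += float((g.ravel() ** 2).sum())
    return sq_sum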
def evaluate(self):
    iterator = self._iterators['main']

    if self.eval_hook:
        self.eval_hook(self)

    if hasattr(iterator, 'reset'):
        iterator.reset()
        it = iterator
    else:
        it = copy.copy(iterator)

    summary = reporter_module.DictSummary()

    self.model.eval()
    if not torch_is_old:
        torch.set_grad_enabled(False)

    for batch in it:
        observation = {}
        with reporter_module.report_scope(observation):
            # read scp files
            # x: original json with loaded features
            #    will be converted to chainer variable later
            x = self.converter(batch)
            self.model(x)
            delete_feat(x)
        summary.add(observation)

    self.model.train()
    if not torch_is_old:
        torch.set_grad_enabled(True)

    return summary.compute_mean()
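# The evaluator above disables autograd globally with torch.set_grad_enabled()
# and switches the model between eval() and train() around the loop. Below is a
# minimal, self-contained sketch of the same evaluation pattern for recent
# PyTorch versions; the model, data, and accumulated value are placeholders,
# not objects from the original code.
import torch
import torch.nn as nn

model = nn.Linear(10, 1)                        # placeholder model
data = [torch.randn(4, 10) for _ in range(3)]   # placeholder batches

model.eval()                      # switch off dropout / batch-norm updates
with torch.no_grad():             # equivalent to set_grad_enabled(False)
    total = 0.0
    for x in data:
        y = model(x)
        total += y.sum().item()
model.train()                     # restore training mode afterwards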
def evaluate(self):
    '''evaluate over iterator'''
    iterator = self._iterators['main']
    eval_func = self.eval_func or self._targets['main']

    if self.eval_hook:
        self.eval_hook(self)

    if hasattr(iterator, 'reset'):
        iterator.reset()
        it = iterator
    else:
        it = copy.copy(iterator)

    summary = reporter_module.DictSummary()

    # for multi gpu calculation
    chainer.cuda.get_device_from_id(self.device).use()
    for batch in it:
        observation = {}
        with reporter_module.report_scope(observation):
            # read scp files
            # x: original json with loaded features
            #    will be converted to chainer variable later
            # batch only has one minibatch utterance, which is specified by batch[0]
            x = converter_kaldi(batch[0], self.reader)
            with function.no_backprop_mode():
                eval_func(x)
                delete_feat(x)
        summary.add(observation)

    return summary.compute_mean()
def update_core(self):
    # When we pass one iterator and optimizer to StandardUpdater.__init__,
    # they are automatically named 'main'.
    train_iter = self.get_iterator('main')
    optimizer = self.get_optimizer('main')

    # Get the next batch (a list of json files)
    batch = train_iter.__next__()

    # read scp files
    # x: original json with loaded features
    #    will be converted to chainer variable later
    # batch only has one minibatch utterance, which is specified by batch[0]
    if len(batch[0]) < self.num_gpu:
        logging.warning('batch size is less than number of gpus. Ignored')
        return
    x = converter_kaldi(batch[0], self.reader)

    # Compute the loss at this time step and accumulate it
    loss = 1. / self.num_gpu * self.model(x)
    optimizer.zero_grad()  # Clear the parameter gradients
    if self.num_gpu > 1:
        loss.backward(torch.ones(self.num_gpu))  # Backprop
    else:
        loss.backward()  # Backprop
    loss.detach()  # Truncate the graph

    # compute the gradient norm to check if it is normal or not
    grad_norm = torch.nn.utils.clip_grad_norm(
        self.model.parameters(), self.grad_clip_threshold)
    logging.info('grad norm={}'.format(grad_norm))
    if math.isnan(grad_norm):
        logging.warning('grad norm is nan. Do not update model.')
    else:
        optimizer.step()
    delete_feat(x)
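# In the PyTorch updater above, torch.nn.utils.clip_grad_norm (renamed
# clip_grad_norm_ in later PyTorch releases) returns the total gradient norm
# computed before clipping, which is what makes the NaN check possible. The
# sketch below shows the same clip-then-check-then-step pattern in a
# self-contained form; the model, optimizer, loss, and threshold of 5.0 are
# placeholders, not values from the original code.
import math
import torch
import torch.nn as nn

model = nn.Linear(10, 1)                              # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
loss = model(torch.randn(4, 10)).pow(2).mean()        # placeholder loss

optimizer.zero_grad()
loss.backward()
grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
if math.isnan(float(grad_norm)):
    print('grad norm is nan. Do not update model.')
else:
    optimizer.step()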
def evaluate(self):
    iterator = self._iterators['main']

    if self.eval_hook:
        self.eval_hook(self)

    if hasattr(iterator, 'reset'):
        iterator.reset()
        it = iterator
    else:
        it = copy.copy(iterator)

    summary = reporter_module.DictSummary()

    for batch in it:
        observation = {}
        with reporter_module.report_scope(observation):
            # read scp files
            # x: original json with loaded features
            #    will be converted to chainer variable later
            # batch only has one minibatch utterance, which is specified by batch[0]
            x = converter_kaldi(batch[0], self.reader)
            self.model.eval()
            self.model(x)
            delete_feat(x)
        summary.add(observation)

    return summary.compute_mean()
def update_core(self):
    self.setup_workers()

    self._send_message(('update', None))
    with cuda.Device(self._devices[0]):
        from cupy.cuda import nccl

        # For reducing memory
        self._master.cleargrads()

        optimizer = self.get_optimizer('main')
        batch = self.get_iterator('main').next()
        x = converter_kaldi(batch[0], self.reader)

        loss = self._master(x)

        self._master.cleargrads()
        loss.backward()
        loss.unchain_backward()

        # NCCL: reduce grads
        null_stream = cuda.Stream.null
        if self.comm is not None:
            gg = gather_grads(self._master)
            self.comm.reduce(gg.data.ptr, gg.data.ptr, gg.size,
                             nccl.NCCL_FLOAT, nccl.NCCL_SUM,
                             0, null_stream.ptr)
            scatter_grads(self._master, gg)
            del gg

        # check gradient value
        grad_norm = np.sqrt(self._sum_sqnorm(
            [p.grad for p in optimizer.target.params(False)]))
        logging.info('grad norm={}'.format(grad_norm))

        # update
        if math.isnan(grad_norm):
            logging.warning('grad norm is nan. Do not update model.')
        else:
            optimizer.update()

        if self.comm is not None:
            gp = gather_params(self._master)
            self.comm.bcast(gp.data.ptr, gp.size, nccl.NCCL_FLOAT,
                            0, null_stream.ptr)

        delete_feat(x)
def run(self):
    from cupy.cuda import nccl
    dev = cuda.Device(self.device)
    dev.use()
    self.setup()
    gp = None
    while True:
        job, data = self.pipe.recv()
        if job == 'finalize':
            dev.synchronize()
            break
        if job == 'update':
            # For reducing memory
            self.model.cleargrads()

            batch = self.iterator.next()
            x = converter_kaldi(batch[0], self.reader)
            observation = {}
            with self.reporter.scope(observation):
                loss = self.model(x)

            self.model.cleargrads()
            loss.backward()
            loss.unchain_backward()
            del loss

            gg = gather_grads(self.model)
            null_stream = cuda.Stream.null
            self.comm.reduce(gg.data.ptr, gg.data.ptr, gg.size,
                             nccl.NCCL_FLOAT, nccl.NCCL_SUM, 0,
                             null_stream.ptr)
            del gg
            self.model.cleargrads()
            gp = gather_params(self.model)
            self.comm.bcast(gp.data.ptr, gp.size, nccl.NCCL_FLOAT,
                            0, null_stream.ptr)
            scatter_params(self.model, gp)
            gp = None

            delete_feat(x)