def evaluate(self):
    """Main evaluate routine for CustomEvaluator.

    Runs the model over the validation iterator without gradients,
    collecting per-batch reporter observations into a DictSummary,
    and returns the mean of all reported values.
    """
    iterator = self._iterators['main']
    if self.eval_hook:
        # optional user callback invoked before each evaluation run
        self.eval_hook(self)
    if hasattr(iterator, 'reset'):
        # resettable iterators can be reused in place
        iterator.reset()
        it = iterator
    else:
        # otherwise iterate over a shallow copy so the original's
        # position is not consumed
        it = copy.copy(iterator)
    summary = reporter_module.DictSummary()
    self.model.eval()
    with torch.no_grad():
        for batch in it:
            observation = {}
            with reporter_module.report_scope(observation):
                # read scp files
                # x: original json with loaded features
                # will be converted to chainer variable later
                x = self.converter(batch, self.device)
                # the forward pass reports its metrics via the reporter
                # scope; the return value is intentionally discarded
                if self.ngpu == 0:
                    self.model(*x)
                else:
                    # apex does not support torch.nn.DataParallel
                    data_parallel(self.model, x, range(self.ngpu))
            summary.add(observation)
    # restore training mode for subsequent updates
    self.model.train()
    return summary.compute_mean()
def test_data_parallel(self):
    """data_parallel output/gradients must match a single-device run,
    regardless of the ordering of device_ids."""
    l = nn.Linear(10, 5).float().cuda()
    i = torch.randn(20, 10, dtype=torch.float, device="cuda:1")
    l.cuda(1)
    # reference forward/backward on cuda:1 only
    expected_out = l(i)
    loss = expected_out.sum()
    loss.backward()
    expected_grads = []
    for param in l.parameters():
        expected_grads.append(param.grad.clone())
    dev_ids_list = [(0, 1), (1, 0)]
    for dev_id in dev_ids_list:
        with torch.cuda.device(dev_id[0]):
            l.cuda()
            l.zero_grad()
            out = dp.data_parallel(l, i, dev_id)
            loss = out.sum()
            loss.backward()
            # output gathers on the first listed device
            self.assertEqual(out.get_device(), dev_id[0])
            self.assertEqual(out, expected_out)
            for expected, param in zip(expected_grads, l.parameters()):
                self.assertEqual(param.grad, expected)
    # Check for None device_ids
    l = l.cuda()
    out = dp.data_parallel(l, i)
def test_data_parallel_device_args(self):
    """device_ids and output_device must accept both ints and torch.device."""
    dev0 = torch.device('cuda:0')
    dev1 = torch.device('cuda:1')
    # First pass exercises integer device ids, second pass torch.device
    # objects; both must produce the single-device result.
    for ids in ((0, 1), (dev0, dev1)):
        layer = nn.Linear(10, 5).to(dev0, torch.float)
        inp = torch.randn(20, 10, dtype=torch.float, device=dev0,
                          requires_grad=True)
        result = dp.data_parallel(layer, inp, device_ids=ids,
                                  output_device=dev0)
        self.assertEqual(result, layer(inp))
def forward(self, images, IUV=None, train_mix_cnn=False, detach=True):
    """Predict dense-pose map, UV map, camera and vertices for a batch.

    Args:
        images: input image batch; first dim is batch size.
        IUV: unused here, kept for interface compatibility with callers.
        train_mix_cnn: unused here, kept for interface compatibility.
        detach: when True, run inference under ``torch.no_grad`` so no
            gradients flow back into the sub-networks.

    Returns:
        dict with keys 'pred_vertices', 'camera', 'uv_map', 'dp_map'.
    """
    def _predict():
        # Single inference path shared by the detached and attached modes;
        # the original had this entire body duplicated in both branches.
        if self.ngpu > 1 and images.shape[0] % self.ngpu == 0:
            # Use DataParallel only when the batch divides evenly
            # across the available GPUs.
            pred_dp, dp_feature, codes = data_parallel(
                self.CNet, images, range(self.ngpu))
            pred_uv_map, pred_camera = data_parallel(
                self.LNet, (pred_dp, dp_feature, codes), range(self.ngpu))
        else:
            pred_dp, dp_feature, codes = self.CNet(images)
            pred_uv_map, pred_camera = self.LNet(pred_dp, dp_feature, codes)
        pred_vertices = self.sampler.resample(pred_uv_map)
        return pred_dp, pred_uv_map, pred_camera, pred_vertices

    if detach:
        with torch.no_grad():
            pred_dp, pred_uv_map, pred_camera, pred_vertices = _predict()
    else:
        pred_dp, pred_uv_map, pred_camera, pred_vertices = _predict()

    out_dict = {
        'pred_vertices': pred_vertices,
        'camera': pred_camera,
        'uv_map': pred_uv_map,
        'dp_map': pred_dp,
    }
    return out_dict
def test_data_parallel_sparse(self):
    """data_parallel over a sparse Embedding must reproduce the
    single-device output and gradients for either device ordering."""
    l = nn.Embedding(10, 5, sparse=True).to("cuda:1")
    i = torch.randint(10, (20, 5), device="cuda:1", dtype=torch.long)
    # reference forward/backward on cuda:1 only
    expected_out = l(i)
    loss = expected_out.sum()
    loss.backward()
    expected_grads = []
    for param in l.parameters():
        expected_grads.append(param.grad.clone())
    dev_ids_list = [(0, 1), (1, 0)]
    for dev_id in dev_ids_list:
        with torch.cuda.device(dev_id[0]):
            l.cuda()
            l.zero_grad()
            out = dp.data_parallel(l, i, dev_id)
            loss = out.sum()
            loss.backward()
            # output gathers on the first listed device
            self.assertEqual(out.get_device(), dev_id[0])
            self.assertEqual(out, expected_out)
            for expected, param in zip(expected_grads, l.parameters()):
                self.assertEqual(param.grad, expected)
    # Check for None device_ids
    l = l.cuda()
    out = dp.data_parallel(l, i)
def update_core(self): """Main update routine of the CustomUpdater.""" # When we pass one iterator and optimizer to StandardUpdater.__init__, # they are automatically named 'main'. train_iter = self.get_iterator('main') optimizer = self.get_optimizer('main') # Get the next batch ( a list of json files) batch = train_iter.next() # self.iteration += 1 # Increase may result in early report, which is done in other place automatically. x = self.converter(batch, self.device) # Compute the loss at this time step and accumulate it if self.ngpu == 0: loss = self.model(*x).mean() / self.accum_grad else: # apex does not support torch.nn.DataParallel if 'espnet.nets.pytorch_backend.e2e_asr_transformer' in self.model.__class__.__module__: loss = data_parallel(self.model, x + (self.iteration, ), range(self.ngpu)).mean() / self.accum_grad else: loss = data_parallel(self.model, x, range( self.ngpu)).mean() / self.accum_grad if self.use_apex: from apex import amp # NOTE: for a compatibility with noam optimizer opt = optimizer.optimizer if hasattr(optimizer, "optimizer") else optimizer with amp.scale_loss(loss, opt) as scaled_loss: scaled_loss.backward() else: loss.backward() # gradient noise injection if self.grad_noise: from espnet.asr.asr_utils import add_gradient_noise add_gradient_noise(self.model, self.iteration, duration=100, eta=1.0, scale_factor=0.55) loss.detach() # Truncate the graph # update parameters self.forward_count += 1 if self.forward_count != self.accum_grad: return self.forward_count = 0 # compute the gradient norm to check if it is normal or not grad_norm = torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_clip_threshold) logging.info('grad norm={}'.format(grad_norm)) if math.isnan(grad_norm): logging.warning('grad norm is nan. Do not update model.') else: optimizer.step() optimizer.zero_grad()
def forward(self, input):
    """Run the conv trunk then the dense head, flattening in between.

    Splits work across GPUs with data_parallel when the input lives on
    a CUDA device and more than one GPU is configured.
    """
    use_parallel = input.is_cuda and self.ngpu != 1
    if use_parallel:
        out = parallel.data_parallel(self.main, input, range(self.ngpu))
        out = out.view(-1, 4 * 4 * 512)
        out = parallel.data_parallel(self.dense, out, range(self.ngpu))
        return out
    # single-device path
    out = self.dense(self.main(input).view(-1, 4 * 4 * 512))
    return out
def test_accuracy(net, data, num_users, ratio, save_file=None, gpus=[0]):
    """Show test accuracy."""
    # NOTE(review): `gpus=[0]` is a mutable default argument; it is never
    # mutated here, so it is harmless, but worth confirming upstream style.
    net.eval()
    accuracy = binary = 0.0
    parallel = len(gpus) > 1
    # NOTE(review): this looks inverted (CPU tensors when parallel,
    # CUDA tensors when single-GPU). It may be intentional — DataParallel
    # scatters CPU inputs itself — but confirm against the training script.
    dtype = torch.FloatTensor if parallel else torch.cuda.FloatTensor
    total_iters = len(data.loader)
    for idx, input in enumerate(data.loader):
        # compute output and loss
        posi_text, nega_text, posi_img, nega_img, uidx = input
        # convert to Variable; replicating the lists `ratio` times lets the
        # inner loop index i run over `ratio` copies
        posi_text = posi_text * ratio
        posi_img = posi_img * ratio
        accuracy_ = 0
        binary_ = 0
        for i in range(ratio):
            p_text = tuple(Variable(v.type(dtype)) for v in posi_text[i])
            n_text = tuple(Variable(v.type(dtype)) for v in nega_text[i])
            p_img = tuple(Variable(v.type(dtype)) for v in posi_img[i])
            n_img = tuple(Variable(v.type(dtype)) for v in nega_img[i])
            uidx = uidx.view(-1, 1)
            batch_size = uidx.size(0)
            # one-hot encode the user index
            uidxv = torch.zeros(batch_size, num_users).scatter_(1, uidx, 1.0)
            uidxv = Variable(uidxv.type(dtype))
            posiv = (p_text, p_img, uidxv)
            negav = (n_text, n_img, uidxv)
            # compute gradient and do Optimizer step
            if parallel:
                # model parallel
                pscore, bpscore = data_parallel(net, posiv, gpus)
                nscore, bnscore = data_parallel(net, negav, gpus)
            else:
                pscore, bpscore = net(*posiv)
                nscore, bnscore = net(*negav)
            # positive outfit should outscore the negative one
            accuracy_ += net.accuracy(pscore - nscore)  # , size_average=False)
            binary_ += net.accuracy(bpscore - bnscore)  # , size_average=False)
        print('Batch [{}]/[{}] Accuracy {:.3f} Accuracy(Binary) {:.3f} \n'.
              format(idx, total_iters, accuracy_ / ratio,
                     binary_ / (ratio * batch_size)))
        accuracy += accuracy_ / ratio
        binary += binary_ / ratio
    count = len(data.loader.dataset)
    accuracy /= total_iters
    binary /= count
    print('Average accuracy: {}, Binary Accuracy: {}'.format(accuracy, binary))
    # save results, keyed by which score branch was zeroed out
    if net.zero_iscores:
        results = dict(uaccuracy=accuracy, ubinary=binary)
    elif net.zero_uscores:
        results = dict(iaccuracy=accuracy, ibinary=binary)
    else:
        results = dict(accuracy=accuracy, binary=binary)
    # merge with any previously saved results before overwriting
    if os.path.exists(save_file):
        results.update(np.load(save_file))
    np.savez(save_file, **results)
def forward(self, input):
    """Project the latent vector, reshape to feature maps, and decode.

    Uses data_parallel for both stages when running on CUDA with
    multiple GPUs configured.
    """
    flat = input.view(-1, self.nz)
    if input.is_cuda and self.ngpu != 1:
        flat = parallel.data_parallel(self.linear, flat, range(self.ngpu))
        maps = flat.view(-1, self.ngf * 4, 4, 4)
        return parallel.data_parallel(self.main, maps, range(self.ngpu))
    # single-device path
    projected = self.linear(flat)
    maps = projected.view(-1, self.ngf * 4, 4, 4)
    return self.main(maps)
def evaluate_core(self):
    """Forward every test sample through the model without gradients.

    Records each sample's file name in the reporter dict; the model's
    outputs are discarded (metrics are reported as side effects).
    """
    self.model.eval()
    with torch.no_grad():
        for samples in self.test_loader:
            self.reporter.report_dict['fname'] = samples['fname'][0]
            inputs = samples['input'][0].to(self.device)
            targets = samples['target'][0].to(self.device)
            if self.ngpu == 0:
                self.model(inputs, targets)
            else:
                data_parallel(self.model, (inputs, targets), range(self.ngpu))
    # restore training mode
    self.model.train()
def forward(self, input):
    """Shared trunk followed by two heads.

    Returns:
        (class_output, dis_output): classification logits and
        discriminator output computed from the same trunk features.
    """
    if input.is_cuda and self.ngpu != 1:
        shared = parallel.data_parallel(self.main, input, range(self.ngpu))
        class_output = parallel.data_parallel(self.class_branch, shared,
                                              range(self.ngpu))
        dis_output = parallel.data_parallel(self.dis_branch, shared,
                                            range(self.ngpu))
        return class_output, dis_output
    # single-device path
    shared = self.main(input)
    return self.class_branch(shared), self.dis_branch(shared)
def test_data_parallel_no_grad(self):
    """Replicas must see grad-mode disabled when called under no_grad."""
    test = self

    class Layer(nn.Module):
        def forward(self, x):
            # The disabled-grad state must propagate into each replica's
            # forward call.
            test.assertFalse(torch.is_grad_enabled())
            return x

    module = Layer()
    data = torch.randn(20, 10, dtype=torch.float, device="cuda")
    with torch.no_grad():
        dp.data_parallel(module, data, (0, 1))
    # Outside no_grad the layer's assertion fires, surfacing as an
    # AssertionError from the replica.
    self.assertRaises(AssertionError,
                      lambda: dp.data_parallel(module, data, (0, 1)))
def __call__(self, l, g):
    """Encode local and global features and score them with the
    configured mutual-information loss ('nce' or 'dv')."""
    pooled_l = F.adaptive_avg_pool2d(l, 128)
    l_enc = normalize_tensor(data_parallel(self.estimator, pooled_l))
    # local encoding gets an extra singleton dim for broadcasting
    l_enc = torch.unsqueeze(l_enc, dim=2)
    pooled_g = F.adaptive_avg_pool2d(g, 128)
    g_enc = normalize_tensor(data_parallel(self.estimator, pooled_g))
    if self.mode == 'nce':
        return self.nce_loss(l_enc, g_enc)
    if self.mode == 'dv':
        return self.donsker_varadhan_loss(l_enc, g_enc)
    raise NotImplementedError(self.mode)
def evaluate_batch(net, criterion, X, Y, use_cuda):
    """Evaluate a single batch (without training).

    Feeds the input sequence, then reads out the target-length output,
    and returns loss, per-sequence bit-error cost, raw and binarized
    outputs, and the recorded controller states.
    """
    inp_seq_len = X.size(0)
    outp_seq_len, batch_size, _ = Y.size()

    # New sequence
    net.init_sequence(batch_size, use_cuda)

    run_parallel = use_cuda and torch.cuda.is_available()

    # Feed the sequence + delimiter
    states = []
    for i in range(inp_seq_len):
        if run_parallel:
            o, state = data_parallel(net, X[i])
        else:
            o, state = net(X[i])
        states += [state]

    # Read the output (no input given)
    # NOTE(review): this still feeds X[i]; presumably a zero input was
    # intended during readout — confirm against the training loop.
    y_out = []
    for i in range(outp_seq_len):
        if run_parallel:
            o, state = data_parallel(net, X[i])
        else:
            # FIX: was `o, _ = net(X[i])`, which appended the stale state
            # from the previous iteration to `states`.
            o, state = net(X[i])
        states += [state]
        y_out += [o]
    y_out = torch.cat(y_out, dim=0).unsqueeze(1)
    loss = criterion(y_out, Y)

    # FIX: binarize with a tensor op. The original loop rebound the loop
    # variable (`k = 0 if k < 0.5 else 1`), which never modified the tensor.
    y_out_binarized = (y_out.clone().data >= 0.5).float()

    # The cost is the number of error bits per sequence
    cost = torch.sum(torch.abs(y_out_binarized - Y.data))
    result = {
        'loss': loss.data.item(),
        'cost': cost / batch_size,
        'y_out': y_out,
        'y_out_binarized': y_out_binarized,
        'states': states,
    }
    return result
def train(self, train_dataloader):
    """Run one training epoch over *train_dataloader*.

    Returns the average SI-SNR loss over the epoch.
    """
    self.logger.info('Training model ......')
    losses = []
    start = time.time()
    current_step = 0
    for egs in train_dataloader:
        current_step += 1
        egs = to_device(egs, self.device)
        self.optimizer.zero_grad()
        # scatter the mixture across the configured GPUs
        ests = data_parallel(self.net, egs['mix'], device_ids=self.gpuid)
        loss = si_snr_loss(ests, egs)
        loss.backward()
        if self.clip_norm:
            clip_grad_norm_(self.net.parameters(), self.clip_norm)
        self.optimizer.step()
        losses.append(loss.item())
        # NOTE(review): `==` means this progress log fires only once, when
        # exactly logging_period batches have accumulated — confirm whether
        # a periodic `% self.logging_period == 0` was intended.
        if len(losses) == self.logging_period:
            avg_loss = sum(
                losses[-self.logging_period:]) / self.logging_period
            self.logger.info(
                '<epoch:{:3d}, iter:{:d}, lr:{:.3e}, loss:{:.3f}, batch:{:d} utterances> '
                .format(self.cur_epoch, current_step,
                        self.optimizer.param_groups[0]['lr'], avg_loss,
                        len(losses)))
    end = time.time()
    total_loss_avg = sum(losses) / len(losses)
    self.logger.info(
        '<epoch:{:3d}, lr:{:.3e}, loss:{:.3f}, Total time:{:.3f} min> '.
        format(self.cur_epoch, self.optimizer.param_groups[0]['lr'],
               total_loss_avg, (end - start) / 60))
    return total_loss_avg
def encode(self, inputs, hidden=None, device_ids=None):
    """Run the encoder, optionally split across GPUs.

    A *tuple* of device_ids requests DataParallel execution, scattered
    along the batch dimension of the encoder.
    """
    if not isinstance(device_ids, tuple):
        return self.encoder(inputs, hidden)
    scatter_dim = 0 if self.encoder.batch_first else 1
    return data_parallel(self.encoder, (inputs, hidden),
                         device_ids=device_ids, dim=scatter_dim)
def train_epoch(dataloader, model, optimizer):
    """Run one training epoch and return averaged stats (loss/accuracy).

    Relies on module-level `args`, `data_parallel`, `clip_grad_norm_`,
    `dict_average` and `logging` being in scope, as the original did.
    """
    model.train()
    # acc_lst, loss_lst = [], []
    stats = collections.defaultdict(list)
    for batch_idx, data in enumerate(dataloader):
        fbank, seq_lens, tokens = data
        fbank, seq_lens, tokens = fbank.cuda(), seq_lens.cuda(), tokens.cuda()
        optimizer.zero_grad()
        if args.ngpu <= 1 or args.dist_train:
            loss = model(fbank, seq_lens, tokens).mean()  # / self.accum_grad
        else:
            # apex does not support torch.nn.DataParallel
            loss = (
                data_parallel(model, (fbank, seq_lens, tokens),
                              range(args.ngpu)).mean()  # / self.accum_grad
            )
        # DataParallel wraps the real model in `.module`; read the accuracy
        # from whichever object actually owns it.
        # FIX: the original checked hasattr(model, "acc") even when it then
        # read model.module.acc, so the wrapped branch tested the wrong object.
        target = model.module if hasattr(model, "module") else model
        if hasattr(target, "acc") and target.acc is not None:
            stats["acc_lst"].append(target.acc)
            target.acc = None
        loss.backward()
        clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()
        stats["loss_lst"].append(loss.item())
        logging.warning(f"Training batch: {batch_idx+1}/{len(dataloader)}")
    return dict_average(stats)
def forward_chop(self, *args, shave=10, min_size=160000):
    """Super-resolve large inputs by splitting them into 4 overlapping
    quadrants, processing each (recursively if still too large), and
    stitching the upscaled quadrants back together.

    Args:
        *args: input tensors; the first determines height/width.
        shave: overlap margin added to each quadrant to avoid border
            artifacts at the seams.
        min_size: area threshold below which quadrants are processed
            directly instead of recursing.
    """
    scale = 1 if self.input_large else self.scale[self.idx_scale]
    n_GPUs = min(self.n_GPUs, 4)
    # height, width
    h, w = args[0].size()[-2:]
    # overlapping quadrant slices of the input
    top = slice(0, h // 2 + shave)
    bottom = slice(h - h // 2 - shave, h)
    left = slice(0, w // 2 + shave)
    right = slice(w - w // 2 - shave, w)
    # for each input tensor, stack its 4 quadrants along the batch dim
    x_chops = [
        torch.cat([
            a[..., top, left],
            a[..., top, right],
            a[..., bottom, left],
            a[..., bottom, right]
        ]) for a in args
    ]
    y_chops = []
    if h * w < 4 * min_size:
        # small enough: run the model on n_GPUs quadrants at a time
        for i in range(0, 4, n_GPUs):
            x = [x_chop[i:(i + n_GPUs)] for x_chop in x_chops]
            # NOTE(review): `*x` unpacks the per-input list positionally, so
            # range(n_GPUs) lands in data_parallel's device_ids slot only
            # when there is exactly one input tensor — confirm for multi-arg
            # models.
            y = P.data_parallel(self.model, *x, range(n_GPUs))
            if not isinstance(y, list):
                y = [y]
            if not y_chops:
                y_chops = [[c for c in _y.chunk(n_GPUs, dim=0)] for _y in y]
            else:
                for y_chop, _y in zip(y_chops, y):
                    y_chop.extend(_y.chunk(n_GPUs, dim=0))
    else:
        # too large: recurse on each quadrant independently
        for p in zip(*x_chops):
            y = self.forward_chop(*p, shave=shave, min_size=min_size)
            if not isinstance(y, list):
                y = [y]
            if not y_chops:
                y_chops = [[_y] for _y in y]
            else:
                for y_chop, _y in zip(y_chops, y):
                    y_chop.append(_y)

    # output geometry after upscaling
    h *= scale
    w *= scale
    # destination slices (no overlap) and the matching source slices that
    # drop the shave margin from the right/bottom quadrants
    top = slice(0, h // 2)
    bottom = slice(h - h // 2, h)
    bottom_r = slice(h // 2 - h, None)
    left = slice(0, w // 2)
    right = slice(w - w // 2, w)
    right_r = slice(w // 2 - w, None)
    # batch size, number of color channels
    b, c = y_chops[0][0].size()[:-2]
    y = [y_chop[0].new(b, c, h, w) for y_chop in y_chops]
    for y_chop, _y in zip(y_chops, y):
        _y[..., top, left] = y_chop[0][..., top, left]
        _y[..., top, right] = y_chop[1][..., top, right_r]
        _y[..., bottom, left] = y_chop[2][..., bottom_r, left]
        _y[..., bottom, right] = y_chop[3][..., bottom_r, right_r]

    if len(y) == 1:
        y = y[0]

    return y
def forward(self, inputs):
    """Thin DataParallel wrapper around self.model.

    Scatters `inputs` along dim 0 over self.gpu_devices and gathers the
    result on the default output device (output_device=None).
    """
    return data_parallel(
        self.model,
        inputs,
        device_ids=self.gpu_devices,
        output_device=None,
        dim=0,
        module_kwargs=None,
    )
def encode(self, inputs, hidden=None, devices=None):
    """Run the encoder, optionally split across GPUs.

    A *tuple* of devices requests DataParallel execution, scattered
    along the batch dimension of the encoder.
    """
    if not isinstance(devices, tuple):
        return self.encoder(inputs, hidden)
    scatter_dim = 0 if self.encoder.batch_first else 1
    return data_parallel(self.encoder, (inputs, hidden),
                         device_ids=devices, dim=scatter_dim)
def train(train_loader, model, criterion, optimizer, epoch, args):
    """Train *model* for one epoch; returns the average loss.

    Tracks batch/data timing and top-1/top-5 accuracy, applies an
    optional L1 penalty, and prints progress every args.print_freq
    batches.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (x, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        # NOTE(review): `is not None` is always true if args.cuda is a
        # bool flag; `if args.cuda:` was probably intended — confirm how
        # args.cuda is parsed.
        if args.cuda is not None:
            x = x.cuda()
            target = target.cuda()
        # compute output
        output = data_parallel(model, x)
        loss = criterion(output, target)
        if args.l1_penalty > 0:
            # sparsity regularization on the model weights
            loss += args.l1_penalty * l1_weight_total(model)
        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), x.size(0))
        top1.update(acc1[0], x.size(0))
        top5.update(acc5[0], x.size(0))
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        # record stats in model for visualization
        model.stats['train_loss'].append(loss.item())
        if i % args.print_freq == 0 or i == len(train_loader) - 1:
            print('Train:: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch, i, len(train_loader) - 1,
                      batch_time=batch_time, data_time=data_time,
                      loss=losses, top1=top1, top5=top5))
    return losses.avg
def test_data_parallel_nested_output(self):
    """Gather must preserve arbitrarily nested output containers."""
    def fn(t):
        return [
            t,
            (t.sin(), t.cos(), [t.add(1)]),
            t,
            OrderedDict(a=t, b=[t.sin()]),
        ]

    class Net(nn.Module):
        def forward(self, input):
            return fn(input)

    inp = torch.randn(2, 2).float().cuda(1)
    gpus = range(torch.cuda.device_count())
    output = dp.data_parallel(Net(), inp, gpus)
    self.assertEqual(output, fn(inp))
    # container structure must survive scatter/gather exactly
    self.assertIsInstance(output[0], torch.Tensor)
    tup = output[1]
    self.assertIsInstance(tup, tuple)
    self.assertIsInstance(tup[0], torch.Tensor)
    self.assertIsInstance(tup[1], torch.Tensor)
    self.assertIsInstance(tup[2], list)
    self.assertIsInstance(tup[2][0], torch.Tensor)
    self.assertIsInstance(output[2], torch.Tensor)
    mapping = output[3]
    self.assertIsInstance(mapping, dict)
    self.assertEqual(len(mapping), 2)
    self.assertIn('a', mapping)
    self.assertIn('b', mapping)
    self.assertIsInstance(mapping['a'], torch.Tensor)
    self.assertIsInstance(mapping['b'], list)
    self.assertIsInstance(mapping['b'][0], torch.Tensor)
def decode(self, inputs, context, devices=None):
    """Run the decoder, optionally split across GPUs.

    A *tuple* of devices requests DataParallel execution, scattered
    along the batch dimension.
    """
    if not isinstance(devices, tuple):
        return self.decoder(inputs, context)
    scatter_dim = 0 if self.batch_first else 1
    return data_parallel(self.decoder, (inputs, context),
                         device_ids=devices, dim=scatter_dim)
def validation(model, args, lr, epoch, device):
    """Run one pass over the cross-validation set.

    Returns:
        (loss_total_avg, sisnr_total_avg): average mixed loss and
        average SI-SNR loss over all CV batches.
    """
    dataloader, dataset = make_loader(
        args.cv_list,
        args.batch_size,
        num_workers=args.num_threads,
    )
    model.eval()
    loss_total = 0.
    sisnr_total = 0.
    num_batch = len(dataloader)
    stime = time.time()
    with torch.no_grad():
        for idx, data in enumerate(dataloader):
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs, wav = data_parallel(model, (inputs, ))
            # combined ('Mix') objective; [0] keeps only the scalar loss
            loss = model.loss(outputs, labels, mode='Mix')[0]
            sisnr = model.loss(wav, labels, mode='SiSNR')
            loss_total += loss.data.cpu()
            sisnr_total += sisnr.data.cpu()
            # free batch tensors eagerly to bound GPU memory during CV
            del loss, data, inputs, labels, wav, outputs
    etime = time.time()
    eplashed = (etime - stime) / num_batch
    loss_total_avg = loss_total / num_batch
    sisnr_total_avg = sisnr_total / num_batch
    print('CROSSVAL AVG.LOSS | Epoch {:3d}/{:3d} '
          '| lr {:.4e} | {:2.3f}s/batch| time {:2.1f}mins '
          '| Mixloss {:2.4f} | SiSNR {:2.4f}'.format(
              epoch + 1, args.max_epoch, lr, eplashed,
              (etime - stime) / 60.0, loss_total_avg.item(),
              -sisnr_total_avg.item()))
    sys.stdout.flush()
    return loss_total_avg, sisnr_total_avg
def val(self, val_dataloader):
    """Run one validation pass over *val_dataloader*.

    Mirrors train() but without backward/step; returns the average
    SI-SNR loss.
    """
    self.logger.info('Validation model ......')
    self.net.eval()
    losses = []
    current_step = 0
    start = time.time()
    with torch.no_grad():
        for egs in val_dataloader:
            current_step += 1
            egs = to_device(egs, self.device)
            ests = data_parallel(self.net, egs['mix'],
                                 device_ids=self.gpuid)
            loss = si_snr_loss(ests, egs)
            losses.append(loss.item())
            # NOTE(review): `==` fires only once per run, when exactly
            # logging_period batches have accumulated — same pattern as
            # in train(); confirm whether periodic logging was intended.
            if len(losses) == self.logging_period:
                avg_loss = sum(
                    losses[-self.logging_period:]) / self.logging_period
                self.logger.info(
                    '<epoch:{:3d}, iter:{:d}, lr:{:.3e}, loss:{:.3f}, batch:{:d} utterances> '
                    .format(self.cur_epoch, current_step,
                            self.optimizer.param_groups[0]['lr'], avg_loss,
                            len(losses)))
    end = time.time()
    total_loss_avg = sum(losses) / len(losses)
    self.logger.info(
        '<epoch:{:3d}, lr:{:.3e}, loss:{:.3f}, Total time:{:.3f} min> '.
        format(self.cur_epoch, self.optimizer.param_groups[0]['lr'],
               total_loss_avg, (end - start) / 60))
    return total_loss_avg
def calc_gradient_penalty(self, netD, real_data, fake_data):
    """WGAN-GP gradient penalty on interpolates of real and fake data.

    Returns LAMBDA * mean((||grad_D(x_hat)||_2 - 1)^2).
    """
    # gradient penalty
    batch_size, c, w, h = real_data.size()
    # NOTE(review): the WGAN-GP formulation samples alpha uniformly from
    # [0, 1] (torch.rand); randn draws a Gaussian that is not confined to
    # [0, 1] — confirm whether this deviation is intentional.
    alpha = torch.randn((batch_size, 1))
    # broadcast one alpha per sample across all of its elements
    alpha = alpha.expand(batch_size,
                         int(real_data.nelement() / batch_size)).contiguous()
    alpha = alpha.view(real_data.size())
    alpha = self.tensor2variable(alpha)
    # x_hat: random point on the line between real and fake samples;
    # both endpoints are detached so only the penalty path is trained
    interpolates = alpha * real_data.detach() + (
        (1 - alpha) * fake_data.detach())
    interpolates.requires_grad = True
    # run the discriminator across multiple GPUs
    disc_interpolates = data_parallel(netD, interpolates, self.gpus)
    # differentiate D(x_hat) w.r.t. x_hat, keeping the graph so the
    # penalty itself can be backpropagated
    gradients = grad(outputs=disc_interpolates, inputs=interpolates,
                     grad_outputs=torch.ones(disc_interpolates.size()).to(
                         self.default_device_id),
                     create_graph=True, retain_graph=True,
                     only_inputs=True)[0]
    gradients = gradients.view(gradients.size(0), -1)
    gradient_penalty = (
        (gradients.norm(2, dim=1) - 1)**2).mean() * self.LAMBDA
    return gradient_penalty
def evaluate(self):
    """Evaluate the model.

    Accumulates summed loss, negative log-likelihood and token count
    over the validation iterator and reports them via the reporter.
    """
    val_iter = self.get_iterator('main')
    loss = 0
    nll = 0
    count = 0
    self.model.eval()
    with torch.no_grad():
        # iterate over a shallow copy so the iterator is not consumed
        for batch in copy.copy(val_iter):
            # pad inputs with 0 and targets with -100 (ignored index)
            x, t = concat_examples(batch, device=self.device[0],
                                   padding=(0, -100))
            if self.device[0] == -1:
                # CPU-only path
                l, n, c = self.model(x, t)
            else:
                # apex does not support torch.nn.DataParallel
                l, n, c = data_parallel(self.model, (x, t), self.device)
            loss += float(l.sum())
            nll += float(n.sum())
            count += int(c.sum())
    self.model.train()
    # report validation loss
    observation = {}
    with reporter.report_scope(observation):
        reporter.report({'loss': loss}, self.model.reporter)
        reporter.report({'nll': nll}, self.model.reporter)
        reporter.report({'count': count}, self.model.reporter)
    return observation
def evaluate(model, device, sets):
    """Decode a test set and print DOA accuracy at several tolerances
    plus the mean absolute (circular) error."""
    # NOTE(review): the thresholds are 10/7.5/5 degrees but the counters
    # are named acc15/acc10/acc5 — the names and values disagree; confirm
    # which set is intended.
    acc15 = acc10 = acc5 = mae = 0
    model.eval()
    mix_scp = sets
    dataset = DataReader(mix_scp)
    total_num = len(dataset)
    print('=> Decoding ...')
    sys.stdout.flush()
    start_time = datetime.datetime.now()
    with th.no_grad():
        for idx, data in enumerate(dataset.read()):
            start = datetime.datetime.now()
            key = data['key']
            mix = data['mix'].to(device).float()
            doa = data['doa'].to(device).float()
            # NOTE(review): `(mix)` is not a tuple — the input is passed
            # bare, so data_parallel scatters the tensor directly; confirm
            # whether `(mix,)` was intended.
            ssl, sns = data_parallel(model, (mix))
            # predicted direction = argmax of speech-weighted SSL scores
            speech_ssl = th.argmax(ssl * sns).item()
            # circular distance on a 360-degree compass
            if min(abs(int(speech_ssl) - doa),
                   (360 - abs(int(speech_ssl) - doa))) <= 10:
                acc15 = acc15 + 1
            if min(abs(int(speech_ssl) - doa),
                   (360 - abs(int(speech_ssl) - doa))) <= 7.5:
                acc10 = acc10 + 1
            if min(abs(int(speech_ssl) - doa),
                   (360 - abs(int(speech_ssl) - doa))) <= 5:
                acc5 = acc5 + 1
            mae = mae + min(abs(int(speech_ssl) - doa),
                            360 - abs(int(speech_ssl) - doa))
            # per-utterance timing (overwritten by the total below)
            elapsed = (datetime.datetime.now() - start).total_seconds()
    elapsed = (datetime.datetime.now() - start_time).total_seconds()
    print('=> Decode done. Total time is {:.2f} mins'.format(elapsed / 60.0))
    print('=> acc is {:.4f} {:.4f} {:.4f}'.format(
        acc15 / total_num, acc10 / total_num, acc5 / total_num))
    print(mae / total_num)
def forward(self, x, mask=None, idx_scale=0):
    """Dispatch a forward pass for the selected scale.

    Training: plain (optionally DataParallel) model call.
    Inference: optionally chop large inputs (forward_chop) and/or apply
    x8 self-ensembling (forward_x8).
    """
    self.idx_scale = idx_scale
    if hasattr(self.model, 'set_scale'):
        # multi-scale models need to be told which head to use
        self.model.set_scale(idx_scale)
    if self.training:
        if self.n_GPUs > 1:
            return P.data_parallel(self.model, (x, mask), range(self.n_GPUs))
        else:
            return self.model(x, mask)
    else:
        # inference: pick the base forward, then optionally self-ensemble
        if self.chop:
            forward_function = self.forward_chop
        else:
            forward_function = self.model.forward
        if self.self_ensemble:
            return self.forward_x8(x, forward_function=forward_function)
        else:
            if self.args.debug:
                # log shapes only for wide masks to limit debug noise
                if mask.shape[1] > 120:
                    print('forward x shape', x.shape)
                    print('forward mask shape', mask.shape)
            return forward_function(x, mask)
def fitb(config):
    """Evaluate fill-in-the-blank (FITB) accuracy and save it to the
    configured result file."""
    parallel, device = utils.get_device(config.gpus)
    data_param = config.fitb_data_param
    LOGGER.info("Get data for FITB questions: %s", data_param)
    loader = polyvore.data.get_dataloader(data_param)
    pbar = tqdm.tqdm(loader, desc="Computing scores")
    net = get_net(config)
    net.eval()
    correct = 0
    cnt = 0
    for inputv in pbar:
        inputv = utils.to_device(inputv, device)
        with torch.no_grad():
            if parallel:
                _, score_b = data_parallel(net, inputv, config.gpus)
            else:
                _, score_b = net(*inputv)
        # the first item is the ground-truth item, so the answer is
        # correct when it receives the highest score
        if torch.argmax(score_b).item() == 0:
            correct += 1
        cnt += 1
        pbar.set_description("Accuracy: {:.3f}".format(correct / cnt))
    fitb_acc = correct / cnt
    LOGGER.info("FITB Accuracy %.4f", fitb_acc)
    results = dict(fitb_acc=fitb_acc)
    update_npz(config.result_file, results)
def update_core(self): """Main update routine of the CustomUpdater.""" # When we pass one iterator and optimizer to StandardUpdater.__init__, # they are automatically named 'main'. train_iter = self.get_iterator("main") optimizer = self.get_optimizer("main") epoch = train_iter.epoch # Get the next batch (a list of json files) batch = train_iter.next() # self.iteration += 1 # Increase may result in early report, # which is done in other place automatically. x = _recursive_to(batch, self.device) is_new_epoch = train_iter.epoch != epoch # When the last minibatch in the current epoch is given, # gradient accumulation is turned off in order to evaluate the model # on the validation set in every epoch. # see details in https://github.com/espnet/espnet/pull/1388 # Compute the loss at this time step and accumulate it if self.ngpu == 0: loss = self.model(*x).mean() / self.accum_grad else: # apex does not support torch.nn.DataParallel loss = (data_parallel(self.model, x, range(self.ngpu)).mean() / self.accum_grad) if self.use_apex: from apex import amp # NOTE: for a compatibility with noam optimizer opt = optimizer.optimizer if hasattr(optimizer, "optimizer") else optimizer with amp.scale_loss(loss, opt) as scaled_loss: scaled_loss.backward() else: loss.backward() # gradient noise injection if self.grad_noise: from espnet.asr.asr_utils import add_gradient_noise add_gradient_noise(self.model, self.iteration, duration=100, eta=1.0, scale_factor=0.55) # update parameters self.forward_count += 1 if not is_new_epoch and self.forward_count != self.accum_grad: return self.forward_count = 0 # compute the gradient norm to check if it is normal or not grad_norm = torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_clip_threshold) logging.info("grad norm={}".format(grad_norm)) if math.isnan(grad_norm): logging.warning("grad norm is nan. Do not update model.") else: optimizer.step() optimizer.zero_grad()
def decode(self, inputs, state, get_attention=None, devices=None):
    """Run the decoder, optionally split across GPUs.

    data_parallel cannot forward keyword arguments, so when running in
    parallel `get_attention` is folded into the positional input tuple.
    """
    if isinstance(devices, tuple):
        if get_attention:
            packed = (inputs, state, get_attention)
        else:
            packed = (inputs, state)
        return data_parallel(self.decoder, packed, device_ids=devices,
                             dim=0 if self.decoder.batch_first else 1)
    if get_attention:
        return self.decoder(inputs, state, get_attention=get_attention)
    return self.decoder(inputs, state)