def eval(self, epoch, save_score=False):
    """Evaluate the model on the full test set for one epoch.

    Computes per-batch loss and class scores, logs mean loss and top-k
    accuracy to the console and TensorBoard, and optionally pickles the
    per-sample score dict to the work dir.

    Args:
        epoch (int): zero-based epoch index (printed/saved as epoch + 1).
        save_score (bool): if True, dump {sample_name: score} to
            '<work_dir>/epoch<N>_test_score.pkl'.
    """
    self.model.eval()
    self.print_log('Eval epoch: {}'.format(epoch + 1))
    loss_value = []
    score_frag = []
    # No gradients needed during evaluation.
    with torch.no_grad():
        for batch_idx, (data, label) in enumerate(self.test_loader):
            data = Variable(
                data.float().cuda(self.output_device), requires_grad=False)
            label = Variable(
                label.long().cuda(self.output_device), requires_grad=False)
            output = self.model(data)
            loss = self.loss(output, label)
            score_frag.append(output.data.cpu().numpy())
            loss_value.append(loss.item())
    score = np.concatenate(score_frag)
    # Map each test sample's name to its score vector.
    score_dict = dict(zip(self.test_dataset.sample_name, score))
    # Hoisted: mean loss was recomputed three times in the original.
    mean_loss = np.mean(loss_value)
    self.print_log('\tMean test loss of {} batches: {}.'.format(
        len(self.test_loader), mean_loss))
    self.summary_writer.add_scalar('Test/AvgLoss', mean_loss, epoch)
    for k in self.args.show_topk:
        hit_val = self.top_k(score, self.test_dataset.labels, k)
        self.summary_writer.add_scalar('Test/AvgTop' + str(k), hit_val, epoch)
        self.print_log('\tTop{}: {:.2f}%'.format(k, 100 * hit_val))
        if k == 1:
            # Track (loss, top1%) for best-checkpoint selection.
            self.best_valid.append((mean_loss, 100 * hit_val))
    if save_score:
        # BUG FIX: pickle writes bytes, so the file must be opened in
        # binary mode; mode 'w' raises TypeError under Python 3.
        with open('{}/epoch{}_{}_score.pkl'.format(
                self.args.work_dir, epoch + 1, 'test'), 'wb') as f:
            pickle.dump(score_dict, f)
def trainFNN():
    """Run one training epoch over `train_data` using module-level globals.

    Relies on globals: model, train_data, args, optimizer, criterion,
    get_batch, ntokens, epoch, lr. Logs running loss/perplexity every
    `args.log_interval` batches.
    """
    model.train()
    total_loss = 0.
    start_time = time.time()
    # BUG FIX: the loop strides by args.batch_size, so the logged batch
    # total must use args.batch_size too (the original divided by
    # args.bptt, printing an inconsistent "{batch}/{total}" counter).
    # Leftover debug prints (shapes, 'here1'/'here2') were removed.
    num_batches = (train_data.size(0) - 1 + args.batch_size - 1) // args.batch_size
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.batch_size)):
        data, targets = get_batch(train_data, i)
        data = data.float()
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch, num_batches, lr,
                    elapsed * 1000 / args.log_interval, cur_loss,
                    math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
def train(self, epoch, save_model=False):
    """Train the model for one epoch over `self.train_loader`.

    Tracks running loss and top-1/top-5 accuracy, logs to console and
    TensorBoard every `self.args.log_interval` batches, accounts time
    spent in dataloading / model / statistics, and optionally saves a
    CPU state_dict checkpoint.

    Args:
        epoch (int): zero-based epoch index (printed/saved as epoch + 1).
        save_model (bool): if True, save weights to
            '<work_dir>/epoch<N>_model.pt'.
    """
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    self.model.train()
    self.print_log('Training epoch: {}'.format(epoch + 1))
    lr = self.adjust_learning_rate(epoch)
    loss_value = []
    timetracker = Timer(print_log=self.args.print_log,
                        work_dir=self.args.work_dir)
    # Small non-zero seeds avoid division by zero in the proportion dict.
    timer = dict(dataloader=0.001, model=0.001, statistics=0.001)
    for batch_idx, (data, label) in enumerate(self.train_loader):
        # get data
        data = Variable(
            data.float().cuda(self.output_device), requires_grad=False)
        label = Variable(
            label.long().cuda(self.output_device), requires_grad=False)
        timer['dataloader'] += timetracker.split_time()
        # forward
        output = self.model(data)
        loss = self.loss(output, label)
        # backward
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        loss_value.append(loss.item())
        score_frag = output.data.cpu().numpy()
        label_frag = label.data.cpu().numpy()
        timer['model'] += timetracker.split_time()
        hit1 = self.top_k(score_frag, label_frag, 1)
        hit5 = self.top_k(score_frag, label_frag, 5)
        loss_val = loss.item()
        # BUG FIX: the batch count is data.size(0); the original used
        # data[0].size(0) — the first sample's leading dim — which skews
        # every AverageMeter running average.
        batch_size = data.size(0)
        losses.update(loss_val, batch_size)
        top1.update(hit1 * 100., batch_size)
        top5.update(hit5 * 100., batch_size)
        # statistics
        if batch_idx % self.args.log_interval == 0:
            self.print_log(
                '\tBatch({}/{}) done. Top1: {:.2f} ({:.2f}) Top5: {:.2f} ({:.2f}) '
                ' Loss: {:.4f} ({:.4f}) lr:{:.6f}'.format(
                    batch_idx, len(self.train_loader), top1.val, top1.avg,
                    top5.val, top5.avg, losses.val, losses.avg, lr))
            # Global step so TensorBoard curves are continuous across epochs.
            step = epoch * len(self.train_loader) + batch_idx
            self.summary_writer.add_scalar('Train/AvgLoss', losses.avg, step)
            self.summary_writer.add_scalar('Train/AvgTop1', top1.avg, step)
            self.summary_writer.add_scalar('Train/AvgTop5', top5.avg, step)
            self.summary_writer.add_scalar('Train/LearningRate', lr, step)
        timer['statistics'] += timetracker.split_time()
    # statistics of time consumption and loss
    proportion = {
        k: '{:02d}%'.format(int(round(v * 100 / sum(timer.values()))))
        for k, v in timer.items()
    }
    self.print_log(
        '\tMean training loss: {:.4f}.'.format(np.mean(loss_value)))
    self.print_log(
        '\tTime consumption: [Data]{dataloader}, [Network]{model}'.format(
            **proportion))
    if save_model:
        model_path = '{}/epoch{}_model.pt'.format(self.args.work_dir,
                                                  epoch + 1)
        state_dict = self.model.state_dict()
        # Strip a DataParallel 'module.' prefix and move weights to CPU
        # so the checkpoint loads on any device.
        weights = OrderedDict([[k.split('module.')[-1], v.cpu()]
                               for k, v in state_dict.items()])
        torch.save(weights, model_path)