def test(args, model, device, test_loader, epoch):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    Logger.current_logger().report_scalar(
        "test", "loss", iteration=epoch, value=test_loss)
    Logger.current_logger().report_scalar(
        "test", "accuracy", iteration=epoch, value=(correct / len(test_loader.dataset)))
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
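# A minimal, self-contained sketch of the manual report_scalar() call used in
# test() above. The project/task names and the reported values are
# illustrative only; it assumes a reachable TRAINS server.
from trains import Task, Logger

task = Task.init(project_name='examples', task_name='manual scalar reporting')
logger = Logger.current_logger()
for step in range(10):
    # each (title, series) pair becomes one curve on the "test" plot
    logger.report_scalar(title='test', series='loss', iteration=step, value=1.0 / (step + 1))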
def on_epoch_end(self, epoch, logs=None):
    Logger.current_logger().report_scalar(
        "loss", "train", iteration=self.epoch_ref + epoch, value=logs["loss"])
    Logger.current_logger().report_scalar(
        "loss", "validation", iteration=self.epoch_ref + epoch, value=logs["val_loss"])
    Logger.current_logger().report_scalar(
        "accuracy", "train", iteration=self.epoch_ref + epoch, value=logs["binary_accuracy"])
    Logger.current_logger().report_scalar(
        "accuracy", "validation", iteration=self.epoch_ref + epoch, value=logs["val_binary_accuracy"])
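# Hedged sketch of the callback class the on_epoch_end() method above would
# belong to. The class name and the epoch_ref initialization are assumptions
# for illustration; only the reporting logic comes from the original.
from keras.callbacks import Callback

class TrainsEpochLogger(Callback):
    def __init__(self, epoch_ref=0):
        super(TrainsEpochLogger, self).__init__()
        # offset added to Keras' per-fit() epoch counter when resuming training
        self.epoch_ref = epoch_ref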
def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            Logger.current_logger().report_scalar(
                "train", "loss", iteration=(epoch * len(train_loader) + batch_idx),
                value=loss.item())
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
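# Hedged sketch of the per-epoch driver assumed to call train() and test()
# above; `model`, `device`, the loaders, and `optimizer` are assumed to be
# built earlier in the script, and the Task.init arguments are illustrative.
from trains import Task

task = Task.init(project_name='examples', task_name='pytorch mnist train')
for epoch in range(1, args.epochs + 1):
    train(args, model, device, train_loader, optimizer, epoch)
    test(args, model, device, test_loader, epoch)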
def test(args, model, device, test_loader, epoch):
    save_test_loss = []
    save_correct = []
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
            save_test_loss.append(test_loss)
            save_correct.append(correct)

    test_loss /= len(test_loader.dataset)

    Logger.current_logger().report_scalar(
        "test", "loss", iteration=epoch, value=test_loss)
    Logger.current_logger().report_scalar(
        "test", "accuracy", iteration=epoch, value=(correct / len(test_loader.dataset)))
    Logger.current_logger().report_histogram(
        title='Histogram example', series='correct', iteration=1,
        values=save_correct, xaxis='Test', yaxis='Correct')
    matrix = np.array([save_test_loss, save_correct])
    Logger.current_logger().report_confusion_matrix(
        title='Confusion matrix example', series='Test loss / correct', iteration=1,
        matrix=matrix)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
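# A minimal, runnable sketch of the two manual-reporting calls used above,
# fed with synthetic numbers (illustrative only).
import numpy as np
from trains import Task, Logger

task = Task.init(project_name='examples', task_name='manual matrix reporting')
logger = Logger.current_logger()
logger.report_histogram(
    title='Histogram example', series='correct', iteration=1,
    values=[4, 8, 15, 16, 23, 42], xaxis='Test', yaxis='Correct')
# report_confusion_matrix renders any 2-D array as a heatmap, not only true
# confusion matrices -- the example above uses it for loss/correct pairs
logger.report_confusion_matrix(
    title='Confusion matrix example', series='Test loss / correct', iteration=1,
    matrix=np.array([[0.1, 0.2, 0.3], [10, 20, 30]]))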
model.add(Activation('softmax'))

# a second network that is built but never compiled or trained in this snippet
model2 = Sequential()
model2.add(Dense(args['layer_4'], input_shape=(784,)))
model2.add(Activation('relu'))

model.summary()
model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(),
              metrics=['accuracy'])

# Advanced: setting model class enumeration
labels = dict(('digit_%d' % i, i) for i in range(10))
task.set_model_label_enumeration(labels)

output_folder = os.path.join(tempfile.gettempdir(), 'keras_example')
board = TensorBoard(log_dir=output_folder, write_images=False)
model_store = ModelCheckpoint(filepath=os.path.join(output_folder, 'weight.hdf5'))

history = model.fit(X_train, Y_train,
                    batch_size=args['batch_size'], epochs=args['epochs'],
                    callbacks=[board, model_store],
                    validation_data=(X_test, Y_test))
score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Logger.current_logger().report_scalar(
    title='evaluate', series='score', value=score[0], iteration=args['epochs'])
Logger.current_logger().report_scalar(
    title='evaluate', series='accuracy', value=score[1], iteration=args['epochs'])
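# Hedged sketch of the setup assumed to precede the Keras snippet above: the
# TRAINS task and the hyper-parameter dict it reads. Concrete values are
# illustrative, not the original example's defaults.
from trains import Task

task = Task.init(project_name='examples', task_name='keras with tensorboard example')
args = {'batch_size': 128, 'epochs': 6, 'layer_1': 512, 'layer_4': 512}
args = task.connect(args)  # logs the hyper-parameters and allows remote override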
# TRAINS - Example reporting video or audio links/file
#
import os

from trains import Task, Logger

task = Task.init(project_name="examples", task_name="audio and video reporting")

print('reporting audio and video samples to the debug samples section')

# report video, an already uploaded video media (url)
Logger.current_logger().report_media(
    'video', 'big bunny', iteration=1,
    url='https://test-videos.co.uk/vids/bigbuckbunny/mp4/h264/720/Big_Buck_Bunny_720_10s_1MB.mp4')

# report audio, report an already uploaded audio media (url)
Logger.current_logger().report_media(
    'audio', 'pink panther', iteration=1,
    url='https://www2.cs.uic.edu/~i101/SoundFiles/PinkPanther30.wav')

# report audio, report local media audio file
Logger.current_logger().report_media(
    'audio', 'tada', iteration=1,
    local_path=os.path.join('data_samples', 'sample.mp3'))
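# Hedged follow-on sketch: report_media() can also upload content generated at
# run time. Here a one-second synthetic tone is written to a local WAV file and
# reported; the file name and signal are illustrative.
import math
import struct
import wave

os.makedirs('data_samples', exist_ok=True)
sample_path = os.path.join('data_samples', 'sine.wav')
with wave.open(sample_path, 'w') as wav:
    wav.setnchannels(1)    # mono
    wav.setsampwidth(2)    # 16-bit samples
    wav.setframerate(8000)
    for i in range(8000):  # one second of a 440 Hz tone
        wav.writeframes(struct.pack('<h', int(16000 * math.sin(2 * math.pi * 440 * i / 8000))))
Logger.current_logger().report_media('audio', 'sine', iteration=1, local_path=sample_path)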
def validate(model, loader, loss_fn, args, epoch, amp_autocast=suppress, log_suffix=''):
    batch_time_m = AverageMeter()
    losses_m = AverageMeter()
    top1_m = AverageMeter()
    top5_m = AverageMeter()

    model.eval()

    end = time.time()
    last_idx = len(loader) - 1
    with torch.no_grad():
        for batch_idx, (input, target) in enumerate(loader):
            last_batch = batch_idx == last_idx
            if not args.prefetcher:
                input = input.cuda()
                target = target.cuda()
            if args.channels_last:
                input = input.contiguous(memory_format=torch.channels_last)

            with amp_autocast():
                output = model(input)
            if isinstance(output, (tuple, list)):
                output = output[0]

            # augmentation reduction
            reduce_factor = args.tta
            if reduce_factor > 1:
                output = output.unfold(0, reduce_factor, reduce_factor).mean(dim=2)
                target = target[0:target.size(0):reduce_factor]

            loss = loss_fn(output, target)
            acc1, acc5 = accuracy(output, target, topk=(1, 5))

            if args.distributed:
                reduced_loss = reduce_tensor(loss.data, args.world_size)
                acc1 = reduce_tensor(acc1, args.world_size)
                acc5 = reduce_tensor(acc5, args.world_size)
            else:
                reduced_loss = loss.data

            torch.cuda.synchronize()

            losses_m.update(reduced_loss.item(), input.size(0))
            top1_m.update(acc1.item(), output.size(0))
            top5_m.update(acc5.item(), output.size(0))

            batch_time_m.update(time.time() - end)
            end = time.time()
            if args.local_rank == 0 and (last_batch or batch_idx % args.log_interval == 0):
                log_name = 'Test' + log_suffix
                _logger.info(
                    '{0}: [{1:>4d}/{2}] '
                    'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                    'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f}) '
                    'Acc@1: {top1.val:>7.4f} ({top1.avg:>7.4f}) '
                    'Acc@5: {top5.val:>7.4f} ({top5.avg:>7.4f})'.format(
                        log_name, batch_idx, last_idx,
                        batch_time=batch_time_m, loss=losses_m,
                        top1=top1_m, top5=top5_m))
                Logger.current_logger().report_scalar(
                    "loss", "test", iteration=(epoch * len(loader) + batch_idx),
                    value=losses_m.avg)
                Logger.current_logger().report_scalar(
                    "top1", "test", iteration=(epoch * len(loader) + batch_idx),
                    value=top1_m.avg)
                Logger.current_logger().report_scalar(
                    "top5", "test", iteration=(epoch * len(loader) + batch_idx),
                    value=top5_m.avg)

    metrics = OrderedDict([('loss', losses_m.avg), ('top1', top1_m.avg), ('top5', top5_m.avg)])

    return metrics
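# validate() above leans on timm-style helpers. A minimal sketch of the two
# that drive the bookkeeping, assuming the usual running-average and
# all-reduce semantics (check the actual timm.utils versions before relying
# on this):
import torch.distributed as dist

class AverageMeter:
    """Tracks the latest value and a sample-weighted running average."""
    def __init__(self):
        self.val = self.avg = self.sum = 0.0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def reduce_tensor(tensor, world_size):
    # average a metric tensor across distributed workers
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    return rt / world_size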
def train_epoch(
        epoch, model, loader, optimizer, loss_fn, args,
        lr_scheduler=None, saver=None, output_dir='', amp_autocast=suppress,
        loss_scaler=None, model_ema=None, mixup_fn=None):

    if args.mixup_off_epoch and epoch >= args.mixup_off_epoch:
        if args.prefetcher and loader.mixup_enabled:
            loader.mixup_enabled = False
        elif mixup_fn is not None:
            mixup_fn.mixup_enabled = False

    second_order = hasattr(optimizer, 'is_second_order') and optimizer.is_second_order
    batch_time_m = AverageMeter()
    data_time_m = AverageMeter()
    losses_m = AverageMeter()
    top1_m = AverageMeter()
    top5_m = AverageMeter()

    model.train()

    end = time.time()
    last_idx = len(loader) - 1
    num_updates = epoch * len(loader)
    for batch_idx, (input, target) in enumerate(loader):
        last_batch = batch_idx == last_idx
        data_time_m.update(time.time() - end)
        if not args.prefetcher:
            input, target = input.cuda(), target.cuda()
            if mixup_fn is not None:
                input, target = mixup_fn(input, target)
        if args.channels_last:
            input = input.contiguous(memory_format=torch.channels_last)

        with amp_autocast():
            output = model(input)
            loss = loss_fn(output, target)

        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        if args.distributed:
            acc1 = reduce_tensor(acc1, args.world_size)
            acc5 = reduce_tensor(acc5, args.world_size)
        top1_m.update(acc1.item(), output.size(0))
        top5_m.update(acc5.item(), output.size(0))

        if not args.distributed:
            losses_m.update(loss.item(), input.size(0))

        optimizer.zero_grad()
        if loss_scaler is not None:
            loss_scaler(
                loss, optimizer, clip_grad=args.clip_grad,
                parameters=model.parameters(), create_graph=second_order)
        else:
            loss.backward(create_graph=second_order)
            if args.clip_grad is not None:
                torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip_grad)
            optimizer.step()

        if model_ema is not None:
            model_ema.update(model)

        torch.cuda.synchronize()
        num_updates += 1

        batch_time_m.update(time.time() - end)
        if last_batch or batch_idx % args.log_interval == 0:
            lrl = [param_group['lr'] for param_group in optimizer.param_groups]
            lr = sum(lrl) / len(lrl)

            if args.distributed:
                reduced_loss = reduce_tensor(loss.data, args.world_size)
                losses_m.update(reduced_loss.item(), input.size(0))

            if args.local_rank == 0:
                _logger.info(
                    'Train: {} [{:>4d}/{} ({:>3.0f}%)] '
                    'Loss: {loss.val:>9.6f} ({loss.avg:>6.4f}) '
                    'Time: {batch_time.val:.3f}s, {rate:>7.2f}/s '
                    '({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s) '
                    'LR: {lr:.3e} '
                    'Data: {data_time.val:.3f} ({data_time.avg:.3f})'.format(
                        epoch,
                        batch_idx, len(loader),
                        100. * batch_idx / last_idx,
                        loss=losses_m,
                        batch_time=batch_time_m,
                        rate=input.size(0) * args.world_size / batch_time_m.val,
                        rate_avg=input.size(0) * args.world_size / batch_time_m.avg,
                        lr=lr,
                        data_time=data_time_m))
                Logger.current_logger().report_scalar(
                    "loss", "train", iteration=(epoch * len(loader) + batch_idx),
                    value=losses_m.avg)
                Logger.current_logger().report_scalar(
                    "top1", "train", iteration=(epoch * len(loader) + batch_idx),
                    value=top1_m.avg)
                Logger.current_logger().report_scalar(
                    "top5", "train", iteration=(epoch * len(loader) + batch_idx),
                    value=top5_m.avg)
                Logger.current_logger().report_scalar(
                    "lr", "train", iteration=(epoch * len(loader) + batch_idx),
                    value=lr)

                if args.save_images and output_dir:
                    torchvision.utils.save_image(
                        input,
                        os.path.join(output_dir, 'train-batch-%d.jpg' % batch_idx),
                        padding=0,
                        normalize=True)

        if saver is not None and args.recovery_interval and (
                last_batch or (batch_idx + 1) % args.recovery_interval == 0):
            saver.save_recovery(epoch, batch_idx=batch_idx)

        if lr_scheduler is not None:
            lr_scheduler.step_update(num_updates=num_updates, metric=losses_m.avg)

        end = time.time()
        # end for

    if hasattr(optimizer, 'sync_lookahead'):
        optimizer.sync_lookahead()

    return OrderedDict([('loss', losses_m.avg)])
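# Hedged sketch of the outer epoch loop that would tie train_epoch() and
# validate() together. The scheduler/saver wiring is inferred from the
# function signatures above, not taken from the original training script.
for epoch in range(start_epoch, num_epochs):
    train_metrics = train_epoch(
        epoch, model, loader_train, optimizer, train_loss_fn, args,
        lr_scheduler=lr_scheduler, saver=saver, amp_autocast=amp_autocast)
    eval_metrics = validate(
        model, loader_eval, validate_loss_fn, args, epoch, amp_autocast=amp_autocast)
    if lr_scheduler is not None:
        # epoch-boundary step; timm schedulers accept the chosen eval metric
        lr_scheduler.step(epoch + 1, eval_metrics['top1'])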