                              momentum=0.9)
        elif i == 48000:
            optimizer = optim.SGD(model.parameters(),
                                  lr=1e-3,
                                  weight_decay=1e-4,
                                  momentum=0.9)
        elif i == 64000:
            end_time = time.time()
            print("total time %.1f h" % ((end_time - start_time) / 3600))
            sys.exit(0)

        # shift to train mode
        model.train()

        # get the inputs
        inputs, labels = mixup(data1, data2, 0.2)
        inputs = inputs.cuda()
        labels = labels.cuda()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # count accuracy and loss on the train set
        total += labels.size(0)
        predicted = outputs.data.argmax(dim=1)
        correct += (predicted == labels.argmax(dim=1)).sum().item()
        train_loss += loss.item()
def train_mixmatch(label_loader, unlabel_loader, num_classes, model,
                   optimizer, ema_optimizer, epoch, args):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    losses_x = AverageMeter()
    losses_u = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    weights = AverageMeter()
    nu = 2

    end = time.time()
    label_iter = iter(label_loader)
    unlabel_iter = iter(unlabel_loader)
    model.train()
    for i in range(args.val_iteration):
        try:
            (input, _), target = next(label_iter)
        except StopIteration:
            label_iter = iter(label_loader)
            (input, _), target = next(label_iter)
        try:
            (input_ul, input1_ul), _ = next(unlabel_iter)
        except StopIteration:
            unlabel_iter = iter(unlabel_loader)
            (input_ul, input1_ul), _ = next(unlabel_iter)
        bs = input.size(0)

        # measure data loading time
        data_time.update(time.time() - end)

        input, target = input.cuda(), target.cuda(non_blocking=True)
        input_ul, input1_ul = input_ul.cuda(), input1_ul.cuda()

        with torch.no_grad():
            # compute guessed labels for the two unlabeled augmentations
            logits = model(torch.cat([input_ul, input1_ul], dim=0))
            p = torch.nn.functional.softmax(logits, dim=-1).view(
                nu, -1, logits.shape[1])
            # average the predictions and sharpen with temperature T
            p_target = p.mean(dim=0).pow(1. / args.T)
            p_target /= p_target.sum(dim=1, keepdim=True)
            guess = p_target.detach_()

        assert input.shape[0] == input_ul.shape[0]

        # mixup of labeled and unlabeled batches
        target_in_onehot = torch.zeros(
            bs, num_classes).float().cuda().scatter_(1, target.view(-1, 1), 1)
        mixed_input, mixed_target = mixup(
            torch.cat([input] + [input_ul, input1_ul], dim=0),
            torch.cat([target_in_onehot] + [guess] * nu, dim=0),
            beta=args.beta)
        # reshape to (nu + 1, bs, w, h, c)
        mixed_input = mixed_input.reshape([nu + 1] + list(input.shape))
        # reshape to (nu + 1, bs, num_classes)
        mixed_target = mixed_target.reshape([nu + 1] +
                                            list(target_in_onehot.shape))
        input_x, input_u = mixed_input[0], mixed_input[1:]
        target_x, target_u = mixed_target[0], mixed_target[1:]

        model.train()
        batches = interleave([input_x, input_u[0], input_u[1]], bs)
        logits = [model(batches[0])]
        for batchi in batches[1:]:
            logits.append(model(batchi))
        logits = interleave(logits, bs)
        logits_x = logits[0]
        logits_u = torch.cat(logits[1:], 0)

        # losses
        # cross-entropy loss for soft labels
        loss_xe = torch.mean(
            torch.sum(-target_x * F.log_softmax(logits_x, dim=-1), dim=1))
        # L2 consistency loss on the unlabeled part
        loss_l2u = F.mse_loss(F.softmax(logits_u, dim=-1),
                              target_u.reshape(nu * bs, num_classes))
        # weight for the unlabeled loss, with linear warmup
        w_match = args.lambda_u * linear_rampup(
            epoch + i / args.val_iteration, args.epochs)
        loss = loss_xe + w_match * loss_l2u

        # measure accuracy and record losses
        prec1, prec5 = accuracy(logits_x, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        losses_x.update(loss_xe.item(), input.size(0))
        losses_u.update(loss_l2u.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))
        weights.update(w_match, input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        ema_optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Loss_x {loss_x.val:.4f} ({loss_x.avg:.4f})\t'
                  'Loss_u {loss_u.val:.4f} ({loss_u.avg:.4f})\t'
                  'Ws {ws.val:.4f}\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch,
                      i,
                      args.val_iteration,
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      loss_x=losses_x,
                      loss_u=losses_u,
                      ws=weights,
                      top1=top1,
                      top5=top5))
    return top1.avg, top5.avg, losses.avg, losses_x.avg, losses_u.avg, weights.avg
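# Note: `linear_rampup` and `interleave` are helpers assumed by the MixMatch loop
# above and are not defined in this section. The sketches below follow the usual
# MixMatch reference implementations; the exact versions used here may differ.
import numpy as np
import torch


def linear_rampup(current, rampup_length):
    # Linearly ramp the unlabeled-loss weight from 0 to 1 over `rampup_length` epochs.
    if rampup_length == 0:
        return 1.0
    return float(np.clip(current / rampup_length, 0.0, 1.0))


def interleave_offsets(batch, nu):
    # Split a batch of size `batch` into nu + 1 nearly equal groups; return offsets.
    groups = [batch // (nu + 1)] * (nu + 1)
    for x in range(batch - sum(groups)):
        groups[-x - 1] += 1
    offsets = [0]
    for g in groups:
        offsets.append(offsets[-1] + g)
    assert offsets[-1] == batch
    return offsets


def interleave(xy, batch):
    # Interleave labeled and unlabeled batches so that BatchNorm statistics stay
    # consistent when only the first forward pass updates them.
    nu = len(xy) - 1
    offsets = interleave_offsets(batch, nu)
    xy = [[v[offsets[p]:offsets[p + 1]] for p in range(nu + 1)] for v in xy]
    for i in range(1, nu + 1):
        xy[0][i], xy[i][i] = xy[i][i], xy[0][i]
    return [torch.cat(v, dim=0) for v in xy]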
def train(epoch, model, optimizer, scheduler, criterion, train_loader, config,
          writer):
    global global_step

    run_config = config['run_config']
    optim_config = config['optim_config']
    data_config = config['data_config']

    logger.info('Train {}'.format(epoch))

    model.train()

    loss_meter = AverageMeter()
    accuracy_meter = AverageMeter()
    start = time.time()
    for step, (data, targets) in enumerate(train_loader):
        global_step += 1

        if data_config['use_mixup']:
            data, targets = mixup(data, targets, data_config['mixup_alpha'],
                                  data_config['n_classes'])

        if run_config['tensorboard_train_images']:
            if step == 0:
                image = torchvision.utils.make_grid(data,
                                                    normalize=True,
                                                    scale_each=True)
                writer.add_image('Train/Image', image, epoch)

        if optim_config['scheduler'] == 'multistep':
            scheduler.step(epoch - 1)
        elif optim_config['scheduler'] == 'cosine':
            scheduler.step()

        if run_config['tensorboard']:
            if optim_config['scheduler'] != 'none':
                lr = scheduler.get_lr()[0]
            else:
                lr = optim_config['base_lr']
            writer.add_scalar('Train/LearningRate', lr, global_step)

        if run_config['use_gpu']:
            data = data.cuda()
            targets = targets.cuda()

        optimizer.zero_grad()

        outputs = model(data)
        loss = criterion(outputs, targets)
        loss.backward()

        optimizer.step()

        _, preds = torch.max(outputs, dim=1)

        loss_ = loss.item()
        if data_config['use_mixup']:
            # mixup targets are soft labels; take the argmax to compute accuracy
            _, targets = targets.max(dim=1)
        correct_ = preds.eq(targets).sum().item()

        num = data.size(0)
        accuracy = correct_ / num

        loss_meter.update(loss_, num)
        accuracy_meter.update(accuracy, num)

        if run_config['tensorboard']:
            writer.add_scalar('Train/RunningLoss', loss_, global_step)
            writer.add_scalar('Train/RunningAccuracy', accuracy, global_step)

        if step % 100 == 0:
            logger.info('Epoch {} '
                        'Train Loss {:.3f} '
                        'Train Acc {:.4f}'.format(
                            epoch,
                            loss_meter.avg,
                            accuracy_meter.avg,
                        ))

    elapsed = time.time() - start
    logger.info('Elapsed {:.2f}'.format(elapsed))

    if run_config['tensorboard']:
        writer.add_scalar('Train/Loss', loss_meter.avg, epoch)
        writer.add_scalar('Train/Accuracy', accuracy_meter.avg, epoch)
        writer.add_scalar('Train/Time', elapsed, epoch)
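# The `train` functions in this section call a `mixup(data, targets, alpha, n_classes)`
# helper that is not shown here. A minimal sketch consistent with how it is used
# (returns mixed inputs and soft one-hot targets, so `targets.max(dim=1)` recovers
# hard labels, and the criterion is assumed to accept soft labels); the repository's
# own helper may differ.
import numpy as np
import torch


def onehot(targets, n_classes):
    # Convert integer class labels to one-hot vectors.
    return torch.eye(n_classes)[targets]


def mixup(data, targets, alpha, n_classes):
    # Sample one mixing coefficient per batch and blend each example with a
    # randomly chosen partner from the same batch.
    lam = np.random.beta(alpha, alpha)
    indices = torch.randperm(data.size(0))
    data = lam * data + (1 - lam) * data[indices]
    targets = onehot(targets, n_classes)
    targets = lam * targets + (1 - lam) * targets[indices]
    return data, targets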
def train(epoch, model, optimizer, scheduler, criterion, train_loader, config,
          writer, moment_dict):
    global global_step

    run_config = config['run_config']
    optim_config = config['optim_config']
    data_config = config['data_config']

    logger.info('Train {}'.format(epoch))

    model.train()

    loss_meter = AverageMeter()
    # re-evaluate the loss on images before the gradient update (first 20 batches)
    loss_before_meter = AverageMeter()
    # re-evaluate the loss on images after the gradient update (first 20 batches)
    loss_after_meter = AverageMeter()
    accuracy_meter = AverageMeter()

    # approximate losses and their average meters are assembled here
    apx_meters = {
        'vanilla': AverageMeter(),
        'mixup': AverageMeter(),
        'doublesum': AverageMeter()
    }
    apx_callbacks = {
        'vanilla':
        lambda imgs, lbls, mdl: apx.vanilla_loss(imgs, lbls, mdl,
                                                 run_config['use_gpu']),
        'mixup':
        lambda imgs, lbls, mdl: apx.mixup_loss(
            imgs, lbls, data_config['mixup_alpha'], data_config['n_classes'],
            data_config['fixlam'], mdl, run_config['use_gpu']),
        'doublesum':
        lambda imgs, lbls, mdl: apx.doublesum_loss(
            imgs, lbls, data_config['mixup_alpha'], data_config['n_classes'],
            data_config['fixlam'], mdl, run_config['use_gpu'])
    }

    ### take 2 for computing the approximate losses ###
    # We store all the mixed images seen throughout this epoch and compute the
    # loss on them at the end - this is effectively what is being optimised.
    # We then recompute a freshly mixed-up dataset and compute the loss on that,
    # which is our (likely close) approximation of the double-sum loss.
    # For validity this is done batchwise as well, which also avoids loading
    # all images and labels at once.
    images_train = []  # all mixed images encountered in this epoch
    labels_train = []  # all mixed labels encountered in this epoch
    images_eval = []   # images lined up for evaluation at the end of the epoch
    labels_eval = []
    images_eval2 = []  # second trial to check concentration
    labels_eval2 = []

    start = time.time()
    for step, (data, targets) in enumerate(train_loader):
        global_step += 1

        images = copy.deepcopy(data)
        labels = copy.deepcopy(targets)

        if data_config['use_mixup']:
            data, targets = mixup(data, targets, data_config['mixup_alpha'],
                                  data_config['n_classes'],
                                  data_config['fixtrainlam'], True)

        # assemble the data for the double-sum approximation test
        images_train.append(copy.deepcopy(data))
        labels_train.append(copy.deepcopy(targets))

        if run_config['tensorboard_train_images']:
            if step == 0:
                image = torchvision.utils.make_grid(data,
                                                    normalize=True,
                                                    scale_each=True)
                writer.add_image('Train/Image', image, epoch)

        if optim_config['scheduler'] == 'multistep':
            scheduler.step(epoch - 1)
        elif optim_config['scheduler'] == 'cosine':
            scheduler.step()

        if run_config['tensorboard']:
            if optim_config['scheduler'] != 'none':
                lr = scheduler.get_lr()[0]
            else:
                lr = optim_config['base_lr']
            writer.add_scalar('Train/LearningRate', lr, global_step)

        if run_config['use_gpu']:
            data = data.cuda()
            targets = targets.cuda()

        optimizer.zero_grad()

        outputs = model(data)
        loss = criterion(outputs, targets)
        loss.backward()

        optimizer.step()

        _, preds = torch.max(outputs, dim=1)

        loss_ = loss.item()

        # compute the loss again after the gradient update
        outputs = model(data)
        newloss = criterion(outputs, targets)
        newloss_ = newloss.item()

        if data_config['use_mixup']:
            _, targets = targets.max(dim=1)
        correct_ = preds.eq(targets).sum().item()

        num = data.size(0)
        accuracy = correct_ / num

        loss_meter.update(loss_, num)
        accuracy_meter.update(accuracy, num)

        # this is where the approximate losses used to be computed;
        # now we are not restricting to the first batches
        # if step < data_config['doublesum_batches']:
        #     for k in apx_meters.keys():
        #         l = apx_callbacks[k](images, labels, model)
        #         apx_meters[k].update(l.item(), num)
        #     loss_before_meter.update(loss_, num)
        #     loss_after_meter.update(newloss_, num)

        if data_config['compute_mixup_reg'] > 0:
            # batch size
            N = data_config['batch_size']
            # original shape of the images
            data_shape = data.shape
            # data_flat is a stack of rows, where each row is a flattened data
            # point: data_flat[i, :] = data[i, ...].reshape(1, data.numel() // N)
            data_flat = data.reshape((N, int(data.numel() / N)))
            # y_vec is a stack of rows, where each row is the one-hot version
            # of the correct label
            y_vec = torch.zeros((N, targets.max() + 1)).cuda()
            y_vec[np.arange(N), targets] = 1
            # vectors to take the action of the Hessian on
            V = (data_flat - data_flat.sum(axis=0)).detach().clone()
            W = (y_vec - y_vec.sum(axis=0)).detach().clone()

            X = 2 * torch.ones((2, 2)).cuda()
            Y = 2 * torch.ones((2, 2)).cuda()
            V = torch.ones((2, 2)).cuda()
            hvprod = taylor.hess_quadratic(
                lambda x, y: torch.sum(x.pow(2) + y.pow(2)), lambda x: x,
                X.shape, X, Y, 'x', 'x', V, V)
            print(hvprod)

        if run_config['tensorboard']:
            writer.add_scalar('Train/RunningLoss', loss_, global_step)
            writer.add_scalar('Train/RunningAccuracy', accuracy, global_step)

        if step % 100 == 0:
            logger.info('Epoch {} Step {}/{} '
                        'Loss {:.4f} ({:.4f}) '
                        'Accuracy {:.4f} ({:.4f})'.format(
                            epoch,
                            step,
                            len(train_loader),
                            loss_meter.val,
                            loss_meter.avg,
                            accuracy_meter.val,
                            accuracy_meter.avg,
                        ))

    ret = [epoch, loss_meter.avg, accuracy_meter.avg]

    if data_config['use_mixup'] and (epoch <= 4 or epoch % 5 == 0):
        model.eval()
        # re-iterate through the train loader so that the construction of the
        # eval sets is completely separate from the train set
        for step, (data, targets) in enumerate(train_loader):
            old_data = copy.deepcopy(data)
            old_targets = copy.deepcopy(targets)
            data_eval, targets_eval = mixup(old_data, old_targets,
                                            data_config['mixup_alpha'],
                                            data_config['n_classes'],
                                            data_config['fixlam'], True)
            images_eval.append(copy.deepcopy(data_eval))
            labels_eval.append(copy.deepcopy(targets_eval))
        for step, (data, targets) in enumerate(train_loader):
            old_data = copy.deepcopy(data)
            old_targets = copy.deepcopy(targets)
            data_eval2, targets_eval2 = mixup(old_data, old_targets,
                                              data_config['mixup_alpha'],
                                              data_config['n_classes'],
                                              data_config['fixlam'], True)
            images_eval2.append(copy.deepcopy(data_eval2))
            labels_eval2.append(copy.deepcopy(targets_eval2))

        # evaluate the approximate losses
        images_train = torch.cat(images_train)
        labels_train = torch.cat(labels_train)
        images_eval = torch.cat(images_eval)
        labels_eval = torch.cat(labels_eval)
        images_eval2 = torch.cat(images_eval2)
        labels_eval2 = torch.cat(labels_eval2)

        apxloss_train = apx.compute_loss(images_train, labels_train, model,
                                         run_config['use_gpu'])
        apxloss_eval = apx.compute_loss(images_eval, labels_eval, model,
                                        run_config['use_gpu'])
        apxloss_eval2 = apx.compute_loss(images_eval2, labels_eval2, model,
                                         run_config['use_gpu'])
        logger.info('Train {:.4f}, Eval {:.4f}, Eval retrial {:.4f}'.format(
            apxloss_train, apxloss_eval, apxloss_eval2))

        ret.append(apxloss_train.item())
        ret.append(apxloss_eval.item())
        ret.append(apxloss_eval2.item())
        model.train()

    # compute the Taylor-approximate losses
    if data_config['cov_components'] > 0 and (epoch <= 4 or epoch % 5 == 0):
        model.eval()
        base_meter = AverageMeter()
        de_meter = AverageMeter()
        d2_meters = {}
        d2e_meters = {}
        num_components_list = [1, 2, 5, 20, 50, 200]
        for k in num_components_list:
            d2_meters[k] = AverageMeter()
            d2e_meters[k] = AverageMeter()
        d2_batch_counts = {}
        for k in num_components_list:
            d2_batch_counts[k] = 10
        d2e_batch_counts = {1: 10, 2: 10, 5: 4, 20: 2, 50: 2, 200: 1}
        max_batch_count = 10

        for step, (data, targets) in enumerate(train_loader):
            if step == max_batch_count:
                break
            num = data.shape[0]

            # base term
            base = taylor.taylor_loss_base(
                data.cuda(),
                targets.cuda(),
                model,
                moment_dict['xbar'],
                moment_dict['ybar'],
                moment_dict['Uxx'],
                moment_dict['Sxx'],
                moment_dict['Vxx'],
                moment_dict['Uxy'],
                moment_dict['Sxy'],
                moment_dict['Vxy'],
                moment_dict['T_U'],
                moment_dict['T_S'],
                moment_dict['T_V'],
            )
            base_meter.update(base, num)

            # de term
            de = taylor.taylor_loss_de(
                data.cuda(),
                targets.cuda(),
                model,
                moment_dict['xbar'],
                moment_dict['ybar'],
                moment_dict['Uxx'],
                moment_dict['Sxx'],
                moment_dict['Vxx'],
                moment_dict['Uxy'],
                moment_dict['Sxy'],
                moment_dict['Vxy'],
                moment_dict['T_U'],
                moment_dict['T_S'],
                moment_dict['T_V'],
            )
            de_meter.update(de, num)

            # d2 term
            d2_dict = taylor.taylor_loss_d2(
                data.cuda(),
                targets.cuda(),
                model,
                moment_dict['xbar'],
                moment_dict['ybar'],
                moment_dict['Uxx'],
                moment_dict['Sxx'],
                moment_dict['Vxx'],
                moment_dict['Uxy'],
                moment_dict['Sxy'],
                moment_dict['Vxy'],
                moment_dict['T_U'],
                moment_dict['T_S'],
                moment_dict['T_V'],
            )
            logger.info("Computed base, de, d2")
            for k in num_components_list:
                d2_meters[k].update(d2_dict[k], num)

            # d2e term
            kmax = max(
                [k for k in num_components_list if d2e_batch_counts[k] > step])
            d2e_dict = taylor.taylor_loss_d2e(
                data.cuda(),
                targets.cuda(),
                model,
                moment_dict['xbar'],
                moment_dict['ybar'],
                moment_dict['Uxx'],
                moment_dict['Sxx'],
                moment_dict['Vxx'],
                moment_dict['Uxy'],
                moment_dict['Sxy'],
                moment_dict['Vxy'],
                moment_dict['T_U'][:, :, :kmax],
                moment_dict['T_S'][:, :kmax],
                moment_dict['T_V'][:, :, :kmax],
            )
            logger.info("Computed d2e, batch id")
            for k in num_components_list:
                if k <= kmax:
                    d2e_meters[k].update(d2e_dict[k], num)
            logger.info("Done batch")

        logger.info("CHECKS")
        print("Base", base_meter.count, base_meter.avg)
        print("DE", de_meter.count, de_meter.avg)
        for k in num_components_list:
            print("d2", k, d2_meters[k].count, d2_meters[k].avg)
        for k in num_components_list:
            print("d2e", k, d2e_meters[k].count, d2e_meters[k].avg)

        ret.append(base_meter.avg.item())
        ret.append(de_meter.avg.item())
        for k in num_components_list:
            ret.append(d2_meters[k].avg.item())
        for k in num_components_list:
            ret.append(d2e_meters[k].avg.item())
        model.train()

    elapsed = time.time() - start
    logger.info('Elapsed {:.2f}'.format(elapsed))

    # logger.info('Vanilla {:.2f}, Mixup {:.2f}, Double sum {:.2f}, '
    #             'Train before {:.2f}, Train after {:.2f}'.format(
    #                 apx_meters['vanilla'].avg,
    #                 apx_meters['mixup'].avg,
    #                 apx_meters['doublesum'].avg,
    #                 loss_before_meter.avg,
    #                 loss_after_meter.avg))

    if run_config['tensorboard']:
        writer.add_scalar('Train/Loss', loss_meter.avg, epoch)
        writer.add_scalar('Train/Accuracy', accuracy_meter.avg, epoch)
        writer.add_scalar('Train/Time', elapsed, epoch)

    if epoch <= 4 or epoch % 5 == 0:
        return ret
    else:
        return []
def train(epoch, model, optimizer, criterion, train_loader, config, writer):
    global global_step

    run_config = config['run_config']
    data_config = config['data_config']

    logger.info('Train {}'.format(epoch))

    model.train()

    loss_meter = AverageMeter()
    accuracy_meter = AverageMeter()
    start = time.time()
    for step, (data, targets) in enumerate(train_loader):
        global_step += 1

        if data_config['use_mixup']:
            data, targets = mixup(data, targets, data_config['mixup_alpha'],
                                  data_config['n_classes'])

        if run_config['tensorboard'] and step == 0:
            image = torchvision.utils.make_grid(data,
                                                normalize=True,
                                                scale_each=True)
            writer.add_image('Train/Image', image, epoch)

        data = data.cuda()
        targets = targets.cuda()

        optimizer.zero_grad()

        outputs = model(data)
        loss = criterion(outputs, targets)
        loss.backward()

        optimizer.step()

        _, preds = torch.max(outputs, dim=1)

        loss_ = loss.item()
        if data_config['use_mixup']:
            _, targets = targets.max(dim=1)
        correct_ = preds.eq(targets).sum().item()

        num = data.size(0)
        accuracy = correct_ / num

        loss_meter.update(loss_, num)
        accuracy_meter.update(accuracy, num)

        if run_config['tensorboard']:
            writer.add_scalar('Train/RunningLoss', loss_, global_step)
            writer.add_scalar('Train/RunningAccuracy', accuracy, global_step)

        if step % 100 == 0:
            logger.info('Epoch {} Step {}/{} '
                        'Loss {:.4f} ({:.4f}) '
                        'Accuracy {:.4f} ({:.4f})'.format(
                            epoch,
                            step,
                            len(train_loader),
                            loss_meter.val,
                            loss_meter.avg,
                            accuracy_meter.val,
                            accuracy_meter.avg,
                        ))

    elapsed = time.time() - start
    logger.info('Elapsed {:.2f}'.format(elapsed))

    if run_config['tensorboard']:
        writer.add_scalar('Train/Loss', loss_meter.avg, epoch)
        writer.add_scalar('Train/Accuracy', accuracy_meter.avg, epoch)
        writer.add_scalar('Train/Time', elapsed, epoch)
def train(epoch, model, optimizer, scheduler, criterion, train_loader, config,
          writer, AT):
    global global_step

    run_config = config['run_config']
    optim_config = config['optim_config']
    data_config = config['data_config']

    logger.info('Train {}'.format(epoch))

    model.train()

    loss_meter = AverageMeter()
    accuracy_meter = AverageMeter()
    start = time.time()
    for step, (data, targets) in enumerate(train_loader):
        global_step += 1

        if data_config['use_mixup']:
            data, targets = mixup(data, targets, data_config['mixup_alpha'],
                                  data_config['n_classes'])

        if run_config['tensorboard_train_images']:
            if step == 0:
                image = torchvision.utils.make_grid(data,
                                                    normalize=True,
                                                    scale_each=True)
                writer.add_image('Train/Image', image, epoch)

        if optim_config['scheduler'] == 'multistep':
            scheduler.step(epoch - 1)
        elif optim_config['scheduler'] == 'cosine':
            scheduler.step()

        if run_config['tensorboard']:
            if optim_config['scheduler'] != 'none':
                lr = scheduler.get_lr()[0]
            else:
                lr = optim_config['base_lr']
            writer.add_scalar('Train/LearningRate', lr, global_step)

        if run_config['use_gpu']:
            data = data.cuda()
            targets = targets.cuda()

        optimizer.zero_grad()

        if AT:
            # adversarial training: un-normalize, run PGD in pixel space,
            # then re-normalize before the forward pass
            mean = torch.FloatTensor(
                np.array([0.4914, 0.4822, 0.4465])[None, :, None, None]).cuda()
            std = torch.FloatTensor(
                np.array([0.2470, 0.2435, 0.2616])[None, :, None, None]).cuda()
            data = data.mul_(std).add_(mean)
            atk = torchattacks.PGD(model, eps=5 / 255, alpha=0.5 / 255,
                                   steps=10)
            data = atk(data, targets)
            data = data.sub_(mean).div_(std)
            # end of attack

        outputs = model(data)
        loss = criterion(outputs, targets)

        # SD: penalize the squared logits
        if optim_config['SD'] != 0.0:
            loss += (outputs**2).mean() * optim_config['SD']

        loss.backward()

        optimizer.step()

        _, preds = torch.max(outputs, dim=1)

        loss_ = loss.item()
        if data_config['use_mixup']:
            _, targets = targets.max(dim=1)
        correct_ = preds.eq(targets).sum().item()

        num = data.size(0)
        accuracy = correct_ / num

        loss_meter.update(loss_, num)
        accuracy_meter.update(accuracy, num)

        if run_config['tensorboard']:
            writer.add_scalar('Train/RunningLoss', loss_, global_step)
            writer.add_scalar('Train/RunningAccuracy', accuracy, global_step)

        if step % 100 == 0:
            logger.info('Epoch {} Step {}/{} '
                        'Loss {:.4f} ({:.4f}) '
                        'Accuracy {:.4f} ({:.4f})'.format(
                            epoch,
                            step,
                            len(train_loader),
                            loss_meter.val,
                            loss_meter.avg,
                            accuracy_meter.val,
                            accuracy_meter.avg,
                        ))

    elapsed = time.time() - start
    logger.info('Elapsed {:.2f}'.format(elapsed))

    if run_config['tensorboard']:
        writer.add_scalar('Train/Loss', loss_meter.avg, epoch)
        writer.add_scalar('Train/Accuracy', accuracy_meter.avg, epoch)
        writer.add_scalar('Train/Time', elapsed, epoch)
def mixup_loss(images, labels, alpha, n_classes, fixlam, model, use_gpu):
    miximages, mixlabels = mixup(images, labels, alpha, n_classes, fixlam)
    return compute_loss(miximages, mixlabels, model, use_gpu)
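# `compute_loss` and `doublesum_loss` are referenced here and in the evaluation
# code above but are not shown in this section. Minimal sketches follow, assuming
# `compute_loss` evaluates a soft-label cross-entropy without tracking gradients
# and `doublesum_loss` averages the fixed-lambda mixup loss over all ordered pairs
# in the batch (including self-pairs); the actual apx module may differ.
import torch
import torch.nn.functional as F


def compute_loss(images, labels, model, use_gpu):
    # Soft-label cross-entropy: `labels` are probability vectors (e.g. mixed one-hots).
    if use_gpu:
        images, labels = images.cuda(), labels.cuda()
    with torch.no_grad():
        logits = model(images)
        loss = torch.mean(
            torch.sum(-labels * F.log_softmax(logits, dim=-1), dim=1))
    return loss


def doublesum_loss(images, labels, alpha, n_classes, fixlam, model, use_gpu):
    # "Double sum" loss: the exact average of the mixup loss over all ordered
    # pairs (i, j) at a fixed lambda, which batchwise mixup approximates by
    # sampling random partners. `alpha` is unused here because lambda is fixed.
    n = images.size(0)
    labels_1h = torch.eye(n_classes)[labels]
    total = 0.0
    for shift in range(n):
        partner_x = torch.roll(images, shifts=shift, dims=0)
        partner_y = torch.roll(labels_1h, shifts=shift, dims=0)
        mix_x = fixlam * images + (1 - fixlam) * partner_x
        mix_y = fixlam * labels_1h + (1 - fixlam) * partner_y
        total = total + compute_loss(mix_x, mix_y, model, use_gpu)
    return total / n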
def main(): """Model training.""" train_speakers, valid_speakers = get_valid_speakers() # define transforms for train & validation samples train_transform = Compose([Resize(760, 80), ToTensor()]) # define datasets & loaders train_dataset = TrainDataset('train', train_speakers, transform=train_transform) valid_dataset = TrainDataset('train', valid_speakers, transform=train_transform) train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True) valid_loader = DataLoader(valid_dataset, batch_size=256, shuffle=False) device = get_device() print(f'Selected device: {device}') model = torch.hub.load('huawei-noah/ghostnet', 'ghostnet_1x', pretrained=True) model.classifier = nn.Linear(in_features=1280, out_features=1, bias=True) net = model net.to(device) criterion = nn.BCEWithLogitsLoss() optimizer = AdaBelief(net.parameters(), lr=1e-3) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.2, patience=3, eps=1e-4, verbose=True) # prepare valid target yvalid = get_valid_targets(valid_dataset) # training loop for epoch in range(10): loss_log = {'train': [], 'valid': []} train_loss = [] net.train() for x, y in tqdm(train_loader): x, y = mixup(x, y, alpha=0.2) x, y = x.to(device), y.to(device, dtype=torch.float32) optimizer.zero_grad() outputs = net(x) loss = criterion(outputs, y.unsqueeze(1)) loss.backward() optimizer.step() # save loss train_loss.append(loss.item()) # evaluate net.eval() valid_pred = torch.Tensor([]).to(device) for x, y in valid_loader: with torch.no_grad(): x, y = x.to(device), y.to(device, dtype=torch.float32) ypred = net(x) valid_pred = torch.cat([valid_pred, ypred], 0) valid_pred = sigmoid(valid_pred.cpu().numpy()) val_loss = log_loss(yvalid, valid_pred, eps=1e-7) val_acc = (yvalid == (valid_pred > 0.5).astype(int).flatten()).mean() tqdm.write( f'Epoch {epoch} train_loss={np.mean(train_loss):.4f}; val_loss={val_loss:.4f}; val_acc={val_acc:.4f}' ) loss_log['train'].append(np.mean(train_loss)) loss_log['valid'].append(val_loss) scheduler.step(loss_log['valid'][-1]) torch.save(net.state_dict(), 'ghostnet_model.pt') print('Training is complete.')