def job(tuning, params_path, devices, resume, save_interval):

    global params
    if tuning:
        with open(params_path, 'r') as f:
            params = json.load(f)
        mode_str = 'tuning'
        setting = '_'.join(f'{tp}-{params[tp]}' for tp in params['tuning_params'])
    else:
        mode_str = 'train'
        setting = ''

    exp_path = ROOT + f'experiments/{params["ex_name"]}/'
    os.environ['CUDA_VISIBLE_DEVICES'] = devices

    if resume is None:
        # Keep the paths consistent between the C-AIR and ABCI environments.
        params['base_ckpt_path'] = \
            f'experiments/v1only/ep4_augmentation-soft_epochs-5_loss-{params["loss"]}.pth'
        params['clean_path'] = ROOT + \
            f'input/clean/train19_cleaned_verifythresh{params["verifythresh"]}_freqthresh{params["freqthresh"]}.csv'
    else:
        params = utils.load_checkpoint(path=resume, params=True)['params']

    logger, writer = utils.get_logger(
        log_dir=exp_path + f'{mode_str}/log/{setting}',
        tensorboard_dir=exp_path + f'{mode_str}/tf_board/{setting}')

    if params['augmentation'] == 'soft':
        params['scale_limit'] = 0.2
        params['brightness_limit'] = 0.1
    elif params['augmentation'] == 'middle':
        params['scale_limit'] = 0.3
        params['shear_limit'] = 4
        params['brightness_limit'] = 0.1
        params['contrast_limit'] = 0.1
    else:
        raise ValueError(f'unknown augmentation: {params["augmentation"]}')

    train_transform, eval_transform = data_utils.build_transforms(
        scale_limit=params['scale_limit'],
        shear_limit=params['shear_limit'],
        brightness_limit=params['brightness_limit'],
        contrast_limit=params['contrast_limit'],
    )

    data_loaders = data_utils.make_train_loaders(
        params=params,
        data_root=ROOT + 'input/' + params['data'],
        train_transform=train_transform,
        eval_transform=eval_transform,
        scale='SS2',
        test_size=0,
        class_topk=params['class_topk'],
        num_workers=8)

    model = models.LandmarkNet(
        n_classes=params['class_topk'],
        model_name=params['model_name'],
        pooling=params['pooling'],
        loss_module=params['loss'],
        s=params['s'],
        margin=params['margin'],
        theta_zero=params['theta_zero'],
        use_fc=params['use_fc'],
        fc_dim=params['fc_dim'],
    ).cuda()
    criterion = nn.CrossEntropyLoss()
    optimizer = utils.get_optim(params, model)

    if resume is None:
        # Warm-start from the base checkpoint, dropping the final classifier
        # whose shape depends on the loss module.
        sdict = torch.load(ROOT + params['base_ckpt_path'])['state_dict']
        if params['loss'] == 'adacos':
            del sdict['final.W']  # remove fully-connected layer
        elif params['loss'] == 'softmax':
            del sdict['final.weight'], sdict['final.bias']  # remove fully-connected layer
        else:
            del sdict['final.weight']  # remove fully-connected layer
        model.load_state_dict(sdict, strict=False)

        scheduler = optim.lr_scheduler.CosineAnnealingLR(
            optimizer,
            T_max=params['epochs'] * len(data_loaders['train']),
            eta_min=3e-6)
        start_epoch, end_epoch = (0, params['epochs'] - params['scaleup_epochs'])
    else:
        # Resume for the scale-up phase: larger images, verified labels only,
        # and frozen batch-norm statistics.
        ckpt = utils.load_checkpoint(path=resume, model=model,
                                     optimizer=optimizer, epoch=True)
        model, optimizer, start_epoch = \
            ckpt['model'], ckpt['optimizer'], ckpt['epoch'] + 1
        end_epoch = params['epochs']

        scheduler = optim.lr_scheduler.CosineAnnealingLR(
            optimizer,
            T_max=params['epochs'] * len(data_loaders['train']),
            eta_min=3e-6,
            last_epoch=start_epoch * len(data_loaders['train']))

        setting += 'scaleup_' + resume.split('/')[-1].replace('.pth', '')

        data_loaders = data_utils.make_verified_train_loaders(
            params=params,
            data_root=ROOT + 'input/' + params['data'],
            train_transform=train_transform,
            eval_transform=eval_transform,
            scale='M2',
            test_size=0,
            num_workers=8)
        batch_norm.freeze_bn(model)

    if len(devices.split(',')) > 1:
        model = nn.DataParallel(model)

    for epoch in range(start_epoch, end_epoch):
        logger.info(f'Epoch {epoch}/{end_epoch}')

        # ============================== train ============================== #
        model.train(True)

        losses = utils.AverageMeter()
        prec1 = utils.AverageMeter()

        for i, (_, x, y) in tqdm(enumerate(data_loaders['train']),
                                 total=len(data_loaders['train']),
                                 miniters=None, ncols=55):
            x = x.to('cuda')
            y = y.to('cuda')

            outputs = model(x, y)
            loss = criterion(outputs, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

            acc = metrics.accuracy(outputs, y)
            losses.update(loss.item(), x.size(0))
            prec1.update(acc, x.size(0))

            if i % 100 == 99:
                logger.info(
                    f'{epoch+i/len(data_loaders["train"]):.2f}epoch | {setting} acc: {prec1.avg}')

        train_loss = losses.avg
        train_acc = prec1.avg

        writer.add_scalars('Loss', {'train': train_loss}, epoch)
        writer.add_scalars('Acc', {'train': train_acc}, epoch)
        writer.add_scalar('LR', optimizer.param_groups[0]['lr'], epoch)

        if (epoch + 1) == end_epoch or (epoch + 1) % save_interval == 0:
            output_file_name = exp_path + f'ep{epoch}_' + setting + '.pth'
            utils.save_checkpoint(path=output_file_name,
                                  model=model,
                                  epoch=epoch,
                                  optimizer=optimizer,
                                  params=params)

    # Unwrap DataParallel before evaluation (no-op when a single GPU was used).
    if isinstance(model, nn.DataParallel):
        model = model.module
    datasets = ('oxford5k', 'paris6k', 'roxford5k', 'rparis6k')
    results = eval_datasets(model, datasets=datasets, ms=True,
                            tta_gem_p=1.0, logger=logger)

    if tuning:
        tuning_result = {}
        for d in datasets:
            if d in ('oxford5k', 'paris6k'):
                tuning_result[d] = results[d]
            else:
                for key in ['mapE', 'mapM', 'mapH']:
                    mapE, mapM, mapH, mpE, mpM, mpH, kappas = results[d]
                    tuning_result[d + '-' + key] = [eval(key)]
        utils.write_tuning_result(params, tuning_result, exp_path + 'tuning/results.csv')
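# --------------------------------------------------------------------------- #
# Hypothetical usage sketch (not part of the original file): one possible way
# to expose job() as a command-line entry point. The option names, defaults,
# and the use of click are assumptions for illustration only; the real project
# may wire this up differently.
import click


@click.command()
@click.option('--tuning', is_flag=True, help='Run in hyper-parameter tuning mode.')
@click.option('--params-path', default=None, help='JSON file with tuning params.')
@click.option('--devices', default='0', help='Comma-separated CUDA device ids.')
@click.option('--resume', default=None, help='Checkpoint path to resume from.')
@click.option('--save-interval', default=1, type=int, help='Save a checkpoint every N epochs.')
def main(tuning, params_path, devices, resume, save_interval):
    job(tuning, params_path, devices, resume, save_interval)


if __name__ == '__main__':
    main()
# --------------------------------------------------------------------------- #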
def job(tuning, params_path, devices, resume, save_interval):

    global params
    if tuning:
        with open(params_path, 'r') as f:
            params = json.load(f)
        mode_str = 'tuning'
        setting = '_'.join(f'{tp}-{params[tp]}' for tp in params['tuning_params'])
    else:
        mode_str = 'train'
        setting = ''

    # Also change the seed whenever the parameters change
    # (hoping for a seed-averaging effect).
    seed = sum(ord(_) for _ in str(params.values()))
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False

    exp_path = ROOT + f'experiments/{params["ex_name"]}/'
    os.environ['CUDA_VISIBLE_DEVICES'] = devices

    logger, writer = utils.get_logger(
        log_dir=exp_path + f'{mode_str}/log/{setting}',
        tensorboard_dir=exp_path + f'{mode_str}/tf_board/{setting}')

    if params['augmentation'] == 'soft':
        params['scale_limit'] = 0.2
        params['brightness_limit'] = 0.1
    elif params['augmentation'] == 'middle':
        params['scale_limit'] = 0.3
        params['shear_limit'] = 4
        params['brightness_limit'] = 0.1
        params['contrast_limit'] = 0.1
    else:
        raise ValueError(f'unknown augmentation: {params["augmentation"]}')

    train_transform, eval_transform = data_utils.build_transforms(
        scale_limit=params['scale_limit'],
        shear_limit=params['shear_limit'],
        brightness_limit=params['brightness_limit'],
        contrast_limit=params['contrast_limit'],
    )

    data_loaders = data_utils.make_train_loaders(
        params=params,
        data_root=ROOT + 'input/' + params['data'],
        train_transform=train_transform,
        eval_transform=eval_transform,
        scale='S',
        test_size=0,
        class_topk=params['class_topk'],
        num_workers=8)

    model = models.LandmarkNet(
        n_classes=params['class_topk'],
        model_name=params['model_name'],
        pooling=params['pooling'],
        loss_module=params['loss'],
        s=params['s'],
        margin=params['margin'],
        theta_zero=params['theta_zero'],
        use_fc=params['use_fc'],
        fc_dim=params['fc_dim'],
    ).cuda()
    optimizer = utils.get_optim(params, model)
    criterion = nn.CrossEntropyLoss()
    scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer,
        T_max=params['epochs'] * len(data_loaders['train']),
        eta_min=3e-6)
    start_epoch = 0

    if len(devices.split(',')) > 1:
        model = nn.DataParallel(model)

    for epoch in range(start_epoch, params['epochs']):
        logger.info(
            f'Epoch {epoch}/{params["epochs"]} | lr: {optimizer.param_groups[0]["lr"]}')

        # ============================== train ============================== #
        model.train(True)

        losses = utils.AverageMeter()
        prec1 = utils.AverageMeter()

        for i, (_, x, y) in tqdm(enumerate(data_loaders['train']),
                                 total=len(data_loaders['train']),
                                 miniters=None, ncols=55):
            x = x.to('cuda')
            y = y.to('cuda')

            outputs = model(x, y)
            loss = criterion(outputs, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

            acc = metrics.accuracy(outputs, y)
            losses.update(loss.item(), x.size(0))
            prec1.update(acc, x.size(0))

            if i % 100 == 99:
                logger.info(
                    f'{epoch+i/len(data_loaders["train"]):.2f}epoch | {setting} acc: {prec1.avg}')

        train_loss = losses.avg
        train_acc = prec1.avg

        writer.add_scalars('Loss', {'train': train_loss}, epoch)
        writer.add_scalars('Acc', {'train': train_acc}, epoch)
        writer.add_scalar('LR', optimizer.param_groups[0]['lr'], epoch)

        if (epoch + 1) == params['epochs'] or (epoch + 1) % save_interval == 0:
            output_file_name = exp_path + f'ep{epoch}_' + setting + '.pth'
            utils.save_checkpoint(path=output_file_name,
                                  model=model,
                                  epoch=epoch,
                                  optimizer=optimizer,
                                  params=params)

    # Unwrap DataParallel before evaluation (no-op when a single GPU was used).
    if isinstance(model, nn.DataParallel):
        model = model.module
    datasets = ('roxford5k', 'rparis6k')
    results = eval_datasets(model, datasets=datasets, ms=False,
                            tta_gem_p=1.0, logger=logger)

    if tuning:
        tuning_result = {}
        for d in datasets:
            for key in ['mapE', 'mapM', 'mapH']:
                mapE, mapM, mapH, mpE, mpM, mpH, kappas = results[d]
                tuning_result[d + '-' + key] = [eval(key)]
        utils.write_tuning_result(params, tuning_result, exp_path + 'tuning/results.csv')
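# --------------------------------------------------------------------------- #
# Optional refactor sketch (an assumption, not the original code): the tuning
# block above recovers mapE/mapM/mapH through eval(). The same dictionary can
# be built without eval() by zipping the metric names with the unpacked
# values, assuming results[d] really is a 7-tuple in the order shown above.
def collect_map_metrics(results, datasets):
    """Collect easy/medium/hard mAP values per dataset without using eval()."""
    tuning_result = {}
    for d in datasets:
        mapE, mapM, mapH, mpE, mpM, mpH, kappas = results[d]
        for key, value in zip(['mapE', 'mapM', 'mapH'], [mapE, mapM, mapH]):
            tuning_result[f'{d}-{key}'] = [value]
    return tuning_result
# --------------------------------------------------------------------------- #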
def job(tuning, params_path, devices, resume, save_interval):

    global params
    if tuning:
        with open(params_path, 'r') as f:
            params = json.load(f)
        mode_str = 'tuning'
        setting = '_'.join(f'{tp}-{params[tp]}' for tp in params['tuning_params'])
    else:
        mode_str = 'train'
        setting = ''

    exp_path = ROOT + f'experiments/{params["ex_name"]}/'
    os.environ['CUDA_VISIBLE_DEVICES'] = devices

    logger, writer = utils.get_logger(
        log_dir=exp_path + f'{mode_str}/log/{setting}',
        tensorboard_dir=exp_path + f'{mode_str}/tf_board/{setting}')

    train_transform, eval_transform = build_transforms(
        scale_range=params['scale_range'],
        brightness_range=params['brightness_range'])
    data_loaders = data_utils.make_train_loaders(
        params=params,
        data_root=ROOT + 'input/train2018',
        train_transform=train_transform,
        eval_transform=eval_transform,
        class_topk=params['class_topk'],
        num_workers=8)

    model = models.LandmarkFishNet(
        n_classes=params['class_topk'],
        model_name=params['model_name'],
        pooling_strings=params['pooling'].split(','),
        loss_module='arcface',
        s=30.0,
        margin=params['margin'],
        use_fc=params['use_fc'],
        fc_dim=params['fc_dim'],
    ).cuda()
    optimizer = utils.get_optim(params, model)
    criterion = nn.CrossEntropyLoss()
    scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer,
        T_max=params['epochs'] * len(data_loaders['train']),
        eta_min=1e-6)

    if len(devices.split(',')) > 1:
        model = nn.DataParallel(model)

    if resume is not None:
        model, optimizer = utils.load_checkpoint(path=resume, model=model,
                                                 optimizer=optimizer)

    for epoch in range(params['epochs']):
        logger.info(
            f'Epoch {epoch}/{params["epochs"]} | lr: {optimizer.param_groups[0]["lr"]}')

        # ============================== train ============================== #
        model.train(True)

        losses = utils.AverageMeter()
        prec1 = utils.AverageMeter()

        for i, (_, x, y) in tqdm(enumerate(data_loaders['train']),
                                 total=len(data_loaders['train']),
                                 miniters=None, ncols=55):
            x = x.to('cuda')
            y = y.to('cuda')

            outputs = model(x, y)
            loss = criterion(outputs, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

            acc = metrics.accuracy(outputs, y)
            losses.update(loss.item(), x.size(0))
            prec1.update(acc, x.size(0))

            if i % 100 == 99:
                logger.info(
                    f'{epoch+i/len(data_loaders["train"]):.2f}epoch | {setting} acc: {prec1.avg}')

        train_loss = losses.avg
        train_acc = prec1.avg

        # ============================== validation ============================== #
        model.train(False)
        losses.reset()
        prec1.reset()

        for i, (_, x, y) in tqdm(enumerate(data_loaders['val']),
                                 total=len(data_loaders['val']),
                                 miniters=None, ncols=55):
            x = x.to('cuda')
            y = y.to('cuda')

            with torch.no_grad():
                outputs = model(x, y)
                loss = criterion(outputs, y)

            acc = metrics.accuracy(outputs, y)
            losses.update(loss.item(), x.size(0))
            prec1.update(acc, x.size(0))

        val_loss = losses.avg
        val_acc = prec1.avg
        logger.info(f'[Val] Loss: \033[1m{val_loss:.4f}\033[0m | '
                    f'Acc: \033[1m{val_acc:.4f}\033[0m\n')

        writer.add_scalars('Loss', {'train': train_loss}, epoch)
        writer.add_scalars('Acc', {'train': train_acc}, epoch)
        writer.add_scalars('Loss', {'val': val_loss}, epoch)
        writer.add_scalars('Acc', {'val': val_acc}, epoch)
        writer.add_scalar('LR', optimizer.param_groups[0]['lr'], epoch)

        if save_interval > 0:
            if (epoch + 1) == params['epochs'] or (epoch + 1) % save_interval == 0:
                output_file_name = exp_path + f'ep{epoch}_' + setting + '.pth'
                utils.save_checkpoint(path=output_file_name,
                                      model=model,
                                      epoch=epoch,
                                      optimizer=optimizer,
                                      params=params)

    if tuning:
        tuning_result = {}
        for key in ['train_loss', 'train_acc', 'val_loss', 'val_acc']:
            tuning_result[key] = [eval(key)]
        utils.write_tuning_result(params, tuning_result, exp_path + 'tuning/results.csv')
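# --------------------------------------------------------------------------- #
# Minimal sketch of the meter interface assumed throughout these jobs
# (utils.AverageMeter is not shown in this file): the loops above only need
# reset(), update(value, n), and an .avg attribute. The actual implementation
# in utils may differ.
class AverageMeter:
    """Keeps a running average of a scalar metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, value, n=1):
        # value is the per-batch average; n is the batch size it was computed over.
        self.sum += value * n
        self.count += n
        self.avg = self.sum / self.count
# --------------------------------------------------------------------------- #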