def __init__(self, name='efficientnet-b0', head='mlp', feat_dim=128, pretrained=True, **kwargs):
    """Build an EfficientNet encoder with a projection head and a classifier.

    Args:
        name: EfficientNet variant to instantiate (e.g. 'efficientnet-b0');
            also used to look up the encoder's output width in ``model_dict``.
        head: projection-head type, 'linear' or 'mlp'.
        feat_dim: output dimension of the projection head.
        pretrained: load ImageNet-pretrained weights when True.
        **kwargs: must contain ``num_classes`` for the classification layer.

    Raises:
        ValueError: if ``head`` is neither 'linear' nor 'mlp'.
    """
    super().__init__()
    if pretrained:
        self.encoder = EfficientNet.from_pretrained(name, include_top=False)
    else:
        # BUG FIX: previously hard-coded 'efficientnet-b0' here, silently
        # ignoring the requested variant when pretrained=False.
        self.encoder = EfficientNet.from_name(name, include_top=False)
    freeze_bn(self.encoder)

    # Projection head on top of the encoder features.
    in_feature = model_dict[name]
    if head == 'linear':
        self.head = nn.Linear(in_feature, feat_dim)
    elif head == 'mlp':
        self.head = nn.Sequential(
            nn.Linear(in_feature, in_feature),
            nn.ReLU(inplace=True),
            nn.Linear(in_feature, feat_dim))
    else:
        # Previously an unknown head left self.head undefined and failed
        # later with AttributeError; fail fast with a clear message instead.
        raise ValueError("head must be 'linear' or 'mlp', got {!r}".format(head))

    # Classification layer.
    # NOTE(review): num_classes comes from kwargs and is required —
    # nn.Linear raises if it is missing (None). Confirm callers always pass it.
    num_classes = kwargs.get('num_classes')
    self._dropout = nn.Dropout(0.5)
    self._fc = nn.Linear(in_feature, num_classes)
def train(model, trainloader, optimizer, device, epoch, criterion, step, logger, batch_size, loss_fn):
    """Run one training epoch of the detector and log losses/visualizations.

    Args:
        model: detection network returning per-level (loc, cls) prediction lists.
        trainloader: yields (images, loc_targets, cls_targets, ori_img_shape).
        optimizer: optimizer stepping the model parameters.
        device: torch device all batch tensors are moved to.
        epoch: current epoch number (for console logging only).
        criterion: returns (total_loss, loc_loss, cls_loss).
        step: global step counter; incremented per batch.
        logger: provides scalar_summary / image_drawbox / image_summary.
        batch_size: used to cap the number of visualized images at 10.
        loss_fn: passed through to DataEncoder for box decoding.

    Returns:
        The updated global step counter.
    """
    encoder = DataEncoder(loss_fn)
    model.train()
    freeze_bn(model)  # keep BN stats frozen during fine-tuning; see http://qr.ae/TUIS14
    start_time = time.time()
    for batch_idx, (data, loc_targets, cls_targets, ori_img_shape) in enumerate(trainloader):
        inputs, loc_targets, cls_targets = data.to(device), loc_targets.to(device), \
            cls_targets.to(device)
        optimizer.zero_grad()
        # BUG FIX: tensors were moved with .to(device) above and then
        # redundantly .cuda()-ed again (inputs.cuda(), loc_targets.cuda(),
        # cls_targets.cuda()), which broke CPU runs. Use the moved tensors.
        loc_preds_split, cls_preds_split = model(inputs)
        # Concatenate per-feature-level predictions along the anchor axis.
        loc_preds = torch.cat(loc_preds_split, 1)
        cls_preds = torch.cat(cls_preds_split, 1)
        loss, loc_loss, cls_loss = criterion(loc_preds.float(), loc_targets,
                                             cls_preds.float(), cls_targets)
        loss.backward()
        optimizer.step()
        step += 1

        if batch_idx % 10 == 0:
            end_time = time.time()
            print('[%d,%5d] cls_loss: %.5f loc_loss: %.5f train_loss: %.5f time: %.3f lr: %.6f' %
                  (epoch, batch_idx, cls_loss, loc_loss, loss,
                   end_time - start_time, optimizer.param_groups[0]['lr']))
            start_time = time.time()
            info = {'training loss': loss.item(), 'loc_loss': loc_loss.item(),
                    'cls_loss': cls_loss.item()}
            for tag, value in info.items():
                logger.scalar_summary(tag, value, step)

        if batch_idx % 200 == 0:
            # Decode predictions for up to 10 images and log them with boxes.
            pred_boxes, pred_labels, score_all = [], [], []
            if batch_size > 10:
                show_num_img = 10
            else:
                show_num_img = batch_size
            for img_idx in range(show_num_img):
                pred_box, pred_label, score = encoder.decode(loc_preds_split, cls_preds_split,
                                                             data.shape, ori_img_shape[img_idx], img_idx)
                pred_boxes.append(pred_box)
                pred_labels.append(pred_label)
                score_all.append(score)
            info = {'images': inputs[:show_num_img].cpu().numpy()}
            for tag, images in info.items():
                images = logger.image_drawbox(images, pred_boxes, pred_labels, score_all)
                logger.image_summary(tag, images, step)
    return step
def main(config):
    """Meta-train a model on a Bongard-style dataset with multiple tval splits.

    Builds the save directory name from the global ``args`` plus ``config``,
    constructs train/val loaders and up to five "tval" evaluation loaders,
    optionally loads pretrained components, then runs the epoch loop:
    train -> evaluate on val + all tvals -> log -> checkpoint.
    """
    svname = args.name
    if svname is None:
        svname = 'meta_{}-{}shot'.format(config['train_dataset'], config['n_shot'])
        svname += '_' + config['model']
        if config['model_args'].get('encoder'):
            svname += '-' + config['model_args']['encoder']
        if config['model_args'].get('prog_synthesis'):
            svname += '-' + config['model_args']['prog_synthesis']
    svname += '-seed' + str(args.seed)
    if args.tag is not None:
        svname += '_' + args.tag
    save_path = os.path.join(args.save_dir, svname)
    utils.ensure_path(save_path, remove=False)
    utils.set_log_path(save_path)
    writer = SummaryWriter(os.path.join(save_path, 'tensorboard'))
    yaml.dump(config, open(os.path.join(save_path, 'config.yaml'), 'w'))

    logger = utils.Logger(file_name=os.path.join(save_path, "log_sdout.txt"),
                          file_mode="a+", should_flush=True)

    #### Dataset ####

    n_way, n_shot = config['n_way'], config['n_shot']
    n_query = config['n_query']

    # Training episodes may use a different way/shot count than evaluation.
    if config.get('n_train_way') is not None:
        n_train_way = config['n_train_way']
    else:
        n_train_way = n_way
    if config.get('n_train_shot') is not None:
        n_train_shot = config['n_train_shot']
    else:
        n_train_shot = n_shot
    if config.get('ep_per_batch') is not None:
        ep_per_batch = config['ep_per_batch']
    else:
        ep_per_batch = 1

    # Seeded RNG drives all sampler seeds (and snail query selection below).
    random_state = np.random.RandomState(args.seed)
    print('seed:', args.seed)

    # train
    train_dataset = datasets.make(config['train_dataset'],
                                  **config['train_dataset_args'])
    utils.log('train dataset: {} (x{})'.format(train_dataset[0][0].shape,
                                               len(train_dataset)))
    if config.get('visualize_datasets'):
        utils.visualize_dataset(train_dataset, 'train_dataset', writer)
    train_sampler = BongardSampler(train_dataset.n_tasks, config['train_batches'],
                                   ep_per_batch, random_state.randint(2**31))
    train_loader = DataLoader(train_dataset, batch_sampler=train_sampler,
                              num_workers=8, pin_memory=True)

    # tvals: each optional eval split gets its own loader keyed by name.
    tval_loaders = {}
    tval_name_ntasks_dict = {
        'tval': 2000,
        'tval_ff': 600,
        'tval_bd': 480,
        'tval_hd_comb': 400,
        'tval_hd_novel': 320
    }  # numbers depend on dataset
    for tval_type in tval_name_ntasks_dict.keys():
        if config.get('{}_dataset'.format(tval_type)):
            tval_dataset = datasets.make(
                config['{}_dataset'.format(tval_type)],
                **config['{}_dataset_args'.format(tval_type)])
            utils.log('{} dataset: {} (x{})'.format(tval_type,
                                                    tval_dataset[0][0].shape,
                                                    len(tval_dataset)))
            if config.get('visualize_datasets'):
                utils.visualize_dataset(tval_dataset, 'tval_ff_dataset', writer)
            tval_sampler = BongardSampler(
                tval_dataset.n_tasks,
                n_batch=tval_name_ntasks_dict[tval_type] // ep_per_batch,
                ep_per_batch=ep_per_batch,
                seed=random_state.randint(2**31))
            tval_loader = DataLoader(tval_dataset, batch_sampler=tval_sampler,
                                     num_workers=8, pin_memory=True)
            tval_loaders.update({tval_type: tval_loader})
        else:
            tval_loaders.update({tval_type: None})

    # val
    val_dataset = datasets.make(config['val_dataset'],
                                **config['val_dataset_args'])
    utils.log('val dataset: {} (x{})'.format(val_dataset[0][0].shape,
                                             len(val_dataset)))
    if config.get('visualize_datasets'):
        utils.visualize_dataset(val_dataset, 'val_dataset', writer)
    val_sampler = BongardSampler(val_dataset.n_tasks,
                                 n_batch=900 // ep_per_batch,
                                 ep_per_batch=ep_per_batch,
                                 seed=random_state.randint(2**31))
    val_loader = DataLoader(val_dataset, batch_sampler=val_sampler,
                            num_workers=8, pin_memory=True)

    ########

    #### Model and optimizer ####

    if config.get('load'):
        print('loading pretrained model: ', config['load'])
        model = models.load(torch.load(config['load']))
    else:
        model = models.make(config['model'], **config['model_args'])

        # Optionally warm-start sub-modules from separately trained checkpoints.
        if config.get('load_encoder'):
            print('loading pretrained encoder: ', config['load_encoder'])
            encoder = models.load(torch.load(config['load_encoder'])).encoder
            model.encoder.load_state_dict(encoder.state_dict())

        if config.get('load_prog_synthesis'):
            print('loading pretrained program synthesis model: ',
                  config['load_prog_synthesis'])
            prog_synthesis = models.load(
                torch.load(config['load_prog_synthesis']))
            model.prog_synthesis.load_state_dict(prog_synthesis.state_dict())

    if config.get('_parallel'):
        model = nn.DataParallel(model)

    utils.log('num params: {}'.format(utils.compute_n_params(model)))

    optimizer, lr_scheduler = utils.make_optimizer(model.parameters(),
                                                   config['optimizer'],
                                                   **config['optimizer_args'])

    ########

    max_epoch = config['max_epoch']
    save_epoch = config.get('save_epoch')
    max_va = 0.
    timer_used = utils.Timer()
    timer_epoch = utils.Timer()

    # Averager keys: train loss/acc, val loss/acc, plus one loss/acc pair per
    # active tval split (suffix derived from the split name).
    aves_keys = ['tl', 'ta', 'vl', 'va']
    tval_tuple_lst = []
    for k, v in tval_loaders.items():
        if v is not None:
            loss_key = 'tvl' + k.split('tval')[-1]
            # NOTE(review): this key has a leading space (' tva...'); it is
            # used consistently below so it works, but looks unintentional.
            acc_key = ' tva' + k.split('tval')[-1]
            aves_keys.append(loss_key)
            aves_keys.append(acc_key)
            tval_tuple_lst.append((k, v, loss_key, acc_key))

    trlog = dict()
    for k in aves_keys:
        trlog[k] = []

    for epoch in range(1, max_epoch + 1):
        timer_epoch.s()
        aves = {k: utils.Averager() for k in aves_keys}

        # train
        model.train()
        if config.get('freeze_bn'):
            utils.freeze_bn(model)
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch)

        for data, label in tqdm(train_loader, desc='train', leave=False):
            x_shot, x_query = fs.split_shot_query(data.cuda(), n_train_way,
                                                  n_train_shot, n_query,
                                                  ep_per_batch=ep_per_batch)
            label_query = fs.make_nk_label(n_train_way, n_query,
                                           ep_per_batch=ep_per_batch).cuda()

            if config['model'] == 'snail':  # only use one selected label_query
                query_dix = random_state.randint(n_train_way * n_query)
                label_query = label_query.view(ep_per_batch, -1)[:, query_dix]
                x_query = x_query[:, query_dix:query_dix + 1]

            if config['model'] == 'maml':  # need grad in maml
                model.zero_grad()

            logits = model(x_shot, x_query).view(-1, n_train_way)
            loss = F.cross_entropy(logits, label_query)
            acc = utils.compute_acc(logits, label_query)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            aves['tl'].add(loss.item())
            aves['ta'].add(acc)

            # Drop references so graph memory is freed promptly.
            logits = None
            loss = None

        # eval
        model.eval()

        for name, loader, name_l, name_a in [('val', val_loader, 'vl', 'va')
                                             ] + tval_tuple_lst:
            if config.get('{}_dataset'.format(name)) is None:
                continue
            # Fixed seed so every epoch evaluates the same episode stream.
            np.random.seed(0)
            for data, _ in tqdm(loader, desc=name, leave=False):
                x_shot, x_query = fs.split_shot_query(
                    data.cuda(), n_way, n_shot, n_query,
                    ep_per_batch=ep_per_batch)
                label_query = fs.make_nk_label(
                    n_way, n_query, ep_per_batch=ep_per_batch).cuda()

                if config['model'] == 'snail':  # only use one randomly selected label_query
                    query_dix = random_state.randint(n_train_way)
                    label_query = label_query.view(ep_per_batch, -1)[:, query_dix]
                    x_query = x_query[:, query_dix:query_dix + 1]

                if config['model'] == 'maml':  # need grad in maml
                    model.zero_grad()
                    logits = model(x_shot, x_query, eval=True).view(-1, n_way)
                    loss = F.cross_entropy(logits, label_query)
                    acc = utils.compute_acc(logits, label_query)
                else:
                    with torch.no_grad():
                        logits = model(x_shot, x_query, eval=True).view(-1, n_way)
                        loss = F.cross_entropy(logits, label_query)
                        acc = utils.compute_acc(logits, label_query)

                aves[name_l].add(loss.item())
                aves[name_a].add(acc)

        # post
        if lr_scheduler is not None:
            lr_scheduler.step()

        for k, v in aves.items():
            aves[k] = v.item()
            trlog[k].append(aves[k])

        t_epoch = utils.time_str(timer_epoch.t())
        t_used = utils.time_str(timer_used.t())
        t_estimate = utils.time_str(timer_used.t() / epoch * max_epoch)

        log_str = 'epoch {}, train {:.4f}|{:.4f}, val {:.4f}|{:.4f}'.format(
            epoch, aves['tl'], aves['ta'], aves['vl'], aves['va'])
        for tval_name, _, loss_key, acc_key in tval_tuple_lst:
            log_str += ', {} {:.4f}|{:.4f}'.format(tval_name, aves[loss_key],
                                                   aves[acc_key])
            writer.add_scalars('loss', {tval_name: aves[loss_key]}, epoch)
            writer.add_scalars('acc', {tval_name: aves[acc_key]}, epoch)
        log_str += ', {} {}/{}'.format(t_epoch, t_used, t_estimate)
        utils.log(log_str)

        writer.add_scalars('loss', {
            'train': aves['tl'],
            'val': aves['vl'],
        }, epoch)
        writer.add_scalars('acc', {
            'train': aves['ta'],
            'val': aves['va'],
        }, epoch)

        # Unwrap DataParallel so the checkpoint holds raw module weights.
        if config.get('_parallel'):
            model_ = model.module
        else:
            model_ = model

        training = {
            'epoch': epoch,
            'optimizer': config['optimizer'],
            'optimizer_args': config['optimizer_args'],
            'optimizer_sd': optimizer.state_dict(),
        }
        save_obj = {
            'file': __file__,
            'config': config,

            'model': config['model'],
            'model_args': config['model_args'],
            'model_sd': model_.state_dict(),

            'training': training,
        }
        # Always refresh the rolling "last epoch" checkpoint and the log dict.
        torch.save(save_obj, os.path.join(save_path, 'epoch-last.pth'))
        torch.save(trlog, os.path.join(save_path, 'trlog.pth'))

        if (save_epoch is not None) and epoch % save_epoch == 0:
            torch.save(save_obj,
                       os.path.join(save_path, 'epoch-{}.pth'.format(epoch)))

        # Track the best validation accuracy seen so far.
        if aves['va'] > max_va:
            max_va = aves['va']
            torch.save(save_obj, os.path.join(save_path, 'max-va.pth'))

        writer.flush()

    print('finished training!')
    logger.close()
def main(config):
    """Meta-train with CategoriesSampler episodes; evaluate on tval and val.

    Per epoch: train on episodic batches, evaluate (no-grad) on the optional
    tval split and the val split, log to TensorBoard, and checkpoint
    (epoch-last, periodic, and best-val-accuracy).
    """
    svname = args.name
    if svname is None:
        svname = 'meta_{}-{}shot'.format(
            config['train_dataset'], config['n_shot'])
        svname += '_' + config['model'] + '-' + config['model_args']['encoder']
    if args.tag is not None:
        svname += '_' + args.tag
    save_path = os.path.join('./save', svname)
    utils.ensure_path(save_path)
    utils.set_log_path(save_path)
    writer = SummaryWriter(os.path.join(save_path, 'tensorboard'))
    yaml.dump(config, open(os.path.join(save_path, 'config.yaml'), 'w'))

    #### Dataset ####

    n_way, n_shot = config['n_way'], config['n_shot']
    n_query = config['n_query']

    # Training way/shot may differ from the evaluation setting.
    if config.get('n_train_way') is not None:
        n_train_way = config['n_train_way']
    else:
        n_train_way = n_way
    if config.get('n_train_shot') is not None:
        n_train_shot = config['n_train_shot']
    else:
        n_train_shot = n_shot
    if config.get('ep_per_batch') is not None:
        ep_per_batch = config['ep_per_batch']
    else:
        ep_per_batch = 1

    # train
    train_dataset = datasets.make(config['train_dataset'],
                                  **config['train_dataset_args'])
    utils.log('train dataset: {} (x{}), {}'.format(
        train_dataset[0][0].shape, len(train_dataset),
        train_dataset.n_classes))
    if config.get('visualize_datasets'):
        utils.visualize_dataset(train_dataset, 'train_dataset', writer)
    train_sampler = CategoriesSampler(
        train_dataset.label, config['train_batches'],
        n_train_way, n_train_shot + n_query,
        ep_per_batch=ep_per_batch)
    train_loader = DataLoader(train_dataset, batch_sampler=train_sampler,
                              num_workers=8, pin_memory=True)

    # tval (optional extra evaluation split)
    if config.get('tval_dataset'):
        tval_dataset = datasets.make(config['tval_dataset'],
                                     **config['tval_dataset_args'])
        utils.log('tval dataset: {} (x{}), {}'.format(
            tval_dataset[0][0].shape, len(tval_dataset),
            tval_dataset.n_classes))
        if config.get('visualize_datasets'):
            utils.visualize_dataset(tval_dataset, 'tval_dataset', writer)
        # NOTE(review): eval loaders use a hard-coded 200 batches and
        # ep_per_batch=4, independent of the training settings.
        tval_sampler = CategoriesSampler(
            tval_dataset.label, 200,
            n_way, n_shot + n_query,
            ep_per_batch=4)
        tval_loader = DataLoader(tval_dataset, batch_sampler=tval_sampler,
                                 num_workers=8, pin_memory=True)
    else:
        tval_loader = None

    # val
    val_dataset = datasets.make(config['val_dataset'],
                                **config['val_dataset_args'])
    utils.log('val dataset: {} (x{}), {}'.format(
        val_dataset[0][0].shape, len(val_dataset),
        val_dataset.n_classes))
    if config.get('visualize_datasets'):
        utils.visualize_dataset(val_dataset, 'val_dataset', writer)
    val_sampler = CategoriesSampler(
        val_dataset.label, 200,
        n_way, n_shot + n_query,
        ep_per_batch=4)
    val_loader = DataLoader(val_dataset, batch_sampler=val_sampler,
                            num_workers=8, pin_memory=True)

    ########

    #### Model and optimizer ####

    if config.get('load'):
        model_sv = torch.load(config['load'])
        model = models.load(model_sv)
    else:
        model = models.make(config['model'], **config['model_args'])

        if config.get('load_encoder'):
            encoder = models.load(torch.load(config['load_encoder'])).encoder
            model.encoder.load_state_dict(encoder.state_dict())

    if config.get('_parallel'):
        model = nn.DataParallel(model)

    utils.log('num params: {}'.format(utils.compute_n_params(model)))

    optimizer, lr_scheduler = utils.make_optimizer(
        model.parameters(),
        config['optimizer'], **config['optimizer_args'])

    ########

    max_epoch = config['max_epoch']
    save_epoch = config.get('save_epoch')
    max_va = 0.
    timer_used = utils.Timer()
    timer_epoch = utils.Timer()

    # tl/ta = train loss/acc, tvl/tva = tval, vl/va = val.
    aves_keys = ['tl', 'ta', 'tvl', 'tva', 'vl', 'va']
    trlog = dict()
    for k in aves_keys:
        trlog[k] = []

    for epoch in range(1, max_epoch + 1):
        timer_epoch.s()
        aves = {k: utils.Averager() for k in aves_keys}

        # train
        model.train()
        if config.get('freeze_bn'):
            utils.freeze_bn(model)
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch)

        # Epoch-dependent seed so training episodes vary across epochs.
        np.random.seed(epoch)
        for data, _ in tqdm(train_loader, desc='train', leave=False):
            x_shot, x_query = fs.split_shot_query(
                data.cuda(), n_train_way, n_train_shot, n_query,
                ep_per_batch=ep_per_batch)
            label = fs.make_nk_label(n_train_way, n_query,
                                     ep_per_batch=ep_per_batch).cuda()

            logits = model(x_shot, x_query).view(-1, n_train_way)
            loss = F.cross_entropy(logits, label)
            acc = utils.compute_acc(logits, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            aves['tl'].add(loss.item())
            aves['ta'].add(acc)

            # Drop references so graph memory is freed promptly.
            logits = None; loss = None

        # eval
        model.eval()

        for name, loader, name_l, name_a in [
                ('tval', tval_loader, 'tvl', 'tva'),
                ('val', val_loader, 'vl', 'va')]:

            if (config.get('tval_dataset') is None) and name == 'tval':
                continue

            # Fixed seed so every epoch evaluates the same episode stream.
            np.random.seed(0)
            for data, _ in tqdm(loader, desc=name, leave=False):
                x_shot, x_query = fs.split_shot_query(
                    data.cuda(), n_way, n_shot, n_query,
                    ep_per_batch=4)
                label = fs.make_nk_label(n_way, n_query,
                                         ep_per_batch=4).cuda()

                with torch.no_grad():
                    logits = model(x_shot, x_query).view(-1, n_way)
                    loss = F.cross_entropy(logits, label)
                    acc = utils.compute_acc(logits, label)

                aves[name_l].add(loss.item())
                aves[name_a].add(acc)

        # NOTE(review): derives a "signature" from the last element of the
        # final eval label batch `_` — looks like a debugging/run-identity
        # hack; confirm it is intentional before relying on it.
        _sig = int(_[-1])

        # post
        if lr_scheduler is not None:
            lr_scheduler.step()

        for k, v in aves.items():
            aves[k] = v.item()
            trlog[k].append(aves[k])

        t_epoch = utils.time_str(timer_epoch.t())
        t_used = utils.time_str(timer_used.t())
        t_estimate = utils.time_str(timer_used.t() / epoch * max_epoch)
        utils.log('epoch {}, train {:.4f}|{:.4f}, tval {:.4f}|{:.4f}, '
                  'val {:.4f}|{:.4f}, {} {}/{} (@{})'.format(
                      epoch, aves['tl'], aves['ta'], aves['tvl'], aves['tva'],
                      aves['vl'], aves['va'], t_epoch, t_used, t_estimate,
                      _sig))

        writer.add_scalars('loss', {
            'train': aves['tl'],
            'tval': aves['tvl'],
            'val': aves['vl'],
        }, epoch)
        writer.add_scalars('acc', {
            'train': aves['ta'],
            'tval': aves['tva'],
            'val': aves['va'],
        }, epoch)

        # Unwrap DataParallel so the checkpoint holds raw module weights.
        if config.get('_parallel'):
            model_ = model.module
        else:
            model_ = model

        training = {
            'epoch': epoch,
            'optimizer': config['optimizer'],
            'optimizer_args': config['optimizer_args'],
            'optimizer_sd': optimizer.state_dict(),
        }
        save_obj = {
            'file': __file__,
            'config': config,

            'model': config['model'],
            'model_args': config['model_args'],
            'model_sd': model_.state_dict(),

            'training': training,
        }
        torch.save(save_obj, os.path.join(save_path, 'epoch-last.pth'))
        torch.save(trlog, os.path.join(save_path, 'trlog.pth'))

        if (save_epoch is not None) and epoch % save_epoch == 0:
            torch.save(save_obj,
                       os.path.join(save_path, 'epoch-{}.pth'.format(epoch)))

        # Track the best validation accuracy seen so far.
        if aves['va'] > max_va:
            max_va = aves['va']
            torch.save(save_obj, os.path.join(save_path, 'max-va.pth'))

        writer.flush()
def main(config):
    """Meta-train with pseudo-shot episodes (EpisodicSampler / CustomDataset).

    Rewrites each split's ``data_dir`` to ``../data_root``, builds episodic
    loaders that additionally carry ``n_pseudo`` pseudo examples, optionally
    freezes the encoder, then runs the standard train/eval/checkpoint loop.
    """
    svname = config.get('sv_name')
    if args.tag is not None:
        svname += '_' + args.tag
    config['sv_name'] = svname
    save_path = os.path.join('./save', svname)
    utils.ensure_path(save_path)
    utils.set_log_path(save_path)
    utils.log(svname)
    writer = SummaryWriter(os.path.join(save_path, 'tensorboard'))
    yaml.dump(config, open(os.path.join(save_path, 'config.yaml'), 'w'))

    #### Dataset ####

    n_way, n_shot = config['n_way'], config['n_shot']
    n_query = config['n_query']
    n_pseudo = config['n_pseudo']
    ep_per_batch = config['ep_per_batch']

    if config.get('test_batches') is not None:
        test_batches = config['test_batches']
    else:
        test_batches = config['train_batches']

    # Point every split at the shared ../data_root directory.
    for s in ['train', 'val', 'tval']:
        if config.get(f"{s}_dataset_args") is not None:
            config[f"{s}_dataset_args"]['data_dir'] = os.path.join(os.getcwd(), os.pardir, 'data_root')

    # train
    train_dataset = CustomDataset(config['train_dataset'],
                                  save_dir=config.get('load_encoder'),
                                  **config['train_dataset_args'])

    # For the 'helper' split, persist the chosen helper classes for later use.
    if config['train_dataset_args']['split'] == 'helper':
        with open(os.path.join(save_path, 'train_helper_cls.pkl'), 'wb') as f:
            pkl.dump(train_dataset.dataset_classes, f)

    train_sampler = EpisodicSampler(train_dataset, config['train_batches'],
                                    n_way, n_shot, n_query, n_pseudo,
                                    episodes_per_batch=ep_per_batch)
    train_loader = DataLoader(train_dataset, batch_sampler=train_sampler,
                              num_workers=4, pin_memory=True)

    # tval (optional extra evaluation split)
    if config.get('tval_dataset'):
        tval_dataset = CustomDataset(config['tval_dataset'],
                                     **config['tval_dataset_args'])

        tval_sampler = EpisodicSampler(tval_dataset, test_batches,
                                       n_way, n_shot, n_query, n_pseudo,
                                       episodes_per_batch=ep_per_batch)
        tval_loader = DataLoader(tval_dataset, batch_sampler=tval_sampler,
                                 num_workers=4, pin_memory=True)
    else:
        tval_loader = None

    # val
    val_dataset = CustomDataset(config['val_dataset'],
                                **config['val_dataset_args'])

    val_sampler = EpisodicSampler(val_dataset, test_batches,
                                  n_way, n_shot, n_query, n_pseudo,
                                  episodes_per_batch=ep_per_batch)
    val_loader = DataLoader(val_dataset, batch_sampler=val_sampler,
                            num_workers=4, pin_memory=True)

    #### Model and optimizer ####

    if config.get('load'):
        model_sv = torch.load(config['load'])
        model = models.load(model_sv)
    else:
        model = models.make(config['model'], **config['model_args'])

        if config.get('load_encoder'):
            encoder = models.load(torch.load(config['load_encoder'])).encoder
            model.encoder.load_state_dict(encoder.state_dict())

    # Optionally train only the head by freezing all encoder parameters.
    if config.get('freeze_encoder'):
        for param in model.encoder.parameters():
            param.requires_grad = False

    if config.get('_parallel'):
        model = nn.DataParallel(model)

    utils.log('num params: {}'.format(utils.compute_n_params(model)))

    optimizer, lr_scheduler = utils.make_optimizer(
        model.parameters(),
        config['optimizer'], **config['optimizer_args'])

    ########

    max_epoch = config['max_epoch']
    save_epoch = config.get('save_epoch')
    max_va = 0.
    timer_used = utils.Timer()
    timer_epoch = utils.Timer()

    # tl/ta = train loss/acc, tvl/tva = tval, vl/va = val.
    aves_keys = ['tl', 'ta', 'tvl', 'tva', 'vl', 'va']
    trlog = dict()
    for k in aves_keys:
        trlog[k] = []

    for epoch in range(1, max_epoch + 1):
        timer_epoch.s()
        aves = {k: utils.Averager() for k in aves_keys}

        # train
        model.train()
        if config.get('freeze_bn'):
            utils.freeze_bn(model)
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch)

        # Epoch-dependent seed so training episodes vary across epochs.
        np.random.seed(epoch)

        for data in tqdm(train_loader, desc='train', leave=False):
            x_shot, x_query, x_pseudo = fs.split_shot_query(
                data.cuda(), n_way, n_shot, n_query, n_pseudo,
                ep_per_batch=ep_per_batch)
            label = fs.make_nk_label(n_way, n_query,
                                     ep_per_batch=ep_per_batch).cuda()

            logits = model(x_shot, x_query, x_pseudo)
            logits = logits.view(-1, n_way)
            loss = F.cross_entropy(logits, label)
            acc = utils.compute_acc(logits, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            aves['tl'].add(loss.item())
            aves['ta'].add(acc)

            # Drop references so graph memory is freed promptly.
            logits = None; loss = None

        # eval
        model.eval()

        for name, loader, name_l, name_a in [
                ('tval', tval_loader, 'tvl', 'tva'),
                ('val', val_loader, 'vl', 'va')]:

            if (config.get('tval_dataset') is None) and name == 'tval':
                continue

            # Fixed seed so every epoch evaluates the same episode stream.
            np.random.seed(0)
            for data in tqdm(loader, desc=name, leave=False):
                x_shot, x_query, x_pseudo = fs.split_shot_query(
                    data.cuda(), n_way, n_shot, n_query, n_pseudo,
                    ep_per_batch=ep_per_batch)
                label = fs.make_nk_label(n_way, n_query,
                                         ep_per_batch=ep_per_batch).cuda()

                with torch.no_grad():
                    logits = model(x_shot, x_query, x_pseudo)
                    logits = logits.view(-1, n_way)
                    loss = F.cross_entropy(logits, label)
                    acc = utils.compute_acc(logits, label)

                aves[name_l].add(loss.item())
                aves[name_a].add(acc)

        # post
        if lr_scheduler is not None:
            lr_scheduler.step()

        for k, v in aves.items():
            aves[k] = v.item()
            trlog[k].append(aves[k])

        t_epoch = utils.time_str(timer_epoch.t())
        t_used = utils.time_str(timer_used.t())
        t_estimate = utils.time_str(timer_used.t() / epoch * max_epoch)
        utils.log('epoch {}, train {:.4f}|{:.4f}, tval {:.4f}|{:.4f}, '
                  'val {:.4f}|{:.4f}, {} {}/{}'.format(
                      epoch, aves['tl'], aves['ta'], aves['tvl'], aves['tva'],
                      aves['vl'], aves['va'], t_epoch, t_used, t_estimate))

        writer.add_scalars('loss', {
            'train': aves['tl'],
            'tval': aves['tvl'],
            'val': aves['vl'],
        }, epoch)
        writer.add_scalars('acc', {
            'train': aves['ta'],
            'tval': aves['tva'],
            'val': aves['va'],
        }, epoch)

        # Unwrap DataParallel so the checkpoint holds raw module weights.
        if config.get('_parallel'):
            model_ = model.module
        else:
            model_ = model

        training = {
            'epoch': epoch,
            'optimizer': config['optimizer'],
            'optimizer_args': config['optimizer_args'],
            'optimizer_sd': optimizer.state_dict(),
        }
        save_obj = {
            'file': __file__,
            'config': config,

            'model': config['model'],
            'model_args': config['model_args'],
            'model_sd': model_.state_dict(),

            'training': training,
        }
        torch.save(save_obj, os.path.join(save_path, 'epoch-last.pth'))
        torch.save(trlog, os.path.join(save_path, 'trlog.pth'))

        if (save_epoch is not None) and epoch % save_epoch == 0:
            torch.save(save_obj,
                       os.path.join(save_path, 'epoch-{}.pth'.format(epoch)))

        # Track the best validation accuracy seen so far.
        if aves['va'] > max_va:
            max_va = aves['va']
            torch.save(save_obj, os.path.join(save_path, 'max-va.pth'))

        writer.flush()
def main(config):
    """Meta-train a torch model on Meta-Dataset episodes served by TensorFlow.

    Builds episodic TF iterators (train/val/test), pulls batches through a
    tf.Session, converts them to CUDA tensors via ``process_data``, and runs
    the usual train/eval/checkpoint loop. Note the model itself computes
    (loss, acc) from a full episode.
    """
    svname = args.name
    if svname is None:
        svname = 'meta'
    if args.tag is not None:
        svname += '_' + args.tag
    save_path = os.path.join('./save', svname)
    utils.ensure_path(save_path)
    utils.set_log_path(save_path)
    writer = SummaryWriter(os.path.join(save_path, 'tensorboard'))
    yaml.dump(config, open(os.path.join(save_path, 'config.yaml'), 'w'))

    #### Dataset ####

    # 'all' trains across the standard Meta-Dataset sources but evaluates
    # on ImageNet only; otherwise train and eval use the single chosen source.
    if args.dataset == 'all':
        train_lst = ['ilsvrc_2012', 'omniglot', 'aircraft', 'cu_birds', 'dtd',
                     'quickdraw', 'fungi', 'vgg_flower']
        eval_lst = ['ilsvrc_2012']
    else:
        train_lst = [args.dataset]
        eval_lst = [args.dataset]

    if config.get('no_train') == True:
        train_iter = None
    else:
        trainset = make_md(train_lst, 'episodic', split='train',
                           image_size=126)
        train_iter = trainset.make_one_shot_iterator().get_next()
    if config.get('no_val') == True:
        val_iter = None
    else:
        valset = make_md(eval_lst, 'episodic', split='val', image_size=126)
        val_iter = valset.make_one_shot_iterator().get_next()
    testset = make_md(eval_lst, 'episodic', split='test', image_size=126)
    test_iter = testset.make_one_shot_iterator().get_next()

    # Session used to materialize episodes from the TF iterators.
    sess = tf.Session()

    ########

    #### Model and optimizer ####

    if config.get('load'):
        model_sv = torch.load(config['load'])
        model = models.load(model_sv)
    else:
        model = models.make(config['model'], **config['model_args'])

        if config.get('load_encoder'):
            encoder = models.load(torch.load(config['load_encoder'])).encoder
            model.encoder.load_state_dict(encoder.state_dict())

    if config.get('_parallel'):
        model = nn.DataParallel(model)

    utils.log('num params: {}'.format(utils.compute_n_params(model)))

    optimizer, lr_scheduler = utils.make_optimizer(
        model.parameters(),
        config['optimizer'], **config['optimizer_args'])

    ########

    max_epoch = config['max_epoch']
    save_epoch = config.get('save_epoch')
    max_va = 0.
    timer_used = utils.Timer()
    timer_epoch = utils.Timer()

    # tl/ta = train loss/acc, tvl/tva = val split, vl/va = test split.
    aves_keys = ['tl', 'ta', 'tvl', 'tva', 'vl', 'va']
    trlog = dict()
    for k in aves_keys:
        trlog[k] = []

    def process_data(e):
        # Convert one TF episode tuple to CUDA tensors.
        # Episode layout (from Meta-Dataset): e[0]/e[3] are support/query
        # images in [-1, 1], e[1]/e[4] the corresponding integer labels.
        e = list(e[0])
        transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(146),
            transforms.CenterCrop(128),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225])
        ])
        for ii in [0, 3]:
            # [-1, 1] float -> uint8 [0, 255], then per-image transform.
            e[ii] = ((e[ii] + 1.0) * 0.5 * 255).astype('uint8')
            tmp = torch.zeros(len(e[ii]), 3, 128, 128).float()
            for i in range(len(e[ii])):
                tmp[i] = transform(e[ii][i])
            e[ii] = tmp.cuda()
        e[1] = torch.from_numpy(e[1]).long().cuda()
        e[4] = torch.from_numpy(e[4]).long().cuda()
        return e

    for epoch in range(1, max_epoch + 1):
        timer_epoch.s()
        aves = {k: utils.Averager() for k in aves_keys}

        # train
        model.train()
        if config.get('freeze_bn'):
            utils.freeze_bn(model)
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch)

        if config.get('no_train') == True:
            pass
        else:
            for i_ep in tqdm(range(config['n_train'])):
                e = process_data(sess.run(train_iter))
                # Model consumes a full episode and returns (loss, acc).
                loss, acc = model(e[0], e[1], e[3], e[4])

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                aves['tl'].add(loss.item())
                aves['ta'].add(acc)

                # Drop reference so graph memory is freed promptly.
                loss = None

        # eval
        model.eval()

        # NOTE(review): 'tval' reports the val split and 'val' reports the
        # test split — key names do not match the split semantics here.
        for name, ds_iter, name_l, name_a in [
                ('tval', val_iter, 'tvl', 'tva'),
                ('val', test_iter, 'vl', 'va')]:

            if config.get('no_val') == True and name == 'tval':
                continue

            for i_ep in tqdm(range(config['n_eval'])):
                e = process_data(sess.run(ds_iter))
                with torch.no_grad():
                    loss, acc = model(e[0], e[1], e[3], e[4])

                aves[name_l].add(loss.item())
                aves[name_a].add(acc)

        # post
        if lr_scheduler is not None:
            lr_scheduler.step()

        for k, v in aves.items():
            aves[k] = v.item()
            trlog[k].append(aves[k])

        # Placeholder signature kept for log-format compatibility.
        _sig = 0

        t_epoch = utils.time_str(timer_epoch.t())
        t_used = utils.time_str(timer_used.t())
        t_estimate = utils.time_str(timer_used.t() / epoch * max_epoch)
        utils.log('epoch {}, train {:.4f}|{:.4f}, tval {:.4f}|{:.4f}, '
                  'val {:.4f}|{:.4f}, {} {}/{} (@{})'.format(
                      epoch, aves['tl'], aves['ta'], aves['tvl'], aves['tva'],
                      aves['vl'], aves['va'], t_epoch, t_used, t_estimate,
                      _sig))

        writer.add_scalars('loss', {
            'train': aves['tl'],
            'tval': aves['tvl'],
            'val': aves['vl'],
        }, epoch)
        writer.add_scalars('acc', {
            'train': aves['ta'],
            'tval': aves['tva'],
            'val': aves['va'],
        }, epoch)

        # Unwrap DataParallel so the checkpoint holds raw module weights.
        if config.get('_parallel'):
            model_ = model.module
        else:
            model_ = model

        training = {
            'epoch': epoch,
            'optimizer': config['optimizer'],
            'optimizer_args': config['optimizer_args'],
            'optimizer_sd': optimizer.state_dict(),
        }
        save_obj = {
            'file': __file__,
            'config': config,

            'model': config['model'],
            'model_args': config['model_args'],
            'model_sd': model_.state_dict(),

            'training': training,
        }
        torch.save(save_obj, os.path.join(save_path, 'epoch-last.pth'))
        torch.save(trlog, os.path.join(save_path, 'trlog.pth'))

        if (save_epoch is not None) and epoch % save_epoch == 0:
            torch.save(save_obj,
                       os.path.join(save_path, 'epoch-{}.pth'.format(epoch)))

        # Track the best validation accuracy seen so far.
        if aves['va'] > max_va:
            max_va = aves['va']
            torch.save(save_obj, os.path.join(save_path, 'max-va.pth'))

        writer.flush()