def __init__(self, encoder, encoder_args, masking, masking_args,
             universal, universal_args, method='cos', temp=10.,
             temp_learnable=True):
    super().__init__()
    self.encoder = models.make(encoder, **encoder_args)
    self.aggregator = models.make('mean-aggregator', **{})
    masking_inplane = self.encoder.out_dim * 2
    masking_args['inplanes'] = masking_inplane
    self.masking_model = models.make(masking, **masking_args)
    in_planes = masking_inplane // 2
    self.universal = models.make(universal, inplanes=in_planes,
                                 **universal_args)
    self.method = method
    if temp_learnable:
        self.temp = nn.Parameter(torch.tensor(temp))
    else:
        self.temp = temp
def prepare_training():
    if config.get('resume') is not None:
        sv_file = torch.load(config['resume'])
        model = models.make(sv_file['model'], load_sd=True).cuda()
        optimizer = utils.make_optimizer(
            model.parameters(), sv_file['optimizer'], load_sd=True)
        epoch_start = sv_file['epoch'] + 1
        if config.get('multi_step_lr') is None:
            lr_scheduler = None
        else:
            lr_scheduler = MultiStepLR(optimizer, **config['multi_step_lr'])
            # fast-forward the scheduler to the resumed epoch
            for _ in range(epoch_start - 1):
                lr_scheduler.step()
    else:
        model = models.make(config['model']).cuda()
        optimizer = utils.make_optimizer(
            model.parameters(), config['optimizer'])
        epoch_start = 1
        if config.get('multi_step_lr') is None:
            lr_scheduler = None
        else:
            lr_scheduler = MultiStepLR(optimizer, **config['multi_step_lr'])
    log('model: #params={}'.format(utils.compute_num_params(model, text=True)))
    return model, optimizer, epoch_start, lr_scheduler
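# A minimal, self-contained sketch of the scheduler fast-forward used in the
# resume branch above (milestones/gamma/epoch values are illustrative, not
# from this repo):
import torch
from torch.optim.lr_scheduler import MultiStepLR as _MultiStepLR

_params = [torch.nn.Parameter(torch.zeros(1))]
_opt = torch.optim.SGD(_params, lr=0.1)
_sched = _MultiStepLR(_opt, milestones=[30, 80], gamma=0.1)
_epoch_start = 42
for _ in range(_epoch_start - 1):
    _sched.step()  # replays 41 epochs; lr is now 0.1 * 0.1 = 0.01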
def __init__(self, encoder, encoder_args, K=65536, m=0.999, T=0.07, mlp=False):
    """
    K: queue size; number of negative keys (default: 65536)
    m: MoCo momentum for updating the key encoder (default: 0.999)
    T: softmax temperature (default: 0.07)
    The feature dimension is taken from the encoder's out_dim.
    """
    super(MoCo, self).__init__()
    self.K = K
    self.m = m
    self.T = T

    # create the encoders
    # feature embedding size is the output fc dimension
    self.encoder_q = models.make(encoder, **encoder_args)
    self.encoder_k = models.make(encoder, **encoder_args)
    dim = self.encoder_q.out_dim
    self.encoder = self.encoder_q  # use encoder_q for downstream tasks

    if mlp:  # hack: brute-force replacement
        dim_mlp = self.encoder_q.fc.weight.shape[1]
        self.encoder_q.fc = nn.Sequential(
            nn.Linear(dim_mlp, dim_mlp), nn.ReLU(), self.encoder_q.fc)
        self.encoder_k.fc = nn.Sequential(
            nn.Linear(dim_mlp, dim_mlp), nn.ReLU(), self.encoder_k.fc)

    for param_q, param_k in zip(self.encoder_q.parameters(),
                                self.encoder_k.parameters()):
        param_k.data.copy_(param_q.data)  # initialize
        param_k.requires_grad = False  # not updated by gradient

    # create the queue
    self.register_buffer("queue", torch.randn(dim, K))
    self.queue = nn.functional.normalize(self.queue, dim=0)
    self.register_buffer("queue_ptr", torch.zeros(1, dtype=torch.long))
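# Not shown in this snippet: the per-step key-encoder and queue maintenance
# MoCo needs. A minimal sketch of the standard methods (single-GPU, no
# shuffled BN), written to match the buffers created in __init__ above; they
# would live inside the MoCo class:
import torch

@torch.no_grad()
def _momentum_update_key_encoder(self):
    # the key encoder is an exponential moving average of the query encoder
    for param_q, param_k in zip(self.encoder_q.parameters(),
                                self.encoder_k.parameters()):
        param_k.data = param_k.data * self.m + param_q.data * (1. - self.m)

@torch.no_grad()
def _dequeue_and_enqueue(self, keys):
    # overwrite the oldest keys in the queue, then advance the pointer
    batch_size = keys.shape[0]
    ptr = int(self.queue_ptr)
    assert self.K % batch_size == 0  # simplifying assumption
    self.queue[:, ptr:ptr + batch_size] = keys.T
    self.queue_ptr[0] = (ptr + batch_size) % self.K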
def __init__(self, encoder, encoder_args={}, recurrent_model='lstm',
             seq_len=MAX_LEN_PROGRM, embed_dim=64, hidden_dim=64, n_layers=2,
             drop_prob=0.5, continuous=True, num_head=2,
             always_with_input_img=True, use_mixture_density=True,
             components_size=5, discretized_num=10, base_type_coef=1.0,
             arg_coef=1.0, repackage=True):
    super().__init__()
    self.encoder = models.make(encoder, **encoder_args)
    self.recurrent_model = models.make(recurrent_model, nemb=embed_dim,
                                       nhead=num_head, nhid=hidden_dim,
                                       nlayers=n_layers, dropout=drop_prob,
                                       repackage=repackage)
    self.feat_len = self.encoder.out_dim
    self.continuous = continuous
    # 3 (start, and, stop) + 9 (max no. of actions) + 9 (max no. of actions)
    self.seq_len = seq_len
    self.embed_dim = embed_dim
    self.hidden_dim = hidden_dim
    self.n_layers = n_layers
    self.drop_prob = drop_prob
    self.always_with_input_img = always_with_input_img
    self.use_mixture_density = use_mixture_density
    self.discretized_num = discretized_num
    self.base_type_coef = base_type_coef
    self.arg_coef = arg_coef
    self.base_decoder_names = BASE_ACTIONS
    self.base_decoder_types = BASE_LINE_TYPES

    print('recurrent_model type: ', self.recurrent_model.model_type)
    print('continuous: ', self.continuous)
    print('use_mixture_density: ', self.use_mixture_density)

    self.primitive_decoder = PrimitiveDecoder(
        self.hidden_dim, self.continuous, self.use_mixture_density,
        components_size, self.base_decoder_names, self.base_decoder_types,
        self.discretized_num, self.base_type_coef, self.arg_coef).cuda()
    self.token_len = sum(out[1] for out in self.primitive_decoder.output_dims)
    self.feat2emb = nn.Linear(self.feat_len, self.embed_dim)
    self.token2emb = nn.Linear(self.token_len, self.embed_dim)
def __init__(self, encoder, encoder_args, classifier, classifier_args,
             n_cls_lst):
    super().__init__()
    self.encoder = models.make(encoder, **encoder_args)
    classifier_args['in_dim'] = self.encoder.out_dim
    classifiers = []
    for n_cls in n_cls_lst:
        classifier_args['n_classes'] = n_cls
        classifiers.append(models.make(classifier, **classifier_args))
    self.classifiers = nn.ModuleList(classifiers)
def __init__(self, encoder_spec):
    super().__init__()
    self.encoder = models.make(encoder_spec)
    imnet_spec = {
        'name': 'mlp',
        'args': {
            'in_dim': 3,
            'out_dim': self.encoder.out_dim * 9 * 3,
            'hidden_list': [256]
        }
    }
    self.imnet = models.make(imnet_spec)
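# Note on the imnet width above: out_dim = encoder.out_dim * 9 * 3 is
# consistent with a MetaSR-style hypernetwork, where the MLP predicts, per
# query coordinate, the weights of a linear map from the 3x3-unfolded feature
# (encoder.out_dim * 9 inputs) to 3 RGB outputs. For example, with
# encoder.out_dim = 64: 64 * 9 * 3 = 1728 predicted weights.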
def __init__(self, encoder, encoder_args={}, n_way=2, method='anil'):
    super(MAML_ResNet, self).__init__()
    self.method = method
    self.encoder = models.make(encoder, **encoder_args)
    # Only the last (linear) layer is used for adaptation in ANIL
    self.classifier = MetaLinear(self.encoder.out_dim, n_way)
def __init__(self, encoder, encoder_args={}, method='original'):
    super().__init__()
    self.n_way = 2
    self.n_shot = 6
    self.encoder = models.make(encoder, **encoder_args)
    self.feat_len = self.encoder.out_dim
    self.method = method
    self.mlp_g = nn.Sequential(
        nn.Linear(self.feat_len * 2, self.feat_len), nn.ReLU(),
        nn.Linear(self.feat_len, self.feat_len), nn.ReLU())
    if self.method == 'original':
        self.mlp_f = nn.Sequential(
            nn.Linear(self.feat_len, self.feat_len), nn.ReLU(),
            # nn.Dropout(0.5),
            nn.Linear(self.feat_len, 1))
    elif self.method == 'modified':
        self.mlp_f = nn.Sequential(
            nn.Linear(self.feat_len * 2, self.feat_len), nn.ReLU(),
            # nn.Dropout(0.5),
            nn.Linear(self.feat_len, 2))
    else:
        raise Exception('method should be in [original, modified]')
    print('wren, {}'.format(method))
def __init__(self, encoder, encoder_args={}):
    super().__init__()
    self.n_way = 2
    self.n_shot = 6
    self.encoder = models.make(encoder, **encoder_args)
    self.mlp = nn.Sequential(
        nn.Linear(self.encoder.out_dim * 2, self.encoder.out_dim),
        nn.LeakyReLU(0.1),
        nn.Linear(self.encoder.out_dim, self.n_way))
    print('cnn-baseline')
def set_trainer(config):
    # load a checkpoint
    if config.checkpoint is not None:
        # load data
        train_loader = load_data(config, 'train', False)
        model, optimizer, word_map, start_epoch = load_checkpoint(
            config.checkpoint, device)
        print('\nLoaded checkpoint from epoch %d.\n' % (start_epoch - 1))
    # or initialize model
    else:
        start_epoch = 0
        # load data
        train_loader, embeddings, emb_size, word_map, n_classes, vocab_size = \
            load_data(config, 'train', True)
        model = models.make(config=config, n_classes=n_classes,
                            vocab_size=vocab_size, embeddings=embeddings,
                            emb_size=emb_size)
        optimizer = optim.Adam(
            params=filter(lambda p: p.requires_grad, model.parameters()),
            lr=config.lr)

    # loss function
    loss_function = nn.CrossEntropyLoss()

    # move to device
    model = model.to(device)
    loss_function = loss_function.to(device)

    trainer = Trainer(num_epochs=config.num_epochs,
                      start_epoch=start_epoch,
                      train_loader=train_loader,
                      model=model,
                      model_name=config.model_name,
                      loss_function=loss_function,
                      optimizer=optimizer,
                      lr_decay=config.lr_decay,
                      dataset_name=config.dataset,
                      word_map=word_map,
                      grad_clip=config.grad_clip,
                      print_freq=config.print_freq,
                      checkpoint_path=config.checkpoint_path,
                      checkpoint_basename=config.checkpoint_basename,
                      tensorboard=config.tensorboard,
                      log_dir=config.log_dir)
    return trainer
def __init__(self, encoder, encoder_args={}, method='cos', temp=10.,
             temp_learnable=True):
    super().__init__()
    self.encoder = models.make(encoder, **encoder_args)
    self.method = method
    if temp_learnable:
        self.temp = nn.Parameter(torch.tensor(temp))
    else:
        self.temp = temp
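# A minimal sketch (assumed; the forward pass is not shown in this snippet)
# of how a 'cos' head typically scores queries against class prototypes,
# with self.temp scaling the cosine similarities before the softmax:
import torch
import torch.nn.functional as F

def cos_logits(proto, query, temp):
    # proto: (batch, n_way, dim); query: (batch, n_query, dim)
    proto = F.normalize(proto, dim=-1)
    query = F.normalize(query, dim=-1)
    return torch.bmm(query, proto.transpose(1, 2)) * temp  # (batch, n_query, n_way)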
def __init__(self, encoder_spec, imnet_spec=None, local_ensemble=True,
             feat_unfold=True, cell_decode=True):
    super().__init__()
    self.local_ensemble = local_ensemble
    self.feat_unfold = feat_unfold
    self.cell_decode = cell_decode

    self.encoder = models.make(encoder_spec)
    if imnet_spec is not None:
        imnet_in_dim = self.encoder.out_dim
        if self.feat_unfold:
            imnet_in_dim *= 9
        imnet_in_dim += 2  # attach coord
        if self.cell_decode:
            imnet_in_dim += 2
        self.imnet = models.make(imnet_spec, args={'in_dim': imnet_in_dim})
    else:
        self.imnet = None
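# For illustration: with encoder.out_dim = 64 (an assumption) and both flags
# on, imnet_in_dim = 64 * 9 + 2 + 2 = 580 -- the 3x3-unfolded feature plus a
# 2-D query coordinate and a 2-D cell size.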
def __init__(self, encoder, encoder_args={}, method='sqr', temp=1.,
             temp_learnable=False, progressive=True):
    super().__init__()
    self.encoder = models.make(encoder, **encoder_args)
    self.method = method
    self.progressive = progressive
    if temp_learnable:
        self.temp = nn.Parameter(torch.tensor(temp))
    else:
        self.temp = temp
def __init__(self, encoder, encoder_args={}, head='SVM', normalize=True):
    super().__init__()
    self.encoder = models.make(encoder, **encoder_args)
    self.head = head
    self.normalize = normalize
    # Choose the classification head
    if self.head == 'ProtoNet':
        print('Method: MetaOptNet, head: ProtoNet, Normalize: {}'.format(
            self.normalize))
        self.cls_head = ClassificationHead(base_learner='ProtoNet').cuda()
    elif self.head == 'SVM':
        print('Method: MetaOptNet, head: SVM')
        self.cls_head = ClassificationHead(base_learner='SVM-CS').cuda()
    else:
        raise ValueError('head should be in [ProtoNet, SVM]')
def train(config) -> None:
    """
    Train the model.

    Args:
        config: configuration options

    The trained model is evaluated on the test split and saved to
    config.checkpoint_path; nothing is returned.
    """
    # Load features preprocessed by preprocess.py
    if config.feature_method == 'o':
        x_train, x_test, y_train, y_test = of.load_feature(
            config, config.train_feature_path_opensmile, train=True)
    elif config.feature_method == 'l':
        x_train, x_test, y_train, y_test = lf.load_feature(
            config, config.train_feature_path_librosa, train=True)
    # x_train, x_test: (n_samples, n_feats)
    # y_train, y_test: (n_samples,)

    # Build the model
    model = models.make(config=config, n_feats=x_train.shape[1])

    # Train the model
    print('----- start training', config.model, '-----')
    if config.model in ['lstm', 'cnn1d', 'cnn2d']:
        # one-hot encode the labels
        y_train, y_val = np_utils.to_categorical(y_train), \
            np_utils.to_categorical(y_test)
        model.train(x_train, y_train, x_test, y_val,
                    batch_size=config.batch_size, n_epochs=config.epochs)
    else:
        model.train(x_train, y_train)
    print('----- end training ', config.model, ' -----')

    # Evaluate the model
    model.evaluate(x_test, y_test)

    # Save the trained model
    model.save(config.checkpoint_path, config.checkpoint_name)
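# For reference, np_utils.to_categorical (Keras) one-hot encodes integer
# labels, e.g. to_categorical([0, 2, 1]) ->
# [[1., 0., 0.], [0., 0., 1.], [0., 1., 0.]]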
def __init__(self, prog_synthesis, prog_synthesis_args={}, prog_use_mode=0,
             update_prog_synthesis=True, method='cos', temp=10.,
             temp_learnable=True):
    """
    Meta-learning with a pretrained program synthesis module.

    :param prog_synthesis: the pre-trained program synthesis module
    :param prog_synthesis_args: the arguments of the program synthesis module
    :param prog_use_mode: 0 - image feature only, 1 - program feature only,
        2 - program only, 3 - image feature & program feature,
        4 - image feature & program
    """
    super().__init__()
    self.prog_synthesis = models.make(prog_synthesis, **prog_synthesis_args)
    self.prog_use_mode = prog_use_mode
    self.update_prog_synthesis = update_prog_synthesis
    self.method = method
    if temp_learnable:
        self.temp = nn.Parameter(torch.tensor(temp))
    else:
        self.temp = temp

    print('prog_use_mode: {}'.format(self.prog_use_mode))
    if self.prog_use_mode != 0:
        if self.prog_use_mode in [1, 2]:
            emb_dim = self.prog_synthesis.n_layers * \
                self.prog_synthesis.hidden_dim
        else:
            if self.prog_synthesis.recurrent_model.model_type == 'lstm':
                emb_dim = self.prog_synthesis.encoder.out_dim + \
                    self.prog_synthesis.n_layers * \
                    self.prog_synthesis.hidden_dim
            else:
                emb_dim = self.prog_synthesis.encoder.out_dim + \
                    self.prog_synthesis.hidden_dim
        self.out_layer = nn.Linear(emb_dim,
                                   self.prog_synthesis.encoder.out_dim)
def __init__(self, encoder, encoder_args={}, dynamic_k=True):
    # N-way, K-shot (N=2, K=6)
    super().__init__()
    self.encoder = models.make(encoder, **encoder_args)
    self.N = 2
    self.K = 6
    num_channels = self.encoder.out_dim + self.N
    self.dynamic_k = dynamic_k

    num_filters = int(math.ceil(math.log(self.N * self.K + 1, 2)))
    self.attention1 = AttentionBlock(num_channels, 64, 32)
    num_channels += 32
    self.tc1 = TCBlock(num_channels, self.N * self.K + 1, 128)
    num_channels += num_filters * 128
    self.attention2 = AttentionBlock(num_channels, 256, 128)
    num_channels += 128
    self.tc2 = TCBlock(num_channels, self.N * self.K + 1, 128)
    num_channels += num_filters * 128
    self.attention3 = AttentionBlock(num_channels, 512, 256)
    num_channels += 256
    self.fc = nn.Linear(num_channels, self.N)
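# Channel bookkeeping for the blocks above, with N=2, K=6: the sequence
# length is N*K + 1 = 13, so num_filters = ceil(log2(13)) = 4 and each
# TCBlock adds 4 * 128 = 512 channels. Assuming encoder.out_dim = 512 (an
# assumption, not fixed here): 514 -> 546 -> 1058 -> 1186 -> 1698 -> 1954
# inputs to the final linear layer.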
def main(config):
    svname = args.name
    if svname is None:
        svname = 'pretrain-multi'
    if args.tag is not None:
        svname += '_' + args.tag
    save_path = os.path.join('./save', svname)
    utils.ensure_path(save_path)
    utils.set_log_path(save_path)
    writer = SummaryWriter(os.path.join(save_path, 'tensorboard'))

    yaml.dump(config, open(os.path.join(save_path, 'config.yaml'), 'w'))

    #### Dataset ####

    def make_dataset(name):
        dataset = make_md([name], 'batch', split='train',
                          image_size=126, batch_size=256)
        return dataset

    ds_names = ['ilsvrc_2012', 'omniglot', 'aircraft', 'cu_birds', 'dtd',
                'quickdraw', 'fungi', 'vgg_flower']
    datasets = []
    for name in ds_names:
        datasets.append(make_dataset(name))
    iters = []
    for d in datasets:
        iters.append(d.make_one_shot_iterator().get_next())

    to_torch_labels = lambda a: torch.from_numpy(a).long()
    to_pil = transforms.ToPILImage()
    augmentation = transforms.Compose([
        transforms.Resize(146),
        transforms.RandomResizedCrop(128),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    ########

    #### Model and Optimizer ####

    if config.get('load'):
        model_sv = torch.load(config['load'])
        model = models.load(model_sv)
    else:
        model = models.make(config['model'], **config['model_args'])

    if config.get('_parallel'):
        model = nn.DataParallel(model)

    utils.log('num params: {}'.format(utils.compute_n_params(model)))

    optimizer, lr_scheduler = utils.make_optimizer(
        model.parameters(), config['optimizer'], **config['optimizer_args'])

    ########

    max_epoch = config['max_epoch']
    save_epoch = config.get('save_epoch')
    max_va = 0.
    timer_used = utils.Timer()
    timer_epoch = utils.Timer()

    for epoch in range(1, max_epoch + 1):
        timer_epoch.s()
        aves_keys = ['tl', 'ta', 'vl', 'va']
        aves = {k: utils.Averager() for k in aves_keys}

        # train
        model.train()
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch)

        n_batch = 915547 // 256
        with tf.Session() as sess:
            for i_batch in tqdm(range(n_batch)):
                if random.randint(0, 1) == 0:
                    ds_id = 0
                else:
                    ds_id = random.randint(1, len(datasets) - 1)
                next_element = iters[ds_id]
                e, cfr_id = sess.run(next_element)
                data_, label = e[0], to_torch_labels(e[1])
                data_ = ((data_ + 1.0) * 0.5 * 255).astype('uint8')
                data = torch.zeros(256, 3, 128, 128).float()
                for i in range(len(data_)):
                    x = data_[i]
                    x = to_pil(x)
                    x = augmentation(x)
                    data[i] = x
                data = data.cuda()
                label = label.cuda()

                logits = model(data, cfr_id=ds_id)
                loss = F.cross_entropy(logits, label)
                acc = utils.compute_acc(logits, label)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                aves['tl'].add(loss.item())
                aves['ta'].add(acc)

                logits = None
                loss = None

        # post
        if lr_scheduler is not None:
            lr_scheduler.step()

        for k, v in aves.items():
            aves[k] = v.item()

        t_epoch = utils.time_str(timer_epoch.t())
        t_used = utils.time_str(timer_used.t())
        t_estimate = utils.time_str(timer_used.t() / epoch * max_epoch)

        if epoch <= max_epoch:
            epoch_str = str(epoch)
        else:
            epoch_str = 'ex'
        log_str = 'epoch {}, train {:.4f}|{:.4f}'.format(
            epoch_str, aves['tl'], aves['ta'])
        writer.add_scalars('loss', {'train': aves['tl']}, epoch)
        writer.add_scalars('acc', {'train': aves['ta']}, epoch)

        if epoch <= max_epoch:
            log_str += ', {} {}/{}'.format(t_epoch, t_used, t_estimate)
        else:
            log_str += ', {}'.format(t_epoch)
        utils.log(log_str)

        if config.get('_parallel'):
            model_ = model.module
        else:
            model_ = model

        training = {
            'epoch': epoch,
            'optimizer': config['optimizer'],
            'optimizer_args': config['optimizer_args'],
            'optimizer_sd': optimizer.state_dict(),
        }
        save_obj = {
            'file': __file__,
            'config': config,
            'model': config['model'],
            'model_args': config['model_args'],
            'model_sd': model_.state_dict(),
            'training': training,
        }
        if epoch <= max_epoch:
            torch.save(save_obj, os.path.join(save_path, 'epoch-last.pth'))

            if (save_epoch is not None) and epoch % save_epoch == 0:
                torch.save(save_obj, os.path.join(
                    save_path, 'epoch-{}.pth'.format(epoch)))

            if aves['va'] > max_va:
                max_va = aves['va']
                torch.save(save_obj, os.path.join(save_path, 'max-va.pth'))
        else:
            torch.save(save_obj, os.path.join(save_path, 'epoch-ex.pth'))

        writer.flush()
def main(config):
    svname = args.name
    if svname is None:
        svname = 'classifier_{}'.format(config['train_dataset'])
        svname += '_' + config['model_args']['encoder']
        clsfr = config['model_args']['classifier']
        if clsfr != 'linear-classifier':
            svname += '-' + clsfr
    if args.tag is not None:
        svname += '_' + args.tag
    save_path = os.path.join('./save', svname)
    utils.ensure_path(save_path)
    utils.set_log_path(save_path)
    writer = SummaryWriter(os.path.join(save_path, 'tensorboard'))

    yaml.dump(config, open(os.path.join(save_path, 'config.yaml'), 'w'))

    #### Dataset ####

    # train
    train_dataset = datasets.make(config['train_dataset'],
                                  **config['train_dataset_args'])
    augmentations = [
        transforms.Compose([
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]),
        transforms.Compose([
            transforms.RandomResizedCrop(size=(80, 80), scale=(0.08, 1.0),
                                         ratio=(0.75, 1.3333)),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]),
        transforms.Compose([
            transforms.RandomRotation(35),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]),
        transforms.Compose([
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.1),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]),
        transforms.Compose([
            transforms.RandomResizedCrop(size=(80, 80), scale=(0.08, 1.0),
                                         ratio=(0.75, 1.3333)),
            transforms.RandomRotation(35),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]),
        transforms.Compose([
            transforms.RandomRotation(35),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.1),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]),
        transforms.Compose([
            transforms.RandomResizedCrop(size=(80, 80), scale=(0.08, 1.0),
                                         ratio=(0.75, 1.3333)),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.1),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]),
        transforms.Compose([
            transforms.RandomRotation(35),
            transforms.RandomResizedCrop(size=(80, 80), scale=(0.08, 1.0),
                                         ratio=(0.75, 1.3333)),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.1),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
    ]
    train_dataset.transform = augmentations[int(config['_a'])]
    print(train_dataset.transform)
    print("_a", config['_a'])
    input("Continue with these augmentations?")
    train_loader = DataLoader(train_dataset, config['batch_size'],
                              shuffle=True, num_workers=0, pin_memory=True)
    utils.log('train dataset: {} (x{}), {}'.format(
        train_dataset[0][0].shape, len(train_dataset),
        train_dataset.n_classes))
    if config.get('visualize_datasets'):
        utils.visualize_dataset(train_dataset, 'train_dataset', writer)

    # val
    if config.get('val_dataset'):
        eval_val = True
        val_dataset = datasets.make(config['val_dataset'],
                                    **config['val_dataset_args'])
        val_loader = DataLoader(val_dataset, config['batch_size'],
                                num_workers=0, pin_memory=True)
        utils.log('val dataset: {} (x{}), {}'.format(
            val_dataset[0][0].shape, len(val_dataset), val_dataset.n_classes))
        if config.get('visualize_datasets'):
            utils.visualize_dataset(val_dataset, 'val_dataset', writer)
    else:
        eval_val = False

    # few-shot eval
    if config.get('fs_dataset'):
        ef_epoch = config.get('eval_fs_epoch')
        if ef_epoch is None:
            ef_epoch = 5
        eval_fs = True

        fs_dataset = datasets.make(config['fs_dataset'],
                                   **config['fs_dataset_args'])
        utils.log('fs dataset: {} (x{}), {}'.format(
            fs_dataset[0][0].shape, len(fs_dataset), fs_dataset.n_classes))
        if config.get('visualize_datasets'):
            utils.visualize_dataset(fs_dataset, 'fs_dataset', writer)

        n_way = 5
        n_query = 15
        n_shots = [1, 5]
        fs_loaders = []
        for n_shot in n_shots:
            fs_sampler = CategoriesSampler(fs_dataset.label, 200, n_way,
                                           n_shot + n_query, ep_per_batch=4)
            fs_loader = DataLoader(fs_dataset, batch_sampler=fs_sampler,
                                   num_workers=0, pin_memory=True)
            fs_loaders.append(fs_loader)
    else:
        eval_fs = False

    ########

    #### Model and Optimizer ####

    if config.get('load'):
        model_sv = torch.load(config['load'])
        model = models.load(model_sv)
    else:
        model = models.make(config['model'], **config['model_args'])

    if eval_fs:
        fs_model = models.make('meta-baseline', encoder=None)
        fs_model.encoder = model.encoder

    if config.get('_parallel'):
        model = nn.DataParallel(model)
        if eval_fs:
            fs_model = nn.DataParallel(fs_model)

    utils.log('num params: {}'.format(utils.compute_n_params(model)))

    optimizer, lr_scheduler = utils.make_optimizer(
        model.parameters(), config['optimizer'], **config['optimizer_args'])

    ########

    max_epoch = config['max_epoch']
    save_epoch = config.get('save_epoch')
    max_va = 0.
    timer_used = utils.Timer()
    timer_epoch = utils.Timer()

    for epoch in range(1, max_epoch + 1 + 1):
        if epoch == max_epoch + 1:
            if not config.get('epoch_ex'):
                break
            train_dataset.transform = train_dataset.default_transform
            print(train_dataset.transform)
            train_loader = DataLoader(train_dataset, config['batch_size'],
                                      shuffle=True, num_workers=0,
                                      pin_memory=True)

        timer_epoch.s()
        aves_keys = ['tl', 'ta', 'vl', 'va']
        if eval_fs:
            for n_shot in n_shots:
                aves_keys += ['fsa-' + str(n_shot)]
        aves = {k: utils.Averager() for k in aves_keys}

        # train
        model.train()
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch)

        # for data, label in train_loader:
        for data, label in tqdm(train_loader, desc='train', leave=False):
            data, label = data.cuda(), label.cuda()
            logits = model(data)
            loss = F.cross_entropy(logits, label)
            acc = utils.compute_acc(logits, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            aves['tl'].add(loss.item())
            aves['ta'].add(acc)

            logits = None
            loss = None

        # eval
        if eval_val:
            model.eval()
            for data, label in tqdm(val_loader, desc='val', leave=False):
                data, label = data.cuda(), label.cuda()
                with torch.no_grad():
                    logits = model(data)
                    loss = F.cross_entropy(logits, label)
                    acc = utils.compute_acc(logits, label)
                aves['vl'].add(loss.item())
                aves['va'].add(acc)

        if eval_fs and (epoch % ef_epoch == 0 or epoch == max_epoch + 1):
            fs_model.eval()
            for i, n_shot in enumerate(n_shots):
                np.random.seed(0)
                for data, _ in tqdm(fs_loaders[i], desc='fs-' + str(n_shot),
                                    leave=False):
                    x_shot, x_query = fs.split_shot_query(
                        data.cuda(), n_way, n_shot, n_query, ep_per_batch=4)
                    label = fs.make_nk_label(n_way, n_query,
                                             ep_per_batch=4).cuda()
                    with torch.no_grad():
                        logits = fs_model(x_shot, x_query).view(-1, n_way)
                        acc = utils.compute_acc(logits, label)
                    aves['fsa-' + str(n_shot)].add(acc)

        # post
        if lr_scheduler is not None:
            lr_scheduler.step()

        for k, v in aves.items():
            aves[k] = v.item()

        t_epoch = utils.time_str(timer_epoch.t())
        t_used = utils.time_str(timer_used.t())
        t_estimate = utils.time_str(timer_used.t() / epoch * max_epoch)

        if epoch <= max_epoch:
            epoch_str = str(epoch)
        else:
            epoch_str = 'ex'
        log_str = 'epoch {}, train {:.4f}|{:.4f}'.format(
            epoch_str, aves['tl'], aves['ta'])
        writer.add_scalars('loss', {'train': aves['tl']}, epoch)
        writer.add_scalars('acc', {'train': aves['ta']}, epoch)

        if eval_val:
            log_str += ', val {:.4f}|{:.4f}'.format(aves['vl'], aves['va'])
            writer.add_scalars('loss', {'val': aves['vl']}, epoch)
            writer.add_scalars('acc', {'val': aves['va']}, epoch)

        if eval_fs and (epoch % ef_epoch == 0 or epoch == max_epoch + 1):
            log_str += ', fs'
            for n_shot in n_shots:
                key = 'fsa-' + str(n_shot)
                log_str += ' {}: {:.4f}'.format(n_shot, aves[key])
                writer.add_scalars('acc', {key: aves[key]}, epoch)

        if epoch <= max_epoch:
            log_str += ', {} {}/{}'.format(t_epoch, t_used, t_estimate)
        else:
            log_str += ', {}'.format(t_epoch)
        utils.log(log_str)

        if config.get('_parallel'):
            model_ = model.module
        else:
            model_ = model

        training = {
            'epoch': epoch,
            'optimizer': config['optimizer'],
            'optimizer_args': config['optimizer_args'],
            'optimizer_sd': optimizer.state_dict(),
        }
        save_obj = {
            'file': __file__,
            'config': config,
            'model': config['model'],
            'model_args': config['model_args'],
            'model_sd': model_.state_dict(),
            'training': training,
        }
        if epoch <= max_epoch:
            torch.save(save_obj, os.path.join(save_path, 'epoch-last.pth'))

            if (save_epoch is not None) and epoch % save_epoch == 0:
                torch.save(save_obj, os.path.join(
                    save_path, 'epoch-{}.pth'.format(epoch)))

            if aves['va'] > max_va:
                max_va = aves['va']
                torch.save(save_obj, os.path.join(save_path, 'max-va.pth'))
        else:
            torch.save(save_obj, os.path.join(save_path, 'epoch-ex.pth'))

        writer.flush()
def main(config):
    # Environment setup
    save_dir = config['save_dir']
    utils.ensure_path(save_dir)
    with open(osp.join(save_dir, 'config.yaml'), 'w') as f:
        yaml.dump(config, f, sort_keys=False)

    global log, writer
    logger = set_logger(osp.join(save_dir, 'log.txt'))
    log = logger.info
    writer = SummaryWriter(osp.join(save_dir, 'tensorboard'))

    os.environ['WANDB_NAME'] = config['exp_name']
    os.environ['WANDB_DIR'] = config['save_dir']
    if not config.get('wandb_upload', False):
        os.environ['WANDB_MODE'] = 'dryrun'
    t = config['wandb']
    os.environ['WANDB_API_KEY'] = t['api_key']
    wandb.init(project=t['project'], entity=t['entity'], config=config)

    log('logging init done.')
    log(f'wandb id: {wandb.run.id}')

    # Dataset, model and optimizer
    train_dataset = datasets.make(config['train_dataset'])
    test_dataset = datasets.make(config['test_dataset'])

    model = models.make(config['model'], args=None).cuda()
    log(f'model #params: {utils.compute_num_params(model)}')

    n_gpus = len(os.environ['CUDA_VISIBLE_DEVICES'].split(','))
    if n_gpus > 1:
        model = nn.DataParallel(model)

    optimizer = utils.make_optimizer(model.parameters(), config['optimizer'])

    train_loader = DataLoader(train_dataset, config['batch_size'],
                              shuffle=True, num_workers=8, pin_memory=True)
    test_loader = DataLoader(test_dataset, config['batch_size'],
                             num_workers=8, pin_memory=True)

    # Ready for training
    max_epoch = config['max_epoch']
    n_milestones = config.get('n_milestones', 1)
    milestone_epoch = max_epoch // n_milestones
    min_test_loss = 1e18

    sample_batch_train = sample_data_batch(train_dataset).cuda()
    sample_batch_test = sample_data_batch(test_dataset).cuda()

    epoch_timer = utils.EpochTimer(max_epoch)
    for epoch in range(1, max_epoch + 1):
        log_text = f'epoch {epoch}'

        # Train
        model.train()

        adjust_lr(optimizer, epoch, max_epoch, config)
        log_temp_scalar('lr', optimizer.param_groups[0]['lr'], epoch)

        ave_scalars = {k: utils.Averager() for k in ['loss']}
        pbar = tqdm(train_loader, desc='train', leave=False)
        for data in pbar:
            data = data.cuda()
            t = train_step(model, data, data, optimizer)
            for k, v in t.items():
                ave_scalars[k].add(v, len(data))
            pbar.set_description(desc=f"train loss:{t['loss']:.4f}")

        log_text += ', train:'
        for k, v in ave_scalars.items():
            v = v.item()
            log_text += f' {k}={v:.4f}'
            log_temp_scalar('train/' + k, v, epoch)

        # Test
        model.eval()

        ave_scalars = {k: utils.Averager() for k in ['loss']}
        pbar = tqdm(test_loader, desc='test', leave=False)
        for data in pbar:
            data = data.cuda()
            t = eval_step(model, data, data)
            for k, v in t.items():
                ave_scalars[k].add(v, len(data))
            pbar.set_description(desc=f"test loss:{t['loss']:.4f}")

        log_text += ', test:'
        for k, v in ave_scalars.items():
            v = v.item()
            log_text += f' {k}={v:.4f}'
            log_temp_scalar('test/' + k, v, epoch)

        test_loss = ave_scalars['loss'].item()

        if epoch % milestone_epoch == 0:
            with torch.no_grad():
                pred = model(sample_batch_train).clamp(0, 1)
                video_batch = torch.cat([sample_batch_train, pred], dim=0)
                log_temp_videos('train/videos', video_batch, epoch)
                img_batch = video_batch[:, :, 3, :, :]
                log_temp_images('train/images', img_batch, epoch)

                pred = model(sample_batch_test).clamp(0, 1)
                video_batch = torch.cat([sample_batch_test, pred], dim=0)
                log_temp_videos('test/videos', video_batch, epoch)
                img_batch = video_batch[:, :, 3, :, :]
                log_temp_images('test/images', img_batch, epoch)

        # Summary and save
        log_text += ', {} {}/{}'.format(*epoch_timer.step())
        log(log_text)

        model_ = model.module if n_gpus > 1 else model
        model_spec = config['model']
        model_spec['sd'] = model_.state_dict()
        optimizer_spec = config['optimizer']
        optimizer_spec['sd'] = optimizer.state_dict()
        pth_file = {
            'model': model_spec,
            'optimizer': optimizer_spec,
            'epoch': epoch,
        }

        if test_loss < min_test_loss:
            min_test_loss = test_loss
            wandb.run.summary['min_test_loss'] = min_test_loss
            torch.save(pth_file, osp.join(save_dir, 'min-test-loss.pth'))

        torch.save(pth_file, osp.join(save_dir, 'epoch-last.pth'))

        writer.flush()
def load_model(model_id):
    graph = tf.Graph()
    model, config = models.make(model_id)
    mdp = model(graph, config)
    return mdp, config
def main(config):
    svname = args.name
    if svname is None:
        svname = 'meta_{}-{}shot'.format(config['train_dataset'],
                                         config['n_shot'])
        svname += '_' + config['model']
        if config['model_args'].get('encoder'):
            svname += '-' + config['model_args']['encoder']
        if config['model_args'].get('prog_synthesis'):
            svname += '-' + config['model_args']['prog_synthesis']
    svname += '-seed' + str(args.seed)
    if args.tag is not None:
        svname += '_' + args.tag
    save_path = os.path.join(args.save_dir, svname)
    utils.ensure_path(save_path, remove=False)
    utils.set_log_path(save_path)
    writer = SummaryWriter(os.path.join(save_path, 'tensorboard'))

    yaml.dump(config, open(os.path.join(save_path, 'config.yaml'), 'w'))
    logger = utils.Logger(file_name=os.path.join(save_path, "log_sdout.txt"),
                          file_mode="a+", should_flush=True)

    #### Dataset ####

    n_way, n_shot = config['n_way'], config['n_shot']
    n_query = config['n_query']

    if config.get('n_train_way') is not None:
        n_train_way = config['n_train_way']
    else:
        n_train_way = n_way
    if config.get('n_train_shot') is not None:
        n_train_shot = config['n_train_shot']
    else:
        n_train_shot = n_shot
    if config.get('ep_per_batch') is not None:
        ep_per_batch = config['ep_per_batch']
    else:
        ep_per_batch = 1

    random_state = np.random.RandomState(args.seed)
    print('seed:', args.seed)

    # train
    train_dataset = datasets.make(config['train_dataset'],
                                  **config['train_dataset_args'])
    utils.log('train dataset: {} (x{})'.format(train_dataset[0][0].shape,
                                               len(train_dataset)))
    if config.get('visualize_datasets'):
        utils.visualize_dataset(train_dataset, 'train_dataset', writer)
    train_sampler = BongardSampler(train_dataset.n_tasks,
                                   config['train_batches'], ep_per_batch,
                                   random_state.randint(2**31))
    train_loader = DataLoader(train_dataset, batch_sampler=train_sampler,
                              num_workers=8, pin_memory=True)

    # tvals
    tval_loaders = {}
    tval_name_ntasks_dict = {
        'tval': 2000,
        'tval_ff': 600,
        'tval_bd': 480,
        'tval_hd_comb': 400,
        'tval_hd_novel': 320
    }  # numbers depend on the dataset
    for tval_type in tval_name_ntasks_dict.keys():
        if config.get('{}_dataset'.format(tval_type)):
            tval_dataset = datasets.make(
                config['{}_dataset'.format(tval_type)],
                **config['{}_dataset_args'.format(tval_type)])
            utils.log('{} dataset: {} (x{})'.format(
                tval_type, tval_dataset[0][0].shape, len(tval_dataset)))
            if config.get('visualize_datasets'):
                utils.visualize_dataset(tval_dataset, 'tval_ff_dataset',
                                        writer)
            tval_sampler = BongardSampler(
                tval_dataset.n_tasks,
                n_batch=tval_name_ntasks_dict[tval_type] // ep_per_batch,
                ep_per_batch=ep_per_batch,
                seed=random_state.randint(2**31))
            tval_loader = DataLoader(tval_dataset,
                                     batch_sampler=tval_sampler,
                                     num_workers=8, pin_memory=True)
            tval_loaders.update({tval_type: tval_loader})
        else:
            tval_loaders.update({tval_type: None})

    # val
    val_dataset = datasets.make(config['val_dataset'],
                                **config['val_dataset_args'])
    utils.log('val dataset: {} (x{})'.format(val_dataset[0][0].shape,
                                             len(val_dataset)))
    if config.get('visualize_datasets'):
        utils.visualize_dataset(val_dataset, 'val_dataset', writer)
    val_sampler = BongardSampler(val_dataset.n_tasks,
                                 n_batch=900 // ep_per_batch,
                                 ep_per_batch=ep_per_batch,
                                 seed=random_state.randint(2**31))
    val_loader = DataLoader(val_dataset, batch_sampler=val_sampler,
                            num_workers=8, pin_memory=True)

    ########

    #### Model and optimizer ####

    if config.get('load'):
        print('loading pretrained model: ', config['load'])
        model = models.load(torch.load(config['load']))
    else:
        model = models.make(config['model'], **config['model_args'])

        if config.get('load_encoder'):
            print('loading pretrained encoder: ',
                  config['load_encoder'])
            encoder = models.load(torch.load(config['load_encoder'])).encoder
            model.encoder.load_state_dict(encoder.state_dict())

        if config.get('load_prog_synthesis'):
            print('loading pretrained program synthesis model: ',
                  config['load_prog_synthesis'])
            prog_synthesis = models.load(
                torch.load(config['load_prog_synthesis']))
            model.prog_synthesis.load_state_dict(prog_synthesis.state_dict())

    if config.get('_parallel'):
        model = nn.DataParallel(model)

    utils.log('num params: {}'.format(utils.compute_n_params(model)))

    optimizer, lr_scheduler = utils.make_optimizer(model.parameters(),
                                                   config['optimizer'],
                                                   **config['optimizer_args'])

    ########

    max_epoch = config['max_epoch']
    save_epoch = config.get('save_epoch')
    max_va = 0.
    timer_used = utils.Timer()
    timer_epoch = utils.Timer()

    aves_keys = ['tl', 'ta', 'vl', 'va']
    tval_tuple_lst = []
    for k, v in tval_loaders.items():
        if v is not None:
            loss_key = 'tvl' + k.split('tval')[-1]
            acc_key = 'tva' + k.split('tval')[-1]
            aves_keys.append(loss_key)
            aves_keys.append(acc_key)
            tval_tuple_lst.append((k, v, loss_key, acc_key))

    trlog = dict()
    for k in aves_keys:
        trlog[k] = []

    for epoch in range(1, max_epoch + 1):
        timer_epoch.s()
        aves = {k: utils.Averager() for k in aves_keys}

        # train
        model.train()
        if config.get('freeze_bn'):
            utils.freeze_bn(model)
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch)

        for data, label in tqdm(train_loader, desc='train', leave=False):
            x_shot, x_query = fs.split_shot_query(data.cuda(), n_train_way,
                                                  n_train_shot, n_query,
                                                  ep_per_batch=ep_per_batch)
            label_query = fs.make_nk_label(n_train_way, n_query,
                                           ep_per_batch=ep_per_batch).cuda()

            if config['model'] == 'snail':
                # only use one selected label_query
                query_dix = random_state.randint(n_train_way * n_query)
                label_query = label_query.view(ep_per_batch, -1)[:, query_dix]
                x_query = x_query[:, query_dix:query_dix + 1]

            if config['model'] == 'maml':  # maml needs gradients
                model.zero_grad()

            logits = model(x_shot, x_query).view(-1, n_train_way)
            loss = F.cross_entropy(logits, label_query)
            acc = utils.compute_acc(logits, label_query)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            aves['tl'].add(loss.item())
            aves['ta'].add(acc)

            logits = None
            loss = None

        # eval
        model.eval()
        for name, loader, name_l, name_a in [('val', val_loader, 'vl', 'va')
                                             ] + tval_tuple_lst:
            if config.get('{}_dataset'.format(name)) is None:
                continue
            np.random.seed(0)
            for data, _ in tqdm(loader, desc=name, leave=False):
                x_shot, x_query = fs.split_shot_query(
                    data.cuda(), n_way, n_shot, n_query,
                    ep_per_batch=ep_per_batch)
                label_query = fs.make_nk_label(
                    n_way, n_query, ep_per_batch=ep_per_batch).cuda()

                if config['model'] == 'snail':
                    # only use one randomly selected label_query
                    query_dix = random_state.randint(n_train_way)
                    label_query = label_query.view(ep_per_batch,
                                                   -1)[:, query_dix]
                    x_query = x_query[:, query_dix:query_dix + 1]

                if config['model'] == 'maml':  # maml needs gradients
                    model.zero_grad()
                    logits = model(x_shot, x_query, eval=True).view(-1, n_way)
                    loss = F.cross_entropy(logits, label_query)
                    acc = utils.compute_acc(logits, label_query)
                else:
                    with torch.no_grad():
                        logits = model(x_shot, x_query,
                                       eval=True).view(-1, n_way)
                        loss = F.cross_entropy(logits, label_query)
                        acc = utils.compute_acc(logits, label_query)

                aves[name_l].add(loss.item())
                aves[name_a].add(acc)

        # post
        if lr_scheduler is not None:
            lr_scheduler.step()

        for k, v in aves.items():
            aves[k] = v.item()
            trlog[k].append(aves[k])

        t_epoch = utils.time_str(timer_epoch.t())
        t_used = utils.time_str(timer_used.t())
        t_estimate = utils.time_str(timer_used.t() / epoch * max_epoch)

        log_str = 'epoch {}, train {:.4f}|{:.4f}, val {:.4f}|{:.4f}'.format(
            epoch, aves['tl'], aves['ta'], aves['vl'], aves['va'])
        for tval_name, _, loss_key, acc_key in tval_tuple_lst:
            log_str += ', {} {:.4f}|{:.4f}'.format(tval_name, aves[loss_key],
                                                   aves[acc_key])
            writer.add_scalars('loss', {tval_name: aves[loss_key]}, epoch)
            writer.add_scalars('acc', {tval_name: aves[acc_key]}, epoch)
        log_str += ', {} {}/{}'.format(t_epoch, t_used, t_estimate)
        utils.log(log_str)

        writer.add_scalars('loss', {
            'train': aves['tl'],
            'val': aves['vl'],
        }, epoch)
        writer.add_scalars('acc', {
            'train': aves['ta'],
            'val': aves['va'],
        }, epoch)

        if config.get('_parallel'):
            model_ = model.module
        else:
            model_ = model

        training = {
            'epoch': epoch,
            'optimizer': config['optimizer'],
            'optimizer_args': config['optimizer_args'],
            'optimizer_sd': optimizer.state_dict(),
        }
        save_obj = {
            'file': __file__,
            'config': config,
            'model': config['model'],
            'model_args': config['model_args'],
            'model_sd': model_.state_dict(),
            'training': training,
        }
        torch.save(save_obj, os.path.join(save_path, 'epoch-last.pth'))
        torch.save(trlog, os.path.join(save_path, 'trlog.pth'))

        if (save_epoch is not None) and epoch % save_epoch == 0:
            torch.save(save_obj,
                       os.path.join(save_path, 'epoch-{}.pth'.format(epoch)))

        if aves['va'] > max_va:
            max_va = aves['va']
            torch.save(save_obj, os.path.join(save_path, 'max-va.pth'))

        writer.flush()

    print('finished training!')
    logger.close()
def main(config):
    svname = args.name
    if svname is None:
        svname = 'meta'
    if args.tag is not None:
        svname += '_' + args.tag
    save_path = os.path.join('./save', svname)
    utils.ensure_path(save_path)
    utils.set_log_path(save_path)
    writer = SummaryWriter(os.path.join(save_path, 'tensorboard'))

    yaml.dump(config, open(os.path.join(save_path, 'config.yaml'), 'w'))

    #### Dataset ####

    if args.dataset == 'all':
        train_lst = ['ilsvrc_2012', 'omniglot', 'aircraft', 'cu_birds', 'dtd',
                     'quickdraw', 'fungi', 'vgg_flower']
        eval_lst = ['ilsvrc_2012']
    else:
        train_lst = [args.dataset]
        eval_lst = [args.dataset]

    if config.get('no_train') == True:
        train_iter = None
    else:
        trainset = make_md(train_lst, 'episodic', split='train',
                           image_size=126)
        train_iter = trainset.make_one_shot_iterator().get_next()

    if config.get('no_val') == True:
        val_iter = None
    else:
        valset = make_md(eval_lst, 'episodic', split='val', image_size=126)
        val_iter = valset.make_one_shot_iterator().get_next()

    testset = make_md(eval_lst, 'episodic', split='test', image_size=126)
    test_iter = testset.make_one_shot_iterator().get_next()

    sess = tf.Session()

    ########

    #### Model and optimizer ####

    if config.get('load'):
        model_sv = torch.load(config['load'])
        model = models.load(model_sv)
    else:
        model = models.make(config['model'], **config['model_args'])

        if config.get('load_encoder'):
            encoder = models.load(torch.load(config['load_encoder'])).encoder
            model.encoder.load_state_dict(encoder.state_dict())

    if config.get('_parallel'):
        model = nn.DataParallel(model)

    utils.log('num params: {}'.format(utils.compute_n_params(model)))

    optimizer, lr_scheduler = utils.make_optimizer(
        model.parameters(), config['optimizer'], **config['optimizer_args'])

    ########

    max_epoch = config['max_epoch']
    save_epoch = config.get('save_epoch')
    max_va = 0.
    timer_used = utils.Timer()
    timer_epoch = utils.Timer()

    aves_keys = ['tl', 'ta', 'tvl', 'tva', 'vl', 'va']
    trlog = dict()
    for k in aves_keys:
        trlog[k] = []

    def process_data(e):
        e = list(e[0])
        transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(146),
            transforms.CenterCrop(128),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
        for ii in [0, 3]:
            e[ii] = ((e[ii] + 1.0) * 0.5 * 255).astype('uint8')
            tmp = torch.zeros(len(e[ii]), 3, 128, 128).float()
            for i in range(len(e[ii])):
                tmp[i] = transform(e[ii][i])
            e[ii] = tmp.cuda()
        e[1] = torch.from_numpy(e[1]).long().cuda()
        e[4] = torch.from_numpy(e[4]).long().cuda()
        return e

    for epoch in range(1, max_epoch + 1):
        timer_epoch.s()
        aves = {k: utils.Averager() for k in aves_keys}

        # train
        model.train()
        if config.get('freeze_bn'):
            utils.freeze_bn(model)
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch)

        if config.get('no_train') == True:
            pass
        else:
            for i_ep in tqdm(range(config['n_train'])):
                e = process_data(sess.run(train_iter))
                loss, acc = model(e[0], e[1], e[3], e[4])

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                aves['tl'].add(loss.item())
                aves['ta'].add(acc)

                loss = None

        # eval
        model.eval()
        for name, ds_iter, name_l, name_a in [
                ('tval', val_iter, 'tvl', 'tva'),
                ('val', test_iter, 'vl', 'va')]:
            if config.get('no_val') == True and name == 'tval':
                continue
            for i_ep in tqdm(range(config['n_eval'])):
                e = process_data(sess.run(ds_iter))
                with torch.no_grad():
                    loss, acc = model(e[0], e[1], e[3], e[4])
                aves[name_l].add(loss.item())
                aves[name_a].add(acc)

        # post
        if lr_scheduler is not None:
            lr_scheduler.step()

        for k, v in aves.items():
            aves[k] = v.item()
            trlog[k].append(aves[k])

        _sig = 0

        t_epoch = utils.time_str(timer_epoch.t())
        t_used = utils.time_str(timer_used.t())
        t_estimate = utils.time_str(timer_used.t() / epoch * max_epoch)
        utils.log('epoch {}, train {:.4f}|{:.4f}, tval {:.4f}|{:.4f}, '
                  'val {:.4f}|{:.4f}, {} {}/{} (@{})'.format(
                      epoch, aves['tl'], aves['ta'], aves['tvl'], aves['tva'],
                      aves['vl'], aves['va'], t_epoch, t_used, t_estimate,
                      _sig))

        writer.add_scalars('loss', {
            'train': aves['tl'],
            'tval': aves['tvl'],
            'val': aves['vl'],
        }, epoch)
        writer.add_scalars('acc', {
            'train': aves['ta'],
            'tval': aves['tva'],
            'val': aves['va'],
        }, epoch)

        if config.get('_parallel'):
            model_ = model.module
        else:
            model_ = model

        training = {
            'epoch': epoch,
            'optimizer': config['optimizer'],
            'optimizer_args': config['optimizer_args'],
            'optimizer_sd': optimizer.state_dict(),
        }
        save_obj = {
            'file': __file__,
            'config': config,
            'model': config['model'],
            'model_args': config['model_args'],
            'model_sd': model_.state_dict(),
            'training': training,
        }
        torch.save(save_obj, os.path.join(save_path, 'epoch-last.pth'))
        torch.save(trlog, os.path.join(save_path, 'trlog.pth'))

        if (save_epoch is not None) and epoch % save_epoch == 0:
            torch.save(save_obj,
                       os.path.join(save_path, 'epoch-{}.pth'.format(epoch)))

        if aves['va'] > max_va:
            max_va = aves['va']
            torch.save(save_obj, os.path.join(save_path, 'max-va.pth'))

        writer.flush()
def __init__(self, encoder, encoder_args, classifier, classifier_args):
    super().__init__()
    self.encoder = models.make(encoder, **encoder_args)
    classifier_args['in_dim'] = self.encoder.out_dim
    self.classifier = models.make(classifier, **classifier_args)
import argparse
import os

import torch
from PIL import Image
from torchvision import transforms

import models
from utils import make_coord
from test import batched_predict

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', default='input.png')
    parser.add_argument('--model')
    parser.add_argument('--resolution')
    parser.add_argument('--output', default='output.png')
    parser.add_argument('--gpu', default='0')
    args = parser.parse_args()

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    img = transforms.ToTensor()(Image.open(args.input))

    model = models.make(torch.load(args.model)['model'], load_sd=True).cuda()

    h, w = list(map(int, args.resolution.split(',')))
    coord = make_coord((h, w)).cuda()
    cell = torch.ones_like(coord)
    cell[:, 0] *= 2 / h
    cell[:, 1] *= 2 / w
    pred = batched_predict(model, ((img - 0.5) / 0.5).cuda().unsqueeze(0),
                           coord.unsqueeze(0), cell.unsqueeze(0),
                           bsize=30000)[0]
    pred = (pred * 0.5 + 0.5).clamp(0, 1).view(h, w, 3).permute(2, 0, 1).cpu()
    transforms.ToPILImage()(pred).save(args.output)
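# Example invocation (paths are illustrative; the flags match the parser
# above, assuming this script is saved as demo.py):
#   python demo.py --input input.png --model save/model.pth \
#       --resolution 512,512 --output output.png --gpu 0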
def main(config):
    svname = args.name
    if svname is None:
        svname = 'meta_{}-{}shot'.format(config['train_dataset'],
                                         config['n_shot'])
        svname += '_' + config['model'] + '-' + config['model_args']['encoder']
    if args.tag is not None:
        svname += '_' + args.tag
    save_path = os.path.join('./save', svname)
    utils.ensure_path(save_path)
    utils.set_log_path(save_path)
    writer = SummaryWriter(os.path.join(save_path, 'tensorboard'))

    yaml.dump(config, open(os.path.join(save_path, 'config.yaml'), 'w'))

    #### Dataset ####

    n_way, n_shot = config['n_way'], config['n_shot']
    n_query = config['n_query']

    if config.get('n_train_way') is not None:
        n_train_way = config['n_train_way']
    else:
        n_train_way = n_way
    if config.get('n_train_shot') is not None:
        n_train_shot = config['n_train_shot']
    else:
        n_train_shot = n_shot
    if config.get('ep_per_batch') is not None:
        ep_per_batch = config['ep_per_batch']
    else:
        ep_per_batch = 1

    # train
    train_dataset = datasets.make(config['train_dataset'],
                                  **config['train_dataset_args'])
    utils.log('train dataset: {} (x{}), {}'.format(
        train_dataset[0][0].shape, len(train_dataset),
        train_dataset.n_classes))
    if config.get('visualize_datasets'):
        utils.visualize_dataset(train_dataset, 'train_dataset', writer)
    train_sampler = CategoriesSampler(
        train_dataset.label, config['train_batches'], n_train_way,
        n_train_shot + n_query, ep_per_batch=ep_per_batch)
    train_loader = DataLoader(train_dataset, batch_sampler=train_sampler,
                              num_workers=8, pin_memory=True)

    # tval
    if config.get('tval_dataset'):
        tval_dataset = datasets.make(config['tval_dataset'],
                                     **config['tval_dataset_args'])
        utils.log('tval dataset: {} (x{}), {}'.format(
            tval_dataset[0][0].shape, len(tval_dataset),
            tval_dataset.n_classes))
        if config.get('visualize_datasets'):
            utils.visualize_dataset(tval_dataset, 'tval_dataset', writer)
        tval_sampler = CategoriesSampler(
            tval_dataset.label, 200, n_way, n_shot + n_query, ep_per_batch=4)
        tval_loader = DataLoader(tval_dataset, batch_sampler=tval_sampler,
                                 num_workers=8, pin_memory=True)
    else:
        tval_loader = None

    # val
    val_dataset = datasets.make(config['val_dataset'],
                                **config['val_dataset_args'])
    utils.log('val dataset: {} (x{}), {}'.format(
        val_dataset[0][0].shape, len(val_dataset), val_dataset.n_classes))
    if config.get('visualize_datasets'):
        utils.visualize_dataset(val_dataset, 'val_dataset', writer)
    val_sampler = CategoriesSampler(
        val_dataset.label, 200, n_way, n_shot + n_query, ep_per_batch=4)
    val_loader = DataLoader(val_dataset, batch_sampler=val_sampler,
                            num_workers=8, pin_memory=True)

    ########

    #### Model and optimizer ####

    if config.get('load'):
        model_sv = torch.load(config['load'])
        model = models.load(model_sv)
    else:
        model = models.make(config['model'], **config['model_args'])

        if config.get('load_encoder'):
            encoder = models.load(torch.load(config['load_encoder'])).encoder
            model.encoder.load_state_dict(encoder.state_dict())

    if config.get('_parallel'):
        model = nn.DataParallel(model)

    utils.log('num params: {}'.format(utils.compute_n_params(model)))

    optimizer, lr_scheduler = utils.make_optimizer(
        model.parameters(), config['optimizer'], **config['optimizer_args'])

    ########

    max_epoch = config['max_epoch']
    save_epoch = config.get('save_epoch')
    max_va = 0.
    timer_used = utils.Timer()
    timer_epoch = utils.Timer()

    aves_keys = ['tl', 'ta', 'tvl', 'tva', 'vl', 'va']
    trlog = dict()
    for k in aves_keys:
        trlog[k] = []

    for epoch in range(1, max_epoch + 1):
        timer_epoch.s()
        aves = {k: utils.Averager() for k in aves_keys}

        # train
        model.train()
        if config.get('freeze_bn'):
            utils.freeze_bn(model)
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch)
        np.random.seed(epoch)

        for data, _ in tqdm(train_loader, desc='train', leave=False):
            x_shot, x_query = fs.split_shot_query(
                data.cuda(), n_train_way, n_train_shot, n_query,
                ep_per_batch=ep_per_batch)
            label = fs.make_nk_label(n_train_way, n_query,
                                     ep_per_batch=ep_per_batch).cuda()

            logits = model(x_shot, x_query).view(-1, n_train_way)
            loss = F.cross_entropy(logits, label)
            acc = utils.compute_acc(logits, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            aves['tl'].add(loss.item())
            aves['ta'].add(acc)

            logits = None
            loss = None

        # eval
        model.eval()

        for name, loader, name_l, name_a in [
                ('tval', tval_loader, 'tvl', 'tva'),
                ('val', val_loader, 'vl', 'va')]:
            if (config.get('tval_dataset') is None) and name == 'tval':
                continue
            np.random.seed(0)
            for data, _ in tqdm(loader, desc=name, leave=False):
                x_shot, x_query = fs.split_shot_query(
                    data.cuda(), n_way, n_shot, n_query, ep_per_batch=4)
                label = fs.make_nk_label(n_way, n_query,
                                         ep_per_batch=4).cuda()

                with torch.no_grad():
                    logits = model(x_shot, x_query).view(-1, n_way)
                    loss = F.cross_entropy(logits, label)
                    acc = utils.compute_acc(logits, label)

                aves[name_l].add(loss.item())
                aves[name_a].add(acc)

        # logging signature: last label of the final eval batch
        _sig = int(_[-1])

        # post
        if lr_scheduler is not None:
            lr_scheduler.step()

        for k, v in aves.items():
            aves[k] = v.item()
            trlog[k].append(aves[k])

        t_epoch = utils.time_str(timer_epoch.t())
        t_used = utils.time_str(timer_used.t())
        t_estimate = utils.time_str(timer_used.t() / epoch * max_epoch)
        utils.log('epoch {}, train {:.4f}|{:.4f}, tval {:.4f}|{:.4f}, '
                  'val {:.4f}|{:.4f}, {} {}/{} (@{})'.format(
                      epoch, aves['tl'], aves['ta'], aves['tvl'], aves['tva'],
                      aves['vl'], aves['va'], t_epoch, t_used, t_estimate,
                      _sig))

        writer.add_scalars('loss', {
            'train': aves['tl'],
            'tval': aves['tvl'],
            'val': aves['vl'],
        }, epoch)
        writer.add_scalars('acc', {
            'train': aves['ta'],
            'tval': aves['tva'],
            'val': aves['va'],
        }, epoch)

        if config.get('_parallel'):
            model_ = model.module
        else:
            model_ = model

        training = {
            'epoch': epoch,
            'optimizer': config['optimizer'],
            'optimizer_args': config['optimizer_args'],
            'optimizer_sd': optimizer.state_dict(),
        }
        save_obj = {
            'file': __file__,
            'config': config,
            'model': config['model'],
            'model_args': config['model_args'],
            'model_sd': model_.state_dict(),
            'training': training,
        }
        torch.save(save_obj, os.path.join(save_path, 'epoch-last.pth'))
        torch.save(trlog, os.path.join(save_path, 'trlog.pth'))

        if (save_epoch is not None) and epoch % save_epoch == 0:
            torch.save(save_obj,
                       os.path.join(save_path, 'epoch-{}.pth'.format(epoch)))

        if aves['va'] > max_va:
            max_va = aves['va']
            torch.save(save_obj, os.path.join(save_path, 'max-va.pth'))

        writer.flush()
def main(config):
    # dataset
    dataset = datasets.make(config['dataset'], **config['dataset_args'])
    utils.log('dataset: {} (x{}), {}'.format(dataset[0][0].shape,
                                             len(dataset), dataset.n_classes))

    if not args.sauc:
        n_way = 5
    else:
        n_way = 2
    n_shot, n_unlabel, n_query = args.shot, 30, 15
    n_batch = 200
    ep_per_batch = 4
    batch_sampler = CategoriesSampler_Semi(dataset.label, n_batch, n_way,
                                           n_shot, n_unlabel, n_query,
                                           ep_per_batch=ep_per_batch)
    loader = DataLoader(dataset, batch_sampler=batch_sampler, num_workers=8,
                        pin_memory=True)

    # model
    if config.get('load') is None:
        model = models.make('meta-baseline', encoder=None)
    else:
        model = models.load(torch.load(config['load']))

    if config.get('load_encoder') is not None:
        encoder = models.load(torch.load(config['load_encoder'])).encoder
        model.encoder = encoder

    if config.get('_parallel'):
        model = nn.DataParallel(model)

    model.eval()
    utils.log('num params: {}'.format(utils.compute_n_params(model)))

    # testing
    aves_keys = ['vl', 'va']
    aves = {k: utils.Averager() for k in aves_keys}

    test_epochs = args.test_epochs
    np.random.seed(0)
    va_lst = []
    for epoch in range(1, test_epochs + 1):
        for data, _ in tqdm(loader, leave=False):
            x_shot, x_unlabel, x_query = fs.split_shot_query_semi(
                data.cuda(), n_way, n_shot, n_unlabel, n_query,
                ep_per_batch=ep_per_batch)

            with torch.no_grad():
                if not args.sauc:
                    logits = model(x_shot, x_unlabel, x_query).view(-1, n_way)
                    label = fs.make_nk_label(n_way, n_query,
                                             ep_per_batch=ep_per_batch).cuda()
                    loss = F.cross_entropy(logits, label)
                    acc = utils.compute_acc(logits, label)

                    aves['vl'].add(loss.item(), len(data))
                    aves['va'].add(acc, len(data))
                    va_lst.append(acc)
                else:
                    x_shot = x_shot[:, 0, :, :, :, :].contiguous()
                    shot_shape = x_shot.shape[:-3]
                    img_shape = x_shot.shape[-3:]
                    bs = shot_shape[0]
                    p = model.encoder(x_shot.view(-1, *img_shape)).reshape(
                        *shot_shape, -1).mean(dim=1, keepdim=True)
                    q = model.encoder(x_query.view(-1, *img_shape)).view(
                        bs, -1, p.shape[-1])
                    p = F.normalize(p, dim=-1)
                    q = F.normalize(q, dim=-1)
                    s = torch.bmm(q, p.transpose(2, 1)).view(bs, -1).cpu()
                    for i in range(bs):
                        k = s.shape[1] // 2
                        y_true = [1] * k + [0] * k
                        acc = roc_auc_score(y_true, s[i])
                        aves['va'].add(acc, len(data))
                        va_lst.append(acc)

        print('test epoch {}: acc={:.2f} +- {:.2f} (%), loss={:.4f} (@{})'.
              format(epoch, aves['va'].item() * 100,
                     mean_confidence_interval(va_lst) * 100,
                     aves['vl'].item(), _[-1]))
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--config')
    parser.add_argument('--model')
    parser.add_argument('--gpu', default='0')
    args = parser.parse_args()

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    with open(args.config, 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    spec = config['test_dataset']
    dataset = datasets.make(spec['dataset'])
    dataset = datasets.make(spec['wrapper'], args={'dataset': dataset})
    loader = DataLoader(dataset, batch_size=spec['batch_size'],
                        num_workers=8, pin_memory=True)

    model_spec = torch.load(args.model)['model']
    model = models.make(model_spec, load_sd=True).cuda()

    res = eval_psnr(loader, model,
                    data_norm=config.get('data_norm'),
                    eval_type=config.get('eval_type'),
                    eval_bsize=config.get('eval_bsize'),
                    verbose=True)
    print('result: {:.4f}'.format(res))
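# Example invocation (paths are illustrative; the flags match the parser
# above, assuming this script is saved as test.py):
#   python test.py --config configs/test.yaml --model save/model.pth --gpu 0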
def main(config):
    svname = config.get('sv_name')
    if args.tag is not None:
        svname += '_' + args.tag
    config['sv_name'] = svname
    save_path = os.path.join('./save', svname)
    utils.ensure_path(save_path)
    utils.set_log_path(save_path)
    utils.log(svname)
    writer = SummaryWriter(os.path.join(save_path, 'tensorboard'))

    yaml.dump(config, open(os.path.join(save_path, 'config.yaml'), 'w'))

    #### Dataset ####

    n_way, n_shot = config['n_way'], config['n_shot']
    n_query = config['n_query']
    n_pseudo = config['n_pseudo']
    ep_per_batch = config['ep_per_batch']

    if config.get('test_batches') is not None:
        test_batches = config['test_batches']
    else:
        test_batches = config['train_batches']

    for s in ['train', 'val', 'tval']:
        if config.get(f"{s}_dataset_args") is not None:
            config[f"{s}_dataset_args"]['data_dir'] = os.path.join(
                os.getcwd(), os.pardir, 'data_root')

    # train
    train_dataset = CustomDataset(config['train_dataset'],
                                  save_dir=config.get('load_encoder'),
                                  **config['train_dataset_args'])

    if config['train_dataset_args']['split'] == 'helper':
        with open(os.path.join(save_path, 'train_helper_cls.pkl'), 'wb') as f:
            pkl.dump(train_dataset.dataset_classes, f)

    train_sampler = EpisodicSampler(train_dataset, config['train_batches'],
                                    n_way, n_shot, n_query, n_pseudo,
                                    episodes_per_batch=ep_per_batch)
    train_loader = DataLoader(train_dataset, batch_sampler=train_sampler,
                              num_workers=4, pin_memory=True)

    # tval
    if config.get('tval_dataset'):
        tval_dataset = CustomDataset(config['tval_dataset'],
                                     **config['tval_dataset_args'])
        tval_sampler = EpisodicSampler(tval_dataset, test_batches, n_way,
                                       n_shot, n_query, n_pseudo,
                                       episodes_per_batch=ep_per_batch)
        tval_loader = DataLoader(tval_dataset, batch_sampler=tval_sampler,
                                 num_workers=4, pin_memory=True)
    else:
        tval_loader = None

    # val
    val_dataset = CustomDataset(config['val_dataset'],
                                **config['val_dataset_args'])
    val_sampler = EpisodicSampler(val_dataset, test_batches, n_way, n_shot,
                                  n_query, n_pseudo,
                                  episodes_per_batch=ep_per_batch)
    val_loader = DataLoader(val_dataset, batch_sampler=val_sampler,
                            num_workers=4, pin_memory=True)

    #### Model and optimizer ####

    if config.get('load'):
        model_sv = torch.load(config['load'])
        model = models.load(model_sv)
    else:
        model = models.make(config['model'], **config['model_args'])

        if config.get('load_encoder'):
            encoder = models.load(torch.load(config['load_encoder'])).encoder
            model.encoder.load_state_dict(encoder.state_dict())

        if config.get('freeze_encoder'):
            for param in model.encoder.parameters():
                param.requires_grad = False

    if config.get('_parallel'):
        model = nn.DataParallel(model)

    utils.log('num params: {}'.format(utils.compute_n_params(model)))

    optimizer, lr_scheduler = utils.make_optimizer(
        model.parameters(), config['optimizer'], **config['optimizer_args'])

    ########

    max_epoch = config['max_epoch']
    save_epoch = config.get('save_epoch')
    max_va = 0.
    timer_used = utils.Timer()
    timer_epoch = utils.Timer()

    aves_keys = ['tl', 'ta', 'tvl', 'tva', 'vl', 'va']
    trlog = dict()
    for k in aves_keys:
        trlog[k] = []

    for epoch in range(1, max_epoch + 1):
        timer_epoch.s()
        aves = {k: utils.Averager() for k in aves_keys}

        # train
        model.train()
        if config.get('freeze_bn'):
            utils.freeze_bn(model)
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch)
        np.random.seed(epoch)

        for data in tqdm(train_loader, desc='train', leave=False):
            x_shot, x_query, x_pseudo = fs.split_shot_query(
                data.cuda(), n_way, n_shot, n_query, n_pseudo,
                ep_per_batch=ep_per_batch)
            label = fs.make_nk_label(n_way, n_query,
                                     ep_per_batch=ep_per_batch).cuda()

            logits = model(x_shot, x_query, x_pseudo)
            logits = logits.view(-1, n_way)
            loss = F.cross_entropy(logits, label)
            acc = utils.compute_acc(logits, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            aves['tl'].add(loss.item())
            aves['ta'].add(acc)

            logits = None
            loss = None

        # eval
        model.eval()

        for name, loader, name_l, name_a in [
                ('tval', tval_loader, 'tvl', 'tva'),
                ('val', val_loader, 'vl', 'va')]:
            if (config.get('tval_dataset') is None) and name == 'tval':
                continue
            np.random.seed(0)
            for data in tqdm(loader, desc=name, leave=False):
                x_shot, x_query, x_pseudo = fs.split_shot_query(
                    data.cuda(), n_way, n_shot, n_query, n_pseudo,
                    ep_per_batch=ep_per_batch)
                label = fs.make_nk_label(n_way, n_query,
                                         ep_per_batch=ep_per_batch).cuda()

                with torch.no_grad():
                    logits = model(x_shot, x_query, x_pseudo)
                    logits = logits.view(-1, n_way)
                    loss = F.cross_entropy(logits, label)
                    acc = utils.compute_acc(logits, label)

                aves[name_l].add(loss.item())
                aves[name_a].add(acc)

        # post
        if lr_scheduler is not None:
            lr_scheduler.step()

        for k, v in aves.items():
            aves[k] = v.item()
            trlog[k].append(aves[k])

        t_epoch = utils.time_str(timer_epoch.t())
        t_used = utils.time_str(timer_used.t())
        t_estimate = utils.time_str(timer_used.t() / epoch * max_epoch)
        utils.log('epoch {}, train {:.4f}|{:.4f}, tval {:.4f}|{:.4f}, '
                  'val {:.4f}|{:.4f}, {} {}/{}'.format(
                      epoch, aves['tl'], aves['ta'], aves['tvl'],
                      aves['tva'], aves['vl'], aves['va'], t_epoch, t_used,
                      t_estimate))

        writer.add_scalars('loss', {
            'train': aves['tl'],
            'tval': aves['tvl'],
            'val': aves['vl'],
        }, epoch)
        writer.add_scalars('acc', {
            'train': aves['ta'],
            'tval': aves['tva'],
            'val': aves['va'],
        }, epoch)

        if config.get('_parallel'):
            model_ = model.module
        else:
            model_ = model

        training = {
            'epoch': epoch,
            'optimizer': config['optimizer'],
            'optimizer_args': config['optimizer_args'],
            'optimizer_sd': optimizer.state_dict(),
        }
        save_obj = {
            'file': __file__,
            'config': config,
            'model': config['model'],
            'model_args': config['model_args'],
            'model_sd': model_.state_dict(),
            'training': training,
        }
        torch.save(save_obj, os.path.join(save_path, 'epoch-last.pth'))
        torch.save(trlog, os.path.join(save_path, 'trlog.pth'))

        if (save_epoch is not None) and epoch % save_epoch == 0:
            torch.save(save_obj,
                       os.path.join(save_path, 'epoch-{}.pth'.format(epoch)))

        if aves['va'] > max_va:
            max_va = aves['va']
            torch.save(save_obj, os.path.join(save_path, 'max-va.pth'))

        writer.flush()
def main(config, args):
    random.seed(0)
    np.random.seed(0)
    torch.manual_seed(0)
    torch.cuda.manual_seed(0)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False

    wandb_auth()
    try:
        # inside IPython, __IPYTHON__ is defined; passing config there is skipped
        __IPYTHON__
        wandb.init(project="NAS", group="maml")
    except NameError:
        wandb.init(project="NAS", group="maml", config=config)

    ckpt_name = args.name
    if ckpt_name is None:
        ckpt_name = config['encoder']
        ckpt_name += '_' + config['dataset'].replace('meta-', '')
        ckpt_name += '_{}_way_{}_shot'.format(
            config['train']['n_way'], config['train']['n_shot'])
    if args.tag is not None:
        ckpt_name += '_' + args.tag

    ckpt_path = os.path.join('./save', ckpt_name)
    utils.ensure_path(ckpt_path)
    utils.set_log_path(ckpt_path)
    writer = SummaryWriter(os.path.join(ckpt_path, 'tensorboard'))
    yaml.dump(config, open(os.path.join(ckpt_path, 'config.yaml'), 'w'))

    ##### Dataset #####

    # meta-train
    train_set = datasets.make(config['dataset'], **config['train'])
    utils.log('meta-train set: {} (x{}), {}'.format(
        train_set[0][0].shape, len(train_set), train_set.n_classes))

    # meta-val
    eval_val = False
    if config.get('val'):
        eval_val = True
        val_set = datasets.make(config['dataset'], **config['val'])
        utils.log('meta-val set: {} (x{}), {}'.format(
            val_set[0][0].shape, len(val_set), val_set.n_classes))
        val_loader = DataLoader(val_set, config['val']['n_episode'],
                                collate_fn=datasets.collate_fn,
                                num_workers=1, pin_memory=True)

    # if args.split == "traintrain" and config.get('val'):
    #     # TODO: this is probably not what is meant by train-train
    #     train_set = torch.utils.data.ConcatDataset([train_set, val_set])
    train_loader = DataLoader(train_set, config['train']['n_episode'],
                              collate_fn=datasets.collate_fn,
                              num_workers=1, pin_memory=True)

    ##### Model and Optimizer #####

    inner_args = utils.config_inner_args(config.get('inner_args'))
    if config.get('load') or (args.load is True and
                              os.path.exists(ckpt_path + '/epoch-last.pth')):
        if config.get('load') is None:
            config['load'] = ckpt_path + '/epoch-last.pth'
        ckpt = torch.load(config['load'])
        config['encoder'] = ckpt['encoder']
        config['encoder_args'] = ckpt['encoder_args']
        config['classifier'] = ckpt['classifier']
        config['classifier_args'] = ckpt['classifier_args']
        model = models.load(ckpt,
                            load_clf=(not inner_args['reset_classifier']))
        optimizer, lr_scheduler = optimizers.load(ckpt, model.parameters())
        start_epoch = ckpt['training']['epoch'] + 1
        max_va = ckpt['training']['max_va']
    else:
        config['encoder_args'] = config.get('encoder_args') or dict()
        config['classifier_args'] = config.get('classifier_args') or dict()
        # guard against configs that omit bn_args entirely
        config['encoder_args'].setdefault('bn_args', dict())
        config['encoder_args']['bn_args']['n_episode'] = \
            config['train']['n_episode']
        config['classifier_args']['n_way'] = config['train']['n_way']
        model = models.make(config['encoder'], config['encoder_args'],
                            config['classifier'], config['classifier_args'])
        optimizer, lr_scheduler = optimizers.make(config['optimizer'],
                                                  model.parameters(),
                                                  **config['optimizer_args'])
        start_epoch = 1
        max_va = 0.
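    # Layout of the checkpoint consumed by the resume branch above and written
    # at the end of every epoch below (keys taken verbatim from the save code):
    #   ckpt = {
    #       'encoder': ..., 'encoder_args': ..., 'encoder_state_dict': ...,
    #       'classifier': ..., 'classifier_args': ...,
    #       'classifier_state_dict': ...,
    #       'training': {'epoch': ..., 'max_va': ...,
    #                    'optimizer_state_dict': ...,
    #                    'lr_scheduler_state_dict': ...},
    #   }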
    if args.efficient:
        model.go_efficient()

    if config.get('_parallel'):
        model = nn.DataParallel(model)

    utils.log('num params: {}'.format(utils.compute_n_params(model)))
    timer_elapsed, timer_epoch = utils.Timer(), utils.Timer()

    ##### Training and evaluation #####

    # 'tl': meta-train loss
    # 'ta': meta-train accuracy
    # 'vl': meta-val loss
    # 'va': meta-val accuracy
    aves_keys = ['tl', 'ta', 'vl', 'va']
    trlog = dict()
    for k in aves_keys:
        trlog[k] = []

    for epoch in tqdm(range(start_epoch, config['epoch'] + 1),
                      desc="Iterating over epochs"):
        timer_epoch.start()
        aves = {k: utils.AverageMeter() for k in aves_keys}

        # meta-train
        model.train()
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch)
        np.random.seed(epoch)

        all_sotls = 0
        all_sovls = 0
        for data_idx, data in enumerate(
                tqdm(train_loader, desc='meta-train', leave=False)):
            x_shot, x_query, y_shot, y_query = data
            x_shot, y_shot = x_shot.cuda(), y_shot.cuda()
            x_query, y_query = x_query.cuda(), y_query.cuda()

            if inner_args['reset_classifier']:
                if config.get('_parallel'):
                    model.module.reset_classifier()
                else:
                    model.reset_classifier()

            if args.split == "traintrain":
                x_query = x_shot
                y_query = y_shot

            logits, sotl, all_losses = model(x_shot, x_query, y_shot,
                                             inner_args, meta_train=True)
            # sotl = sum([l[-1] for l in all_losses])
            # for l in all_losses[:-1]:
            #     for i in range(len(l) - 1):
            #         l[i] = l[i].detach()
            logits = logits.flatten(0, 1)
            labels = y_query.flatten()

            all_sotls += sotl

            pred = torch.argmax(logits, dim=-1)
            acc = utils.compute_acc(pred, labels)
            loss = F.cross_entropy(logits, labels)
            # all_sovls += loss  # NOTE: accumulating the live loss tensor
            # keeps the computational graph alive across steps and blows up
            # memory

            if args.split == "trainval" or (
                    args.split == "sovl" and
                    data_idx % args.sotl_freq != 0):
                aves['tl'].update(loss.item(), 1)
                aves['ta'].update(acc, 1)

                optimizer.zero_grad()
                loss.backward()
                for param in optimizer.param_groups[0]['params']:
                    nn.utils.clip_grad_value_(param, 10)
                optimizer.step()

            elif args.split == "traintrain":
                aves['tl'].update(loss.item(), 1)
                aves['ta'].update(acc, 1)

                # sotl = sum(sotl) + loss
                optimizer.zero_grad()
                # sotl.backward()
                loss.backward()
                for param in optimizer.param_groups[0]['params']:
                    nn.utils.clip_grad_value_(param, 10)
                optimizer.step()

            elif args.split == "sotl" and data_idx % args.sotl_freq == 0:
                # TODO: does not work yet
                aves['tl'].update(loss.item(), 1)
                aves['ta'].update(acc, 1)

                optimizer.zero_grad()
                all_sotls.backward()
                for param in optimizer.param_groups[0]['params']:
                    nn.utils.clip_grad_value_(param, 10)
                optimizer.step()
                all_sotls = 0  # detach

            elif args.split == "sovl" and data_idx % args.sotl_freq == 0:
                # TODO: does not work yet
                aves['tl'].update(loss.item(), 1)
                aves['ta'].update(acc, 1)

                optimizer.zero_grad()
                all_sovls.backward()
                for param in optimizer.param_groups[0]['params']:
                    nn.utils.clip_grad_value_(param, 10)
                optimizer.step()
                all_sovls = 0  # detach

        # meta-val
        if eval_val:
            model.eval()
            np.random.seed(0)

            for data in tqdm(val_loader, desc='meta-val', leave=False):
                x_shot, x_query, y_shot, y_query = data
                x_shot, y_shot = x_shot.cuda(), y_shot.cuda()
                x_query, y_query = x_query.cuda(), y_query.cuda()

                if inner_args['reset_classifier']:
                    if config.get('_parallel'):
                        model.module.reset_classifier()
                    else:
                        model.reset_classifier()

                logits, sotl, all_losses = model(x_shot, x_query, y_shot,
                                                 inner_args, meta_train=False)
                logits = logits.flatten(0, 1)
                labels = y_query.flatten()

                pred = torch.argmax(logits, dim=-1)
                acc = utils.compute_acc(pred, labels)
                loss = F.cross_entropy(logits, labels)
                aves['vl'].update(loss.item(), 1)
                aves['va'].update(acc, 1)

        if lr_scheduler is not None:
            lr_scheduler.step()

        for k, avg in aves.items():
            aves[k] = avg.item()
            trlog[k].append(aves[k])

        t_epoch = utils.time_str(timer_epoch.end())
        t_elapsed = utils.time_str(timer_elapsed.end())
        t_estimate = utils.time_str(
            timer_elapsed.end() / (epoch - start_epoch + 1) *
            (config['epoch'] - start_epoch + 1))

        # formats output
        log_str = 'epoch {}, meta-train {:.4f}|{:.4f}'.format(
            epoch, aves['tl'], aves['ta'])
        writer.add_scalars('loss', {'meta-train': aves['tl']}, epoch)
        writer.add_scalars('acc', {'meta-train': aves['ta']}, epoch)

        if eval_val:
            log_str += ', meta-val {:.4f}|{:.4f}'.format(
                aves['vl'], aves['va'])
            writer.add_scalars('loss', {'meta-val': aves['vl']}, epoch)
            writer.add_scalars('acc', {'meta-val': aves['va']}, epoch)
            wandb.log({
                "train_loss": aves['tl'],
                "train_acc": aves['ta'],
                "val_loss": aves['vl'],
                "val_acc": aves['va'],
            })

        log_str += ', {} {}/{}'.format(t_epoch, t_elapsed, t_estimate)
        utils.log(log_str)

        # saves model and meta-data
        if config.get('_parallel'):
            model_ = model.module
        else:
            model_ = model

        training = {
            'epoch': epoch,
            'max_va': max(max_va, aves['va']),
            'optimizer': config['optimizer'],
            'optimizer_args': config['optimizer_args'],
            'optimizer_state_dict': optimizer.state_dict(),
            'lr_scheduler_state_dict': lr_scheduler.state_dict()
                if lr_scheduler is not None else None,
        }
        ckpt = {
            'file': __file__,
            'config': config,
            'encoder': config['encoder'],
            'encoder_args': config['encoder_args'],
            'encoder_state_dict': model_.encoder.state_dict(),
            'classifier': config['classifier'],
            'classifier_args': config['classifier_args'],
            'classifier_state_dict': model_.classifier.state_dict(),
            'training': training,
        }

        # 'epoch-last.pth': saved at the latest epoch
        # 'max-va.pth': saved when validation accuracy is at its maximum
        torch.save(ckpt, os.path.join(ckpt_path, 'epoch-last.pth'))
        torch.save(trlog, os.path.join(ckpt_path, 'trlog.pth'))

        if aves['va'] > max_va:
            max_va = aves['va']
            torch.save(ckpt, os.path.join(ckpt_path, 'max-va.pth'))

        writer.flush()
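# Hedged example of reloading the best checkpoint written above for
# evaluation. models.load is assumed to rebuild the network from the stored
# names/args and state dicts, mirroring the resume branch of main(); the
# helper name is ours, not part of the repo.
def load_best_checkpoint(ckpt_path):
    ckpt = torch.load(os.path.join(ckpt_path, 'max-va.pth'))
    model = models.load(ckpt).cuda()
    model.eval()
    return model, ckpt['training']['max_va']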