# Shared imports for the snippets below; project-local helpers such as
# Averager, count_acc, euclidean_metric, Timer, and the data managers are
# defined elsewhere in the repository.
import math
import os.path as osp
import random

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from tqdm import tqdm


def train(model, optimizer, source_loader, args):
    model.train()
    acc_avg = Averager()
    loss_avg = Averager()
    criterion = nn.CrossEntropyLoss()  # build once instead of once per batch
    iter_source = iter(source_loader)
    num_iter = len(source_loader)
    for i in range(num_iter):
        sx, sy = next(iter_source)  # `iterator.next()` is Python 2 syntax
        if args.cuda:
            sx, sy = sx.cuda(), sy.cuda()
        _, logits = model(sx)
        loss = criterion(logits, sy)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        acc_avg.add(count_acc(logits, sy))
        loss_avg.add(loss.item())
    return loss_avg.item(), acc_avg.item()
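# These snippets rely on small project helpers that are not shown here. A
# minimal sketch of two of them, inferred from how they are called
# (`Averager().add(x)` / `.item()` and `count_acc(logits, label)`); the
# actual repository implementations may differ.
class Averager:
    """Running mean over scalar values."""

    def __init__(self):
        self.n = 0
        self.v = 0.0

    def add(self, x):
        # incremental running-mean update
        self.v = (self.v * self.n + x) / (self.n + 1)
        self.n += 1

    def item(self):
        return self.v


def count_acc(logits, label):
    """Fraction of predictions whose argmax matches the label."""
    pred = torch.argmax(logits, dim=1)
    return (pred == label).float().mean().item()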
def val(model, args, val_loader, label):
    model.eval()
    vl = Averager()
    va = Averager()
    with torch.no_grad():
        for i, batch in tqdm(enumerate(val_loader, 1), total=len(val_loader)):
            data, index_label = batch[0].cuda(), batch[1]
            logits = model(data, mode='val')
            loss = F.cross_entropy(logits, label)
            acc = count_acc(logits, label)
            vl.add(loss.item())
            va.add(acc)
    return vl.item(), va.item()
def train_stage(self, data_train_stage1, data_train_stage2, model, epoch):
    print('Training Epoch {}:'.format(epoch + 1))
    model.train()
    aid_set = list(set(data_train_stage1.aid))
    avg_loss = Averager()
    data_train = data_train_stage1
    n_samples = data_train.shape[0]
    n_batch = int(np.ceil(n_samples / self.batchsize))

    # Per-task sampling probabilities: proportional to task size ('normal')
    # or to its square root ('sqrt'); 'unit' samples tasks uniformly.
    if self.sample_method == 'normal':
        list_prob = [data_train_stage1[data_train_stage1.aid == aid].shape[0]
                     for aid in aid_set]
    elif self.sample_method == 'sqrt':
        list_prob = [math.sqrt(data_train_stage1[data_train_stage1.aid == aid].shape[0])
                     for aid in aid_set]
    if self.sample_method in ('normal', 'sqrt'):
        prob_sum = sum(list_prob)
        list_prob = [p / prob_sum for p in list_prob]

    for i_batch in tqdm(range(n_batch)):
        if self.sample_method in ('normal', 'sqrt'):
            batch_aid_set = np.random.choice(aid_set, size=self.task_count,
                                             replace=False, p=list_prob)
        elif self.sample_method == 'unit':
            batch_aid_set = random.sample(aid_set, self.task_count)
        list_sup_x, list_sup_y, list_qry_x, list_qry_y = [], [], [], []
        for aid in batch_aid_set:
            # independent support/query draws for this task (they may overlap)
            batch_sup = data_train[data_train.aid == aid].sample(self.batchsize)
            batch_qry = data_train[data_train.aid == aid].sample(self.batchsize)
            list_sup_x.append(batch_sup[self.train_col])
            list_sup_y.append(batch_sup[self.label_col].values)
            list_qry_x.append(batch_qry[self.train_col])
            list_qry_y.append(batch_qry[self.label_col].values)
        loss = model.global_update(list_sup_x, list_sup_y, list_qry_x, list_qry_y)
        avg_loss.add(loss.item())
    print('Training Epoch {}; Loss {};'.format(epoch + 1, avg_loss.item()))
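# `model.global_update` is not defined in these snippets. A minimal sketch of
# what such a meta-update commonly looks like (MAML-style: adapt on each
# task's support set, then backpropagate the summed query loss through the
# adaptation). `net`, `meta_optimizer`, and `inner_lr` are illustrative
# assumptions, and inputs are assumed to already be tensors; the repository's
# actual method may differ.
from torch.func import functional_call


def global_update(net, meta_optimizer, sup_xs, sup_ys, qry_xs, qry_ys, inner_lr=0.01):
    params = dict(net.named_parameters())
    meta_loss = 0.0
    for sx, sy, qx, qy in zip(sup_xs, sup_ys, qry_xs, qry_ys):
        # one inner gradient step on this task's support set
        sup_loss = F.cross_entropy(functional_call(net, params, (sx,)), sy)
        grads = torch.autograd.grad(sup_loss, list(params.values()), create_graph=True)
        fast = {k: v - inner_lr * g for (k, v), g in zip(params.items(), grads)}
        # outer loss: evaluate the adapted weights on the query set
        meta_loss = meta_loss + F.cross_entropy(functional_call(net, fast, (qx,)), qy)
    meta_optimizer.zero_grad()
    meta_loss.backward()
    meta_optimizer.step()
    return meta_loss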
def test(model, target_loader, args):
    model.eval()
    acc_avg = Averager()
    with torch.no_grad():
        for tx, ty in target_loader:  # iterate directly; `.next()` is Python 2 only
            if torch.cuda.is_available():
                tx, ty = tx.cuda(), ty.cuda()
            _, logits = model(tx)
            acc_avg.add(count_acc(logits, ty))
    return acc_avg.item()
def test(model, label, args, few_shot_params):
    if args.debug:
        n_test = 10
        print_freq = 2
    else:
        n_test = 1000
        print_freq = 100
    test_file = args.dataset_dir + 'test.json'
    test_datamgr = SetDataManager(test_file, args.dataset_dir, args.image_size,
                                  mode='val', n_episode=n_test, **few_shot_params)
    loader = test_datamgr.get_data_loader(aug=False)
    test_acc_record = np.zeros((n_test,))

    # warm-start from the checkpoint with the best validation accuracy
    warmup_state = torch.load(osp.join(args.checkpoint_dir, 'max_acc.pth'))['params']
    model.load_state_dict(warmup_state, strict=False)
    model.eval()

    ave_acc = Averager()
    with torch.no_grad():
        for i, batch in enumerate(loader, 1):
            data, index_label = batch[0].cuda(), batch[1].cuda()
            logits = model(data, 'test')
            acc = count_acc(logits, label)
            ave_acc.add(acc)
            test_acc_record[i - 1] = acc
            if i % print_freq == 0:
                print('batch {}: {:.2f}({:.2f})'.format(i, ave_acc.item() * 100, acc * 100))
    m, pm = compute_confidence_interval(test_acc_record)
    print('Test Acc {:.4f} + {:.4f}'.format(m, pm))
    acc_str = '%4.2f' % (m * 100)
    with open(args.save_dir + '/result.txt', 'a') as f:
        f.write('%s %s\n' % (acc_str, args.name))
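# Hedged sketch of `compute_confidence_interval`, inferred from its use above:
# the mean of per-episode accuracies plus a 95% interval half-width
# (1.96 * standard error). The repository's helper may differ slightly.
def compute_confidence_interval(data):
    a = np.asarray(data, dtype=np.float64)
    m = a.mean()
    pm = 1.96 * a.std(ddof=1) / np.sqrt(len(a))
    return m, pm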
# Excerpt from inside an episodic training loop:
logits = model(data_shot, data_query)
loss = F.cross_entropy(logits, label)
acc = count_acc(logits, label)
writer.add_scalar('data/loss', float(loss), global_count)
writer.add_scalar('data/acc', float(acc), global_count)
print('epoch {}, train {}/{}, loss={:.4f} acc={:.4f}'.format(
    epoch, i, len(train_loader), loss.item(), acc))
tl.add(loss.item())
ta.add(acc)
optimizer.zero_grad()
loss.backward()
optimizer.step()

# After the epoch: switch to validation.
tl = tl.item()
ta = ta.item()
model.eval()
vl = Averager()
va = Averager()
label = torch.arange(args.validation_way).repeat(args.query)
if torch.cuda.is_available():
    label = label.type(torch.cuda.LongTensor)
else:
    label = label.type(torch.LongTensor)

print('best epoch {}, best val acc={:.4f}'.format(
    trlog['max_acc_epoch'], trlog['max_acc']))
mask_spt[mask_spt != 0] = 1
N, c, w, h = data.size()
data_spt_repeat = data_spt.unsqueeze(0).repeat(N, 1, 1, 1, 1)
mask_spt_repeat = mask_spt.unsqueeze(0).repeat(N, 1, 1, 1, 1)
bg_repeat = bg.unsqueeze(0).repeat(p, 1, 1, 1, 1).permute(1, 0, 2, 3, 4)
merge_spt = (data_spt_repeat * mask_spt_repeat.float()
             + bg_repeat * (1 - mask_spt_repeat.float()))
m, n, c, w, h = merge_spt.size()
merge_spt = merge_spt.reshape(m * n, c, w, h)
proto = model(merge_spt)
proto = proto.reshape(m, args.test_way, -1).mean(dim=0)
label = torch.arange(args.test_way).repeat(args.query)
label = label.type(torch.cuda.LongTensor)
logits = euclidean_metric(model(data_query), proto)
acc = count_acc(logits, label)
ave_acc.add(acc)
print('batch {}: {:.2f}({:.2f})'.format(i, ave_acc.item() * 100, acc * 100))
proto = None
logits = None
loss = None
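# Hedged sketch of `euclidean_metric`, inferred from its use as a logit
# producer over (query, prototype) pairs: negative squared Euclidean
# distance, the standard prototypical-network score. The repository version
# may differ in detail.
def euclidean_metric(a, b):
    # a: (n, d) query embeddings; b: (m, d) prototypes -> (n, m) logits
    n, m = a.shape[0], b.shape[0]
    a = a.unsqueeze(1).expand(n, m, -1)
    b = b.unsqueeze(0).expand(n, m, -1)
    return -((a - b) ** 2).sum(dim=2)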
writer.add_scalar("data/loss", float(loss), global_count) writer.add_scalar("data/acc", float(acc), global_count) print( "epoch {}, train {}/{}, loss={:.4f} acc={:.4f}".format( epoch, i, len(train_loader), loss.item(), acc ) ) tl.add(loss.item()) ta.add(acc) optimizer.zero_grad() loss.backward() optimizer.step() tl = tl.item() ta = ta.item() model.eval() vl = Averager() va = Averager() label = torch.arange(args.validation_way).repeat(args.query) if torch.cuda.is_available(): label = label.type(torch.cuda.LongTensor) else: label = label.type(torch.LongTensor) print( "best epoch {}, best val acc={:.4f}".format(
for epoch in range(1, args.max_epoch + 1):
    # fresh averagers each epoch (they are reassigned to floats below)
    vl = Averager()
    va = Averager()
    for i, batch in enumerate(val_loader, 1):
        with torch.no_grad():
            data, _ = [_.cuda() for _ in batch]
            p = args.shot * args.test_way
            data_shot, data_query = data[:p], data[p:]
            label = torch.arange(args.test_way).repeat(args.query)
            label = label.type(torch.cuda.LongTensor)
            logits = model(data_shot, data_query, pcg,
                           inner_update_num=args.inner_step)
            loss = F.cross_entropy(logits, label)
            # record each episode once (the original added loss and acc twice)
            vl.add(loss.item())
            va.add(count_acc(logits, label))
            pcg.reset()
    vl = vl.item()
    va = va.item()
    print('epoch {}, val, loss={:.4f} acc={:.4f} maxacc={:.4f}'.format(
        epoch, vl, va, trlog['max_acc']))
total_loss.backward()
if epoch > 45:
    # the generator apparently only starts updating after a warm-up period
    optimizer_gen.step()
optimizer_cnn.step()
optimizer_atten.step()
optimizer_global1.step()
optimizer_global2.step()
proto = None
proto_final = None
logits = None
loss = None

tl1 = tl1.item()
tl2 = tl2.item()
ta1 = ta1.item()
ta2 = ta2.item()
log('epoch {}, train, loss1={:.4f} loss2={:.4f} acc1={:.4f} acc2={:.4f}'
    .format(epoch, tl1, tl2, ta1, ta2))

model_cnn.eval()
model_reg.eval()
vl1 = Averager()
vl2 = Averager()
va1 = Averager()
va2 = Averager()
ave_acc = Averager()
for i, batch in enumerate(loader, 1):
    data, _ = [_.cuda() for _ in batch]
    data_shot, data_query = data[:k], data[k:]
    t = Timer()
    x = model(data_shot)
    x = x.reshape(shot, way, -1).mean(dim=0)
    p = x
    logits = euclidean_metric(model(data_query), p)
    pred = torch.argmax(logits, dim=1)
    label = torch.arange(way)  # 0~11 because l[shot] == l[query]
    label = label.type(torch.cuda.LongTensor)
    acc = count_acc(logits, label)
    ave_acc.add(acc)
    x = None
    p = None
    logits = None
model_acc.append(ave_acc.item() * 100)

for i in range(1, 6):
    print('{}-way {}-shot learning, average accuracy: {}'.format(
        way, i, model_acc[i - 1]))
def main(args):
    device = torch.device(args.device)
    ensure_path(args.save_path)

    data = Data(args.dataset, args.n_batches, args.train_way, args.test_way,
                args.shot, args.query)
    train_loader = data.train_loader
    val_loader = data.valid_loader

    model = Convnet(x_dim=2).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

    def save_model(name):
        torch.save(model.state_dict(), osp.join(args.save_path, name + '.pth'))

    trlog = dict(
        args=vars(args),
        train_loss=[],
        val_loss=[],
        train_acc=[],
        val_acc=[],
        max_acc=0.0,
    )
    timer = Timer()

    for epoch in range(1, args.max_epoch + 1):
        model.train()
        tl = Averager()
        ta = Averager()
        for i, batch in enumerate(train_loader, 1):
            data, _ = [_.to(device) for _ in batch]
            data = data.reshape(-1, 2, 105, 105)
            p = args.shot * args.train_way
            embedded = model(data)
            embedded_shot, embedded_query = embedded[:p], embedded[p:]
            proto = embedded_shot.reshape(args.shot, args.train_way, -1).mean(dim=0)
            label = torch.arange(args.train_way).repeat(args.query).to(device)
            logits = euclidean_metric(embedded_query, proto)
            loss = F.cross_entropy(logits, label)
            acc = count_acc(logits, label)
            print('epoch {}, train {}/{}, loss={:.4f} acc={:.4f}'
                  .format(epoch, i, len(train_loader), loss.item(), acc))
            tl.add(loss.item())
            ta.add(acc)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # step the schedule once per epoch, after the optimizer updates
        # (calling it before the loop is the deprecated pre-1.1 pattern)
        lr_scheduler.step()
        tl = tl.item()
        ta = ta.item()

        model.eval()
        vl = Averager()
        va = Averager()
        with torch.no_grad():  # validation needs no gradients
            for i, batch in enumerate(val_loader, 1):
                data, _ = [_.to(device) for _ in batch]  # was .cuda(); use the chosen device
                data = data.reshape(-1, 2, 105, 105)
                p = args.shot * args.test_way
                data_shot, data_query = data[:p], data[p:]
                proto = model(data_shot)
                proto = proto.reshape(args.shot, args.test_way, -1).mean(dim=0)
                label = torch.arange(args.test_way).repeat(args.query).to(device)
                logits = euclidean_metric(model(data_query), proto)
                loss = F.cross_entropy(logits, label)
                acc = count_acc(logits, label)
                vl.add(loss.item())
                va.add(acc)
        vl = vl.item()
        va = va.item()
        print('epoch {}, val, loss={:.4f} acc={:.4f}'.format(epoch, vl, va))

        if va > trlog['max_acc']:
            trlog['max_acc'] = va
            save_model('max-acc')

        trlog['train_loss'].append(tl)
        trlog['train_acc'].append(ta)
        trlog['val_loss'].append(vl)
        trlog['val_acc'].append(va)
        torch.save(trlog, osp.join(args.save_path, 'trlog'))
        save_model('epoch-last')
        if epoch % args.save_epoch == 0:
            save_model('epoch-{}'.format(epoch))
        print('ETA:{}/{}'.format(timer.measure(),
                                 timer.measure(epoch / args.max_epoch)))
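# Hedged sketch of the `Timer` helper used above, inferred from its two call
# forms: `measure()` for elapsed time and `measure(p)` for the projected total
# at progress fraction p. The repository's own formatting may differ.
import time


class Timer:
    def __init__(self):
        self.start = time.time()

    def measure(self, p=1.0):
        x = (time.time() - self.start) / p
        if x >= 3600:
            return '{:.1f}h'.format(x / 3600)
        if x >= 60:
            return '{:.0f}m'.format(x / 60)
        return '{:.0f}s'.format(x)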
def pre_train(args):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = ClassifierNet(hid_dim=args.num_filters, z_dim=args.out_dim).to(device)
    optim = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.0005)
    criterion = torch.nn.CrossEntropyLoss().to(device)
    scheduler = torch.optim.lr_scheduler.StepLR(optim, 5000, args.lr_decay)

    iter_counter = 0
    max_val_acc = 0
    model.train()
    # The while/for pattern re-creates the loaders whenever the training
    # loader is exhausted, until args.n_iter iterations have run.
    while iter_counter <= args.n_iter:
        data_train = Dataset64(imsize=84, data_path=args.data_path)
        loader_train = DataLoader(dataset=data_train, batch_size=args.batch_size,
                                  shuffle=True, num_workers=8, pin_memory=True)
        data_valid = MiniImagenet(mode='val', n_way=args.n_way, k_shot=args.k_shot,
                                  k_query=args.k_query, batchsz=200, imsize=84,
                                  data_path=args.data_path, verbose=True)
        loader_valid = DataLoader(data_valid, batch_size=4, shuffle=True,
                                  num_workers=8, pin_memory=True)
        print_header()
        for step, batch in enumerate(loader_train):
            iter_counter += 1
            if iter_counter > args.n_iter:
                break
            data = batch[0].to(device)
            label = batch[1].to(device)
            logits = model(data)
            loss = criterion(logits, label)
            acc = count_acc(logits, label)
            if iter_counter % 100 == 0:
                print_logs(iter_counter, loss.item(), acc, 0, max_val_acc)
            optim.zero_grad()
            loss.backward()
            optim.step()
            scheduler.step()

            if iter_counter % 300 == 0:
                model.eval()
                va = Averager()
                with torch.no_grad():
                    for i, task_batch in enumerate(loader_valid):
                        support_x = task_batch[0].to(device)
                        query_x = task_batch[1].to(device)
                        query_y = task_batch[2].to(device)
                        for inner_batch_idx in range(support_x.shape[0]):
                            predictions = model.forward_proto(
                                support_x[inner_batch_idx],
                                query_x[inner_batch_idx],
                                args.n_way, args.k_shot)
                            va.add(count_acc(predictions, query_y[inner_batch_idx]))
                val_acc = va.item()
                if val_acc > max_val_acc:
                    torch.save(model.state_dict(),
                               './pre_train_file/pre_train-{}_{}.pth'.format(
                                   args.num_filters, args.out_dim))
                    max_val_acc = val_acc
                print_logs(iter_counter, 0, 0, val_acc, max_val_acc, False)
                model.train()
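# `model.forward_proto` is not shown in these snippets. A hedged sketch of
# what it is assumed to compute, mirroring the episodic evaluation elsewhere
# in this file: per-class prototypes from the support embeddings, then
# negative squared Euclidean distance as query logits. The `encoder` argument
# and the (k_shot, n_way) support ordering are assumptions.
def forward_proto(encoder, support_x, query_x, n_way, k_shot):
    proto = encoder(support_x).reshape(k_shot, n_way, -1).mean(dim=0)  # (n_way, d)
    query = encoder(query_x)                                           # (n_query, d)
    return -((query.unsqueeze(1) - proto.unsqueeze(0)) ** 2).sum(dim=2)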
# Excerpt from an evaluation loop; the matching `if` branch precedes this
# excerpt in the original file.
else:
    logits = euclidean_metric(model(data_query), p)
label = torch.arange(args.way).repeat(args.query)
label = label.type(torch.cuda.LongTensor)
if args.bound_correct:
    loss = F.cross_entropy(logits_scaled, label)
    acc = count_acc(logits_scaled, label)
else:
    loss = F.cross_entropy(logits, label)
    acc = count_acc(logits, label)
ave_acc.add(acc)
ave_loss.add(loss.item())
x = None
p = None
logits = None

accuracy_list.append(ave_acc.item() * 100)
loss_list.append(ave_loss.item())
print(accuracy_list)

average_acc = sum(accuracy_list) / len(accuracy_list)
print('average_ACC:{}'.format(average_acc))
# sample variance of the per-run accuracies, then the 95% confidence
# half-width (1.96 * standard error)
accuracy_list = [a - average_acc for a in accuracy_list]
error = sum(np.array(accuracy_list) * np.array(accuracy_list)) / (len(accuracy_list) - 1.0)
print('error:{}'.format(1.96 * np.sqrt(error / len(accuracy_list))))
acc = count_acc(logits, label)
print('epoch {}, train {}/{}, loss={:.4f} acc={:.4f}'.format(
    epoch, i, len(train_loader), loss.item(), acc))
tl.add(loss.item())
ta.add(acc)
optimizer.zero_grad()
loss.backward()
optimizer.step()
proto = None
logits = None
loss = None

tl = tl.item()
ta = ta.item()
model.eval()
vl = Averager()
va = Averager()
for i, batch in enumerate(val_loader, 1):
    data, _ = batch
    p = shot * test_way
    data_shot, data_query = data[:p], data[p:]
    proto = model(data_shot)
    proto = proto.reshape(shot, test_way, -1).mean(dim=0)
for i, batch in enumerate(loader, 1):
    data, _ = [_.cuda() for _ in batch]
    k = args.way * args.shot
    data_shot, meta_support, data_query = data[:k], data[k:2 * k], data[2 * k:]
    x = model(data_shot)
    x = x.reshape(args.shot, args.way, -1).mean(dim=0)
    p = x
    lam = 0.01
    proto = model(meta_support)
    meta_logits = euclidean_metric(proto, p)
    # `F.sigmoid` takes no `dim` argument, so the original call could not run;
    # a row-normalized soft label appears intended, so softmax over classes is
    # used here instead.
    soft_labels = (F.softmax(meta_logits, dim=1) + lam * s_onehot) / (1 + lam)
    proto = torch.mm(soft_labels.permute(1, 0), proto)
    logits = euclidean_metric(model(data_query), proto)
    label = torch.arange(args.way).repeat(args.query)
    label = label.type(torch.cuda.LongTensor)
    acc = count_acc(logits, label)
    ave_acc.add(acc)
    print('batch {}: {:.2f}({:.2f})'.format(i, ave_acc.item() * 100, acc * 100))
    x = None
    p = None
    logits = None
# Excerpt from inside a meta-training loop:
    if i % task_num == 0 and i > 0:
        total_loss = torch.stack(loss_all).sum(0)
        optimizer.zero_grad()
        optimizer_pcg.zero_grad()
        total_loss.backward()
        optimizer.step()
        optimizer_pcg.step()
        loss_all = []
        pcg.reset()
    tl.add(loss.item())
    acc = count_acc(logits, label)
    ta.add(acc)
print('epoch {} acc={:.4f}'.format(epoch, ta.item()))

# validate only every 30 epochs until epoch 400
if epoch < 400 and epoch % 30 != 0:
    continue

vl = Averager()
va = Averager()
for i, batch in enumerate(val_loader, 1):
    with torch.no_grad():
        data, _ = [_.cuda() for _ in batch]
        p = args.shot * args.test_way
        data_shot, data_query = data[:p], data[p:]
        label = torch.arange(args.test_way).repeat(args.query)
        label = label.type(torch.cuda.LongTensor)
        # the original excerpt is truncated mid-call; completed here to match
        # the identical validation call in the earlier snippet
        logits = model(data_shot, data_query, pcg,
                       inner_update_num=args.inner_step)