def train(save_pth, use_mixup, mixup_alpha):
    model, criteria = set_model()
    ema = EMA(model, ema_alpha)
    optim, lr_scheduler = set_optimizer(model)
    dltrain = get_train_loader(
        batch_size=batchsize, num_workers=n_workers,
        dataset=ds_name, pin_memory=False
    )
    for e in range(n_epochs):
        tic = time.time()
        loss_avg = train_one_epoch(model, criteria, dltrain, optim, ema, use_mixup, mixup_alpha)
        lr_scheduler.step()
        # Evaluate the raw weights, then the EMA shadow weights.
        acc = evaluate(model, verbose=False)
        ema.apply_shadow()
        acc_ema = evaluate(model, verbose=False)
        ema.restore()
        toc = time.time()
        msg = 'epoch: {}, loss: {:.4f}, lr: {:.4f}, acc: {:.4f}, acc_ema: {:.4f}, time: {:.2f}'.format(
            e, loss_avg, optim.param_groups[0]['lr'], acc, acc_ema, toc - tic)
        print(msg)
    save_model(model, save_pth)
    print('done')
    return model
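# A minimal sketch of an EMA helper exposing the apply_shadow()/restore()
# interface assumed by train() above. It illustrates the usual shadow-weights
# pattern and is not necessarily the EMA implementation used in this code;
# the class name SimpleEMA and its internals are assumptions.
import torch


class SimpleEMA:
    """Keeps an exponential moving average (shadow copy) of model weights."""

    def __init__(self, model, alpha=0.999):
        self.model = model
        self.alpha = alpha
        self.shadow = {k: v.detach().clone() for k, v in model.state_dict().items()}
        self.backup = {}

    def update(self):
        # shadow <- alpha * shadow + (1 - alpha) * current weights
        with torch.no_grad():
            for k, v in self.model.state_dict().items():
                if v.dtype.is_floating_point:
                    self.shadow[k].mul_(self.alpha).add_(v.detach(), alpha=1.0 - self.alpha)
                else:
                    self.shadow[k].copy_(v)

    def apply_shadow(self):
        # Swap the averaged weights in (e.g. before evaluation).
        self.backup = {k: v.detach().clone() for k, v in self.model.state_dict().items()}
        self.model.load_state_dict(self.shadow)

    def restore(self):
        # Swap the live training weights back.
        self.model.load_state_dict(self.backup)
        self.backup = {}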
def sort_unlabeled(ema):
    ema.apply_shadow()
    ema.model.eval()
    ema.model.cuda()
    dltrain_x, dltrain_u = get_train_loader(10, 2000, 1, L=args.n_labeled, seed=args.seed)
    for ims_w, ims_s, lbs in dltrain_u:
        ims = ims_w.cuda()
        with torch.no_grad():
            logits = ema.model(ims)
            scores = torch.softmax(logits, dim=1)
            predictions, preds = torch.max(scores, dim=1)
        # Rank samples by the confidence of their predicted pseudo-label.
        top = torch.argsort(predictions, descending=True).cpu()
        preds = preds.cpu()
        predictions = predictions.cpu()
        # Save the indices and pseudo-labels of the 1000 most confident samples.
        suffix = ("cifar10pB" + str(args.balance) + "." + str(args.seed)
                  + "WD" + str(args.weight_decay) + "LR" + str(args.lr)
                  + "DT" + str(args.delT) + "T" + str(args.thr))
        np.save("dataset/pseudolabels/top/top_preds" + suffix, top[0:1000])
        np.save("dataset/pseudolabels/top/top_labels" + suffix, preds[top[0:1000]])
    ema.restore()
    return
def sort_unlabeled(ema, numPerClass):
    ema.apply_shadow()
    ema.model.eval()
    ema.model.cuda()
    n_iters_per_epoch = args.n_imgs_per_epoch // args.batchsize
    _, _, dltrain_all = get_train_loader(args.batchsize, 1, 1, n_iters_per_epoch,
                                         L=args.n_classes * numPerClass, seed=args.seed)
    predicted = []
    labels = []
    for ims_w, _, _, _, lbs in dltrain_all:
        ims = ims_w.cuda()
        labels.append(lbs)
        with torch.no_grad():
            logits, _, _ = ema.model(ims)
            scores = torch.softmax(logits, dim=1)
            predicted.append(scores.cpu())
    print("number of label batches: ", len(labels))
    labels = np.concatenate(labels, axis=0)
    print("number of labels: ", len(labels))
    predicted = np.concatenate(predicted, axis=0)
    preds = predicted.argmax(1)
    probs = predicted.max(1)
    # Indices of all samples, sorted by descending confidence.
    top = np.argsort(-probs, axis=0)
    del dltrain_all, logits

    labeledSize = args.n_classes * numPerClass
    unique_train_pseudo_labels, unique_train_counts = np.unique(preds, return_counts=True)
    print("Number of training pseudo-labels in each class: ", unique_train_counts,
          " for classes: ", unique_train_pseudo_labels)

    # Pick the numPerClass most confident samples of each pseudo-label class.
    sortByClass = np.random.randint(0, high=len(top), size=(args.n_classes, numPerClass), dtype=int)
    indx = np.zeros([args.n_classes], dtype=int)
    matches = np.zeros([args.n_classes, numPerClass], dtype=int)
    labls = preds[top]
    samples = top
    for i in range(len(top)):
        if indx[labls[i]] < numPerClass:
            sortByClass[labls[i], indx[labls[i]]] = samples[i]
            if labls[i] == labels[top[i]]:
                matches[labls[i], indx[labls[i]]] = 1
            indx[labls[i]] += 1
    if min(indx) < numPerClass:
        print("Counts of at least one class ", indx, " is lower than ", numPerClass)

    name = "dataset/seeds/size" + str(labeledSize) + "." + get_random_string(8) + ".npy"
    np.save(name, sortByClass[0:args.n_classes, :numPerClass])
    classAcc = 100 * np.sum(matches, axis=1) / numPerClass
    print("Accuracy of the predicted pseudo-labels: top ", labeledSize, ", ",
          np.mean(classAcc), classAcc)
    ema.restore()
    return name
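# get_random_string() is used above to give the seed file a unique suffix but
# is not defined in this snippet. A minimal stand-in (an assumption; the real
# helper may differ) could be:
import random
import string


def get_random_string(length):
    # Random lowercase-alphanumeric string of the requested length.
    return ''.join(random.choices(string.ascii_lowercase + string.digits, k=length))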
def train():
    model, criteria_x, criteria_u = set_model()
    n_iters_per_epoch = n_imgs_per_epoch // batchsize
    dltrain_x, dltrain_u = get_train_loader(
        batchsize, n_iters_per_epoch, L=250, K=n_guesses)
    lb_guessor = LabelGuessor(model, T=temperature)
    mixuper = MixUp(mixup_alpha)
    ema = EMA(model, ema_alpha)

    optim = torch.optim.Adam(model.parameters(), lr=lr)
    # Ramp the unlabeled-loss weight up linearly over training.
    lam_u_epoch = float(lam_u) / n_epoches
    lam_u_once = lam_u_epoch / n_iters_per_epoch

    train_args = dict(
        model=model,
        criteria_x=criteria_x,
        criteria_u=criteria_u,
        optim=optim,
        ema=ema,
        wd=1 - weight_decay * lr,
        dltrain_x=dltrain_x,
        dltrain_u=dltrain_u,
        lb_guessor=lb_guessor,
        mixuper=mixuper,
        lambda_u=0,
        lambda_u_once=lam_u_once,
    )
    best_acc = -1
    print('start to train')
    for e in range(n_epoches):
        model.train()
        print('epoch: {}'.format(e))
        train_args['lambda_u'] = e * lam_u_epoch
        train_one_epoch(**train_args)
        torch.cuda.empty_cache()

        acc = evaluate(ema)
        best_acc = acc if best_acc < acc else best_acc
        log_msg = [
            'epoch: {}'.format(e),
            'acc: {:.4f}'.format(acc),
            'best_acc: {:.4f}'.format(best_acc)]
        print(', '.join(log_msg))
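# MixUp(mixup_alpha) above is referenced but not defined in this snippet. The
# sketch below shows the standard mixup interpolation of inputs and soft
# targets (Zhang et al.); the class name SimpleMixUp and the exact call
# signature are assumptions, not the repo's actual MixUp class.
import numpy as np
import torch


class SimpleMixUp:
    def __init__(self, alpha):
        self.alpha = alpha

    def __call__(self, ims, lbs):
        # ims: (batch, ...) images; lbs: (batch, n_classes) soft labels.
        lam = np.random.beta(self.alpha, self.alpha)
        idx = torch.randperm(ims.size(0), device=ims.device)
        ims_mix = lam * ims + (1.0 - lam) * ims[idx]
        lbs_mix = lam * lbs + (1.0 - lam) * lbs[idx]
        return ims_mix, lbs_mix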
def train():
    n_iters_per_epoch = args.n_imgs_per_epoch // args.batchsize
    n_iters_all = n_iters_per_epoch * args.n_epochs  # / args.mu_c
    epsilon = 0.000001

    model, criteria_x, criteria_u = set_model()
    lb_guessor = LabelGuessor(thresh=args.thr)
    ema = EMA(model, args.ema_alpha)

    # Exclude 1-D parameters (biases, batch-norm weights) from weight decay.
    wd_params, non_wd_params = [], []
    for param in model.parameters():
        if len(param.size()) == 1:
            non_wd_params.append(param)
        else:
            wd_params.append(param)
    param_list = [{'params': wd_params}, {'params': non_wd_params, 'weight_decay': 0}]
    optim = torch.optim.SGD(param_list, lr=args.lr, weight_decay=args.weight_decay,
                            momentum=args.momentum, nesterov=True)
    lr_schdlr = WarmupCosineLrScheduler(optim, max_iter=n_iters_all, warmup_iter=0)

    dltrain_x, dltrain_u, dltrain_all = get_train_loader(
        args.batchsize, args.mu, args.mu_c, n_iters_per_epoch,
        L=args.n_labeled, seed=args.seed)
    train_args = dict(
        model=model,
        criteria_x=criteria_x,
        criteria_u=criteria_u,
        optim=optim,
        lr_schdlr=lr_schdlr,
        ema=ema,
        dltrain_x=dltrain_x,
        dltrain_u=dltrain_u,
        dltrain_all=dltrain_all,
        lb_guessor=lb_guessor,
    )
    n_labeled = int(args.n_labeled / args.n_classes)
    best_acc, top1 = -1, -1
    results = {'top 1 acc': [], 'best_acc': []}

    # Epochs at which the labeled set is re-grown from confident pseudo-labels.
    b_schedule = [args.n_epochs / 2, 3 * args.n_epochs / 4]
    if args.boot_schedule == 1:
        step = int(args.n_epochs / 3)
        b_schedule = [step, 2 * step]
    elif args.boot_schedule == 2:
        step = int(args.n_epochs / 4)
        b_schedule = [step, 2 * step, 3 * step]

    for e in range(args.n_epochs):
        if args.bootstrap > 1 and (e in b_schedule):
            seed = 99
            n_labeled *= args.bootstrap
            name = sort_unlabeled(ema, n_labeled)
            print("Bootstrap at epoch ", e, " Name = ", name)
            dltrain_x, dltrain_u, dltrain_all = get_train_loader(
                args.batchsize, args.mu, args.mu_c, n_iters_per_epoch,
                L=10 * n_labeled, seed=seed, name=name)
            train_args = dict(
                model=model,
                criteria_x=criteria_x,
                criteria_u=criteria_u,
                optim=optim,
                lr_schdlr=lr_schdlr,
                ema=ema,
                dltrain_x=dltrain_x,
                dltrain_u=dltrain_u,
                dltrain_all=dltrain_all,
                lb_guessor=lb_guessor,
            )

        model.train()
        train_one_epoch(**train_args)
        torch.cuda.empty_cache()

        if args.test == 0 or args.lam_clr < epsilon:
            top1 = evaluate(ema) * 100
        elif args.test == 1:
            memory_data = utils.CIFAR10Pair(root='dataset', train=True,
                                            transform=utils.test_transform, download=False)
            memory_data_loader = DataLoader(memory_data, batch_size=args.batchsize,
                                            shuffle=False, num_workers=16, pin_memory=True)
            test_data = utils.CIFAR10Pair(root='dataset', train=False,
                                          transform=utils.test_transform, download=False)
            test_data_loader = DataLoader(test_data, batch_size=args.batchsize,
                                          shuffle=False, num_workers=16, pin_memory=True)
            c = len(memory_data.classes)  # 10 for CIFAR-10
            top1 = test(model, memory_data_loader, test_data_loader, c, e)

        best_acc = top1 if best_acc < top1 else best_acc
        results['top 1 acc'].append('{:.4f}'.format(top1))
        results['best_acc'].append('{:.4f}'.format(best_acc))
        data_frame = pd.DataFrame(data=results)
        data_frame.to_csv(result_dir + '/' + save_name_pre + '.accuracy.csv', index_label='epoch')

        log_msg = [
            'epoch: {}'.format(e + 1),
            'top 1 acc: {:.4f}'.format(top1),
            'best_acc: {:.4f}'.format(best_acc)]
        print(', '.join(log_msg))
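# WarmupCosineLrScheduler above is constructed with max_iter and warmup_iter
# and stepped once per iteration; the sketch below is one plausible
# implementation (linear warmup followed by cosine decay to zero). It is an
# assumption about the interface, not the repo's actual scheduler class.
import math

from torch.optim.lr_scheduler import _LRScheduler


class WarmupCosineLrSketch(_LRScheduler):
    def __init__(self, optimizer, max_iter, warmup_iter=0, last_epoch=-1):
        self.max_iter = max_iter
        self.warmup_iter = warmup_iter
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        t = self.last_epoch
        if t < self.warmup_iter:
            # Linear warmup from 0 up to the base learning rate.
            scale = t / max(1, self.warmup_iter)
        else:
            # Cosine decay over the remaining iterations.
            progress = (t - self.warmup_iter) / max(1, self.max_iter - self.warmup_iter)
            scale = 0.5 * (1.0 + math.cos(math.pi * progress))
        return [base_lr * scale for base_lr in self.base_lrs]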
(
    best_model_path,
    checkpoint_path,
    log_path,
    snapshots_folder,
) = train_directory_setup(label, model_name, dataset, seq_seed, data_level, base_dir)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
num_workers = 4

# Loads train, validation, and test data
num_classes = int(dataset.split("cifar")[-1])
trainloader = cifar.get_train_loader(data_dir, label, num_classes, num_workers, 128,
                                     seq_seed, data_level, label_dir)
validloader = cifar.get_valid_loader(data_dir, label, num_classes, num_workers, 100,
                                     seq_seed, label_dir)
testloader = cifar.get_test_loader(data_dir, label, num_classes, num_workers, 100, label_dir)

# Model setup
if "category" in label or label in ("lowdim", "glove"):
    if label == "glove":
        model = architecture.CategoryModel(model_name, 50)
    else:
        model = architecture.CategoryModel(model_name, num_classes)
elif label == "bert":
    model = architecture.BERTHighDimensionalModel(model_name, num_classes)
else:
def train(gen_path, save_pth):
    model = Resnet18(n_classes=n_classes, pre_act=pre_act)
    model.train()
    model.cuda()
    criteria = nn.KLDivLoss(reduction='batchmean')

    # The pretrained generator provides soft labels for distillation.
    generator = Resnet18(n_classes=10)
    state_dict = torch.load(gen_path)
    generator.load_state_dict(state_dict)
    generator.train()  # kept in train mode, as in the original setup
    generator.cuda()

    batchsize = 256
    n_workers = 8
    dltrain = get_train_loader(
        batch_size=batchsize, num_workers=n_workers,
        dataset=ds_name, pin_memory=True
    )

    lr0 = 2e-1
    lr_eta = 1e-5
    momentum = 0.9
    wd = 5e-4
    n_epochs = 50
    n_warmup_epochs = 10
    warmup_start_lr = 1e-5
    warmup_method = 'linear'
    optim = torch.optim.SGD(
        model.parameters(), lr=lr0, momentum=momentum, weight_decay=wd
    )
    lr_scheduler = WarmupCosineAnnealingLR(
        optim,
        warmup_start_lr=warmup_start_lr,
        warmup_epochs=n_warmup_epochs,
        warmup=warmup_method,
        max_epochs=n_epochs,
        cos_eta=lr_eta,
    )

    for e in range(n_epochs):
        tic = time.time()
        model.train()
        lr_scheduler.step()
        loss_epoch = []
        for _, (ims, _) in enumerate(dltrain):
            ims = ims.cuda()
            # generate soft labels with the generator
            with torch.no_grad():
                lbs = generator(ims).clone()
                lbs = torch.softmax(lbs, dim=1)
            optim.zero_grad()
            if mixup:
                bs = ims.size(0)
                idx = torch.randperm(bs)
                lam = np.random.beta(mixup_alpha, mixup_alpha)
                ims_mix = lam * ims + (1. - lam) * ims[idx]
                logits = model(ims_mix)
                probs = F.log_softmax(logits, dim=1)
                # mixup on the targets: interpolate the two KL terms
                loss1 = criteria(probs, lbs)
                loss2 = criteria(probs, lbs[idx])
                loss = lam * loss1 + (1. - lam) * loss2
            else:
                logits = model(ims)
                probs = F.log_softmax(logits, dim=1)
                loss = criteria(probs, lbs)
            loss.backward()
            loss_epoch.append(loss.item())
            optim.step()
        model.eval()
        acc = evaluate(model, verbose=False)
        toc = time.time()
        msg = 'epoch: {}, loss: {:.4f}, lr: {:.4f}, acc: {:.4f}, time: {:.2f}'.format(
            e, sum(loss_epoch) / len(loss_epoch), optim.param_groups[0]['lr'], acc, toc - tic
        )
        print(msg)

    model.cpu()
    if hasattr(model, 'module'):
        state_dict = model.module.state_dict()
    else:
        state_dict = model.state_dict()
    torch.save(state_dict, save_pth)
    return model
def train():
    n_iters_per_epoch = args.n_imgs_per_epoch // args.batchsize
    n_iters_all = n_iters_per_epoch * args.n_epochs

    model, criteria_x, criteria_u = set_model()
    dltrain_x, dltrain_u = get_train_loader(args.batchsize, args.mu, n_iters_per_epoch,
                                            L=args.n_labeled, seed=args.seed)
    lb_guessor = LabelGuessor(thresh=args.thr)
    ema = EMA(model, args.ema_alpha)

    # Exclude 1-D parameters (biases, batch-norm weights) from weight decay.
    wd_params, non_wd_params = [], []
    for param in model.parameters():
        if len(param.size()) == 1:
            non_wd_params.append(param)
        else:
            wd_params.append(param)
    param_list = [{'params': wd_params}, {'params': non_wd_params, 'weight_decay': 0}]
    optim = torch.optim.SGD(param_list, lr=args.lr, weight_decay=args.weight_decay,
                            momentum=args.momentum, nesterov=True)
    lr_schdlr = WarmupCosineLrScheduler(optim, max_iter=n_iters_all, warmup_iter=0)

    train_args = dict(
        model=model,
        criteria_x=criteria_x,
        criteria_u=criteria_u,
        optim=optim,
        lr_schdlr=lr_schdlr,
        ema=ema,
        dltrain_x=dltrain_x,
        dltrain_u=dltrain_u,
        lb_guessor=lb_guessor,
        lambda_u=args.lam_u,
        lambda_c=args.lam_c,
        n_iters=n_iters_per_epoch,
    )
    best_acc = -1
    print('start to train')
    for e in range(args.n_epochs):
        model.train()
        print('epoch: {}'.format(e + 1))
        train_one_epoch(**train_args)
        torch.cuda.empty_cache()

        acc = evaluate(ema)
        best_acc = acc if best_acc < acc else best_acc
        log_msg = [
            'epoch: {}'.format(e + 1),
            'acc: {:.4f}'.format(acc),
            'best_acc: {:.4f}'.format(best_acc)
        ]
        print(', '.join(log_msg))

    # After training, rank and export confident pseudo-labels with the EMA model.
    sort_unlabeled(ema)
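# LabelGuessor(thresh=...) above is not defined in this snippet. In
# FixMatch-style training it typically runs the model on weakly augmented
# unlabeled images and keeps only pseudo-labels whose confidence exceeds the
# threshold; the sketch below assumes that behaviour and a (model, images)
# call signature, which may differ from the actual class. (The earlier
# MixMatch-style train() instead builds LabelGuessor(model, T=temperature),
# i.e. a temperature-sharpened guesser.)
import torch


class ThresholdLabelGuessor:
    def __init__(self, thresh):
        self.thresh = thresh

    @torch.no_grad()
    def __call__(self, model, ims_weak):
        was_training = model.training
        model.eval()
        probs = torch.softmax(model(ims_weak), dim=1)
        scores, lbs = torch.max(probs, dim=1)
        mask = scores.ge(self.thresh)  # keep only confident predictions
        if was_training:
            model.train()
        return lbs, mask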