def evaluate_curve(dir='/tmp/curve/',
                   ckpt=None,
                   num_points=61,
                   dataset='CIFAR10',
                   use_test=True,
                   transform='VGG',
                   data_path=None,
                   batch_size=128,
                   num_workers=4,
                   model_type=None,
                   curve_type=None,
                   num_bends=3,
                   wd=1e-4):
    args = EvalCurveArgSet(dir=dir,
                           ckpt=ckpt,
                           num_points=num_points,
                           dataset=dataset,
                           use_test=use_test,
                           transform=transform,
                           data_path=data_path,
                           batch_size=batch_size,
                           num_workers=num_workers,
                           model=model_type,
                           curve=curve_type,
                           num_bends=num_bends,
                           wd=wd)

    os.makedirs(args.dir, exist_ok=True)

    torch.backends.cudnn.benchmark = True

    loaders, num_classes = data.loaders(args.dataset, args.data_path,
                                        args.batch_size, args.num_workers,
                                        args.transform, args.use_test,
                                        shuffle_train=False)

    architecture = getattr(models, args.model)
    curve = getattr(curves, args.curve)
    model = curves.CurveNet(
        num_classes,
        curve,
        architecture.curve,
        args.num_bends,
        architecture_kwargs=architecture.kwargs,
    )
    model.cuda()
    checkpoint = torch.load(args.ckpt)
    model.load_state_dict(checkpoint['model_state'])

    criterion = F.cross_entropy
    regularizer = curves.l2_regularizer(args.wd)

    T = args.num_points
    ts = np.linspace(0.0, 1.0, T)
    tr_loss = np.zeros(T)
    tr_nll = np.zeros(T)
    tr_acc = np.zeros(T)
    te_loss = np.zeros(T)
    te_nll = np.zeros(T)
    te_acc = np.zeros(T)
    tr_err = np.zeros(T)
    te_err = np.zeros(T)
    dl = np.zeros(T)

    previous_weights = None

    columns = [
        't', 'Train loss', 'Train nll', 'Train error (%)', 'Test nll',
        'Test error (%)', 'Distance'
    ]

    t = torch.FloatTensor([0.0]).cuda()
    for i, t_value in enumerate(ts):
        t.data.fill_(t_value)
        weights = model.weights(t)
        if previous_weights is not None:
            dl[i] = np.sqrt(np.sum(np.square(weights - previous_weights)))
        previous_weights = weights.copy()

        utils.update_bn(loaders['train'], model, t=t)
        tr_res = utils.test(loaders['train'], model, criterion, regularizer,
                            t=t)
        te_res = utils.test(loaders['test'], model, criterion, regularizer,
                            t=t)

        tr_loss[i] = tr_res['loss']
        tr_nll[i] = tr_res['nll']
        tr_acc[i] = tr_res['accuracy']
        tr_err[i] = 100.0 - tr_acc[i]
        te_loss[i] = te_res['loss']
        te_nll[i] = te_res['nll']
        te_acc[i] = te_res['accuracy']
        te_err[i] = 100.0 - te_acc[i]

        values = [
            t, tr_loss[i], tr_nll[i], tr_err[i], te_nll[i], te_err[i], dl[i]
        ]
        table = tabulate.tabulate([values],
                                  columns,
                                  tablefmt='simple',
                                  floatfmt='10.4f')
        if i % 40 == 0:
            table = table.split('\n')
            table = '\n'.join([table[1]] + table)
        else:
            table = table.split('\n')[2]
        print(table)

    def stats(values, dl):
        min = np.min(values)
        max = np.max(values)
        avg = np.mean(values)
        int = np.sum(0.5 * (values[:-1] + values[1:]) * dl[1:]) / np.sum(dl[1:])
        return min, max, avg, int

    tr_loss_min, tr_loss_max, tr_loss_avg, tr_loss_int = stats(tr_loss, dl)
    tr_nll_min, tr_nll_max, tr_nll_avg, tr_nll_int = stats(tr_nll, dl)
    tr_err_min, tr_err_max, tr_err_avg, tr_err_int = stats(tr_err, dl)

    te_loss_min, te_loss_max, te_loss_avg, te_loss_int = stats(te_loss, dl)
    te_nll_min, te_nll_max, te_nll_avg, te_nll_int = stats(te_nll, dl)
    te_err_min, te_err_max, te_err_avg, te_err_int = stats(te_err, dl)

    print('Length: %.2f' % np.sum(dl))
    print(
        tabulate.tabulate([
            [
                'train loss', tr_loss[0], tr_loss[-1], tr_loss_min,
                tr_loss_max, tr_loss_avg, tr_loss_int
            ],
            [
                'train error (%)', tr_err[0], tr_err[-1], tr_err_min,
                tr_err_max, tr_err_avg, tr_err_int
            ],
            [
                'test nll', te_nll[0], te_nll[-1], te_nll_min, te_nll_max,
                te_nll_avg, te_nll_int
            ],
            [
                'test error (%)', te_err[0], te_err[-1], te_err_min,
                te_err_max, te_err_avg, te_err_int
            ],
        ], ['', 'start', 'end', 'min', 'max', 'avg', 'int'],
                          tablefmt='simple',
                          floatfmt='10.4f'))

    np.savez(
        os.path.join(args.dir, 'curve.npz'),
        ts=ts,
        dl=dl,
        tr_loss=tr_loss, tr_loss_min=tr_loss_min, tr_loss_max=tr_loss_max,
        tr_loss_avg=tr_loss_avg, tr_loss_int=tr_loss_int,
        tr_nll=tr_nll, tr_nll_min=tr_nll_min, tr_nll_max=tr_nll_max,
        tr_nll_avg=tr_nll_avg, tr_nll_int=tr_nll_int,
        tr_acc=tr_acc,
        tr_err=tr_err, tr_err_min=tr_err_min, tr_err_max=tr_err_max,
        tr_err_avg=tr_err_avg, tr_err_int=tr_err_int,
        te_loss=te_loss, te_loss_min=te_loss_min, te_loss_max=te_loss_max,
        te_loss_avg=te_loss_avg, te_loss_int=te_loss_int,
        te_nll=te_nll, te_nll_min=te_nll_min, te_nll_max=te_nll_max,
        te_nll_avg=te_nll_avg, te_nll_int=te_nll_int,
        te_acc=te_acc,
        te_err=te_err, te_err_min=te_err_min, te_err_max=te_err_max,
        te_err_avg=te_err_avg, te_err_int=te_err_int,
    )
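# Illustrative call (not part of the original script): evaluate a 3-bend
# Bezier curve checkpoint connecting two VGG16 endpoints on CIFAR-10. The
# checkpoint and data paths are placeholders; 'VGG16' and 'Bezier' are assumed
# to exist in this repo's `models` and `curves` modules.
#
# evaluate_curve(dir='/tmp/curve/eval',
#                ckpt='/tmp/curve/checkpoint-200.pt',
#                model_type='VGG16',
#                curve_type='Bezier',
#                num_bends=3,
#                dataset='CIFAR10',
#                data_path='./data',
#                use_test=True)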
    f.write('python ' + ' '.join(sys.argv))
    f.write('\n')

###############################################################################
# Define the model
###############################################################################
print('Using model %s' % args.model)
model_cfg = getattr(models, args.model)

###############################################################################
# Load data
###############################################################################
loaders, ntokens = data.loaders(args.dataset,
                                args.data_path,
                                args.batch_size,
                                args.bptt,
                                model_cfg.transform_train,
                                model_cfg.transform_test,
                                use_validation=not args.use_test,
                                use_cuda=args.cuda)

###############################################################################
# Build the model
###############################################################################
print('Preparing model')
print(*model_cfg.args)
print('using ', args.zdim, ' latent space')
model = model_cfg.base(*model_cfg.args,
                       noise_dim=args.zdim,
                       zdim=args.zdim,
                       ntoken=ntokens,
                       ninp=args.emsize,
import data
import models
import curves
import utils
import pickle

parser = argparse.ArgumentParser(description='DNN curve training')
parser.add_argument('--number_points', type=int, default=1, metavar='NM',
                    help='number of points for which to compute the centre of mass')
args = parser.parse_args()

loaders, num_classes = data.loaders("CIFAR10", "data", 128, 1, "VGG", False)
architecture = getattr(models, "VGG16")

number_points = args.number_points
models = [
    architecture.base(num_classes=10, **architecture.kwargs)
    for i in range(number_points)
]
for m in models:
    m.cuda()

base_model = architecture.base(10, **architecture.kwargs)
base_model.cuda()
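# Note: the list of VGG16 instances above is bound to the name `models`, which
# shadows the imported `models` module for the rest of this script, so
# `getattr(models, ...)` can no longer be used below this point.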
def train_model(dir='/tmp/curve/',
                dataset='CIFAR10',
                use_test=True,
                transform='VGG',
                data_path=None,
                batch_size=128,
                num_workers=4,
                model_type=None,
                curve_type=None,
                num_bends=3,
                init_start=None,
                fix_start=True,
                init_end=None,
                fix_end=True,
                init_linear=True,
                resume=None,
                epochs=200,
                save_freq=50,
                lr=0.01,
                momentum=0.9,
                wd=1e-4,
                seed=1):
    args = TrainArgSet(dir=dir,
                       dataset=dataset,
                       use_test=use_test,
                       transform=transform,
                       data_path=data_path,
                       batch_size=batch_size,
                       num_workers=num_workers,
                       model=model_type,
                       curve=curve_type,
                       num_bends=num_bends,
                       init_start=init_start,
                       fix_start=fix_start,
                       init_end=init_end,
                       fix_end=fix_end,
                       init_linear=init_linear,
                       resume=resume,
                       epochs=epochs,
                       save_freq=save_freq,
                       lr=lr,
                       momentum=momentum,
                       wd=wd,
                       seed=seed)

    os.makedirs(args.dir, exist_ok=True)
    with open(os.path.join(args.dir, 'command.sh'), 'w') as f:
        f.write(' '.join(sys.argv))
        f.write('\n')

    torch.backends.cudnn.benchmark = True
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    loaders, num_classes = data.loaders(args.dataset, args.data_path,
                                        args.batch_size, args.num_workers,
                                        args.transform, args.use_test)

    architecture = getattr(models, args.model)

    if args.curve is None:
        model = architecture.base(num_classes=num_classes,
                                  **architecture.kwargs)
    else:
        curve = getattr(curves, args.curve)
        model = curves.CurveNet(
            num_classes,
            curve,
            architecture.curve,
            args.num_bends,
            args.fix_start,
            args.fix_end,
            architecture_kwargs=architecture.kwargs,
        )
        base_model = None
        if args.resume is None:
            for path, k in [(args.init_start, 0),
                            (args.init_end, args.num_bends - 1)]:
                if path is not None:
                    if base_model is None:
                        base_model = architecture.base(
                            num_classes=num_classes, **architecture.kwargs)
                    checkpoint = torch.load(path)
                    print('Loading %s as point #%d' % (path, k))
                    base_model.load_state_dict(checkpoint['model_state'])
                    model.import_base_parameters(base_model, k)
            if args.init_linear:
                print('Linear initialization.')
                model.init_linear()
    model.cuda()

    def learning_rate_schedule(base_lr, epoch, total_epochs):
        alpha = epoch / total_epochs
        if alpha <= 0.5:
            factor = 1.0
        elif alpha <= 0.9:
            factor = 1.0 - (alpha - 0.5) / 0.4 * 0.99
        else:
            factor = 0.01 * (1 - (alpha - 0.9) / 0.1)
        return factor * base_lr

    criterion = F.cross_entropy
    regularizer = None if args.curve is None else curves.l2_regularizer(
        args.wd)
    optimizer = torch.optim.SGD(
        filter(lambda param: param.requires_grad, model.parameters()),
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=args.wd if args.curve is None else 0.0)

    start_epoch = 1
    if args.resume is not None:
        print('Resume training from %s' % args.resume)
        checkpoint = torch.load(args.resume)
        start_epoch = checkpoint['epoch'] + 1
        model.load_state_dict(checkpoint['model_state'])
        optimizer.load_state_dict(checkpoint['optimizer_state'])

    columns = ['ep', 'lr', 'tr_loss', 'tr_acc', 'te_nll', 'te_acc', 'time']

    utils.save_checkpoint(args.dir,
                          start_epoch - 1,
                          model_state=model.state_dict(),
                          optimizer_state=optimizer.state_dict())

    has_bn = utils.check_bn(model)
    test_res = {'loss': None, 'accuracy': None, 'nll': None}
    for epoch in range(start_epoch, args.epochs + 1):
        time_ep = time.time()

        lr = learning_rate_schedule(args.lr, epoch, args.epochs)
        utils.adjust_learning_rate(optimizer, lr)

        train_res = utils.train(loaders['train'], model, optimizer, criterion,
                                regularizer)
        if args.curve is None or not has_bn:
            test_res = utils.test(loaders['test'], model, criterion,
                                  regularizer)

        if epoch % args.save_freq == 0:
            utils.save_checkpoint(args.dir,
                                  epoch,
                                  model_state=model.state_dict(),
                                  optimizer_state=optimizer.state_dict())

        time_ep = time.time() - time_ep
        values = [
            epoch, lr, train_res['loss'], train_res['accuracy'],
            test_res['nll'], test_res['accuracy'], time_ep
        ]

        table = tabulate.tabulate([values],
                                  columns,
                                  tablefmt='simple',
                                  floatfmt='9.4f')
        if epoch % 40 == 1 or epoch == start_epoch:
            table = table.split('\n')
            table = '\n'.join([table[1]] + table)
        else:
            table = table.split('\n')[2]
        print(table)

    if args.epochs % args.save_freq != 0:
        utils.save_checkpoint(args.dir,
                              args.epochs,
                              model_state=model.state_dict(),
                              optimizer_state=optimizer.state_dict())
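# Illustrative call (not part of the original script): train a 3-bend Bezier
# curve between two pretrained VGG16 endpoints. The checkpoint and data paths
# are placeholders; 'VGG16' and 'Bezier' are assumed to exist in `models` and
# `curves`.
#
# train_model(dir='/tmp/curve/',
#             model_type='VGG16',
#             curve_type='Bezier',
#             num_bends=3,
#             init_start='/tmp/endpoint1/checkpoint-200.pt',
#             init_end='/tmp/endpoint2/checkpoint-200.pt',
#             fix_start=True,
#             fix_end=True,
#             dataset='CIFAR10',
#             data_path='./data',
#             epochs=200)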
args = parser.parse_args()

torch.backends.cudnn.benchmark = True
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)

print('Using model %s' % args.model)
model_cfg = getattr(models, args.model)

print('Split classes', args.split_classes)
print('Loading dataset %s from %s' % (args.dataset, args.data_path))
loaders, num_classes = data.loaders(args.dataset,
                                    args.data_path,
                                    1,
                                    args.num_workers,
                                    model_cfg.transform_train,
                                    model_cfg.transform_test,
                                    use_validation=not args.use_test,
                                    split_classes=args.split_classes)

args.no_cov_mat = not args.cov_mat

print('Preparing models')
swag_model = swag.SWAG(model_cfg.base,
                       no_cov_mat=args.no_cov_mat,
                       max_num_models=20,
                       loading=True,
                       *model_cfg.args,
else:
    args.device = torch.device('cpu')

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = True

random.seed(args.seed)
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)

print('Using model %s' % args.model)
model_class = getattr(torchvision.models, args.model)

print('Loading ImageNet from %s' % (args.data_path))
loaders, num_classes = data.loaders(
    args.data_path,
    args.batch_size,
    args.num_workers,
)

print('Preparing model')
swag_model = SWAG(model_class,
                  no_cov_mat=not args.cov_mat,
                  loading=True,
                  max_num_models=20,
                  num_classes=num_classes)
swag_model.to(args.device)

criterion = losses.cross_entropy

print('Loading checkpoint %s' % args.ckpt)
checkpoint = torch.load(args.ckpt)
args = parser.parse_args()

args.dir += '{}-{}/'.format(args.model, args.corrupt_epochs)
os.makedirs(args.dir, exist_ok=True)
with open(os.path.join(args.dir, 'command.sh'), 'w') as f:
    f.write(' '.join(sys.argv))
    f.write('\n')

torch.backends.cudnn.benchmark = True
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)

loaders, num_classes = data.loaders(args.dataset,
                                    args.data_path,
                                    args.batch_size,
                                    args.num_workers,
                                    args.transform,
                                    args.use_test,
                                    corrupt=True)

architecture = getattr(models, args.model)

if args.curve is None:
    model = architecture.base(num_classes=num_classes, **architecture.kwargs)
else:
    curve = getattr(curves, args.curve)
    model = curves.CurveNet(
        num_classes,
        curve,
        architecture.curve,
        args.num_bends,
def main():
    """Main entry point"""
    args = parse_args()
    os.makedirs(args.dir, exist_ok=True)

    utils.torch_settings(seed=None, benchmark=True)

    loaders, num_classes = data.loaders(args.dataset,
                                        args.data_path,
                                        args.batch_size,
                                        args.num_workers,
                                        args.transform,
                                        args.use_test,
                                        shuffle_train=False)

    model = init_model(args, num_classes)
    criterion = F.cross_entropy
    regularizer = curves.l2_regularizer(args.wd)

    (ts, tr_loss, tr_nll, tr_acc, te_loss, te_nll, te_acc, tr_err, te_err,
     dl) = init_metrics(args.num_points)

    previous_weights = None

    columns = [
        "t", "Train loss", "Train nll", "Train error (%)", "Test nll",
        "Test error (%)"
    ]
    curve_metrics = metrics.TestCurve(args.num_points, columns)

    t = torch.FloatTensor([0.0]).cuda()
    for i, t_value in enumerate(ts):
        t.data.fill_(t_value)
        weights = model.weights(t)
        if previous_weights is not None:
            dl[i] = np.sqrt(np.sum(np.square(weights - previous_weights)))
        previous_weights = weights.copy()

        utils.update_bn(loaders["train"], model, t=t)
        tr_res = utils.test(loaders["train"], model, criterion, regularizer,
                            t=t)
        te_res = utils.test(loaders["test"], model, criterion, regularizer,
                            t=t)

        tr_loss[i] = tr_res["loss"]
        tr_nll[i] = tr_res["nll"]
        tr_acc[i] = tr_res["accuracy"]
        tr_err[i] = 100.0 - tr_acc[i]
        te_loss[i] = te_res["loss"]
        te_nll[i] = te_res["nll"]
        te_acc[i] = te_res["accuracy"]
        te_err[i] = 100.0 - te_acc[i]
        curve_metrics.add_meas(i, tr_res, te_res)

        values = [t, tr_loss[i], tr_nll[i], tr_err[i], te_nll[i], te_err[i]]
        table = tabulate.tabulate([values],
                                  columns,
                                  tablefmt="simple",
                                  floatfmt="10.4f")
        print(curve_metrics.table(i, with_header=i % 40 == 0))
        if i % 40 == 0:
            table = table.split("\n")
            table = "\n".join([table[1]] + table)
        else:
            table = table.split("\n")[2]
        print(table)

    def stats(values, dl):
        min = np.min(values)
        max = np.max(values)
        avg = np.mean(values)
        int = np.sum(0.5 * (values[:-1] + values[1:]) * dl[1:]) / np.sum(dl[1:])
        return min, max, avg, int

    tr_loss_min, tr_loss_max, tr_loss_avg, tr_loss_int = stats(tr_loss, dl)
    tr_nll_min, tr_nll_max, tr_nll_avg, tr_nll_int = stats(tr_nll, dl)
    tr_err_min, tr_err_max, tr_err_avg, tr_err_int = stats(tr_err, dl)

    te_loss_min, te_loss_max, te_loss_avg, te_loss_int = stats(te_loss, dl)
    te_nll_min, te_nll_max, te_nll_avg, te_nll_int = stats(te_nll, dl)
    te_err_min, te_err_max, te_err_avg, te_err_int = stats(te_err, dl)

    print("Length: %.2f" % np.sum(dl))
    print(
        tabulate.tabulate([
            [
                "train loss", tr_loss[0], tr_loss[-1], tr_loss_min,
                tr_loss_max, tr_loss_avg, tr_loss_int
            ],
            [
                "train error (%)", tr_err[0], tr_err[-1], tr_err_min,
                tr_err_max, tr_err_avg, tr_err_int
            ],
            [
                "test nll", te_nll[0], te_nll[-1], te_nll_min, te_nll_max,
                te_nll_avg, te_nll_int
            ],
            [
                "test error (%)", te_err[0], te_err[-1], te_err_min,
                te_err_max, te_err_avg, te_err_int
            ],
        ], ["", "start", "end", "min", "max", "avg", "int"],
                          tablefmt="simple",
                          floatfmt="10.4f"))

    np.savez(
        os.path.join(args.dir, "curve.npz"),
        ts=ts,
        dl=dl,
        tr_loss=tr_loss, tr_loss_min=tr_loss_min, tr_loss_max=tr_loss_max,
        tr_loss_avg=tr_loss_avg, tr_loss_int=tr_loss_int,
        tr_nll=tr_nll, tr_nll_min=tr_nll_min, tr_nll_max=tr_nll_max,
        tr_nll_avg=tr_nll_avg, tr_nll_int=tr_nll_int,
        tr_acc=tr_acc,
        tr_err=tr_err, tr_err_min=tr_err_min, tr_err_max=tr_err_max,
        tr_err_avg=tr_err_avg, tr_err_int=tr_err_int,
        te_loss=te_loss, te_loss_min=te_loss_min, te_loss_max=te_loss_max,
        te_loss_avg=te_loss_avg, te_loss_int=te_loss_int,
        te_nll=te_nll, te_nll_min=te_nll_min, te_nll_max=te_nll_max,
        te_nll_avg=te_nll_avg, te_nll_int=te_nll_int,
        te_acc=te_acc,
        te_err=te_err, te_err_min=te_err_min, te_err_max=te_err_max,
        te_err_avg=te_err_avg, te_err_int=te_err_int,
    )
def compute_nlls(self, logdirs, model_name, num_classes, setting, log,
                 plen=1, reverse_order=False, max_std=5, max_enslen=10**5):
    loaders, num_classes = data.loaders(
        "CIFAR%d" % num_classes,
        "./data/",
        128,
        1,
        "%s_noDA" % ("VGG" if model_name == "VGG16" else "ResNet"),
        True
    )
    targets = np.array(loaders["test"].dataset.targets)

    ll = 1 if not reverse_order else -1
    if not isinstance(logdirs, list):
        logdirs = [logdirs]

    preds = {}
    for logdir in logdirs:
        for i, p_folder in enumerate(sorted(os.listdir(logdir))):
            if "ipynb" not in p_folder and "run" not in p_folder:
                p_str = p_folder
                x = p_folder.find("_")
                if x > 0:
                    p = float(p_folder[plen:x])
                else:
                    p = float(p_folder[plen:])
                exp_folders = sorted(os.listdir(logdir + "/" + p_folder))
                if p not in preds:
                    preds[p] = []
                for exp_folder in exp_folders:
                    exp_path = logdir + "/" + p_folder + "/" + exp_folder
                    if ("ipynb" not in exp_path and "run" not in exp_path
                            and "skipsameseed" not in exp_folder):
                        for f in sorted(os.listdir(exp_path))[::ll]:
                            if "predictions" in f:
                                fn = exp_path + "/" + f
                                if self.setup == 1:
                                    ppp = softmax(np.float64(np.load(fn)))
                                else:
                                    ppp = np.float64(np.load(fn))
                                acc = np.equal(np.argmax(ppp, axis=1),
                                               targets).mean()
                                if acc > 0.15:
                                    preds[p].append(
                                        ppp[:, :, None] if self.setup == 1
                                        else ppp)

    self.nlls_c = {}
    self.nlls_nc = {}
    self.accs_global = {}
    self.temps_global = {}

    ps = sorted(preds.keys())[::-1]
    try:
        for i, p_marker in enumerate(ps):
            if self.setup == 1 or self.regime == "optimal":
                self.nlls_c[p_marker] = []
                self.nlls_nc[p_marker] = []
                self.accs_global[p_marker] = []
                self.temps_global[p_marker] = []
                leng = min(len(preds[p_marker]), max_enslen)
                for l in range(1, leng + 1):
                    log.print(p_marker, l)
                    accs, c_nlls, nc_nlls, temps = [], [], [], []
                    if l < leng // 2 + 2:
                        count = min(len(preds[p_marker]) // l, max_std)
                        for j in range(count):
                            ret = self.get_ens_quality1_2o(
                                preds[p_marker][j * l:(j + 1) * l], targets)
                            if self.regime == "optimal":
                                acc, nc_nll, c_nll, predictions, temps_ = ret
                            else:  # "grid"
                                acc, c_nll, predictions = ret
                            if acc > 0.15:
                                accs.append(acc)
                                c_nlls.append(c_nll)
                                if self.regime == "optimal":
                                    nc_nlls.append(nc_nll)
                                    temps.append(temps_)
                    else:
                        ret = self.get_ens_quality_cumulative1_2o(
                            predictions, l - 1, preds[p_marker][l - 1],
                            targets)
                        if self.regime == "optimal":
                            acc, nc_nll, c_nll, predictions, temps_ = ret
                        else:  # "grid"
                            acc, c_nll, predictions = ret
                        if acc > 0.15:
                            accs.append(acc)
                            c_nlls.append(c_nll)
                            if self.regime == "optimal":
                                nc_nlls.append(nc_nll)
                                temps.append(temps_)
                    self.nlls_c[p_marker].append(c_nlls)
                    self.nlls_nc[p_marker].append(nc_nlls)
                    self.accs_global[p_marker].append(accs)
                    self.temps_global[p_marker].append(temps)
            else:  # setup == 2, regime == "grid"
                self.nlls_c[p_marker] = {}
                self.nlls_nc[p_marker] = {}
                self.accs_global[p_marker] = {}
                self.temps_global[p_marker] = {}
                for temp in self.temps:
                    log.print(p_marker, temp)
                    preds_p_marker_with_t = [
                        np.exp(apply_t(pr, temp))[:, :, np.newaxis]
                        for pr in preds[p_marker]
                    ]
                    self.nlls_c[p_marker][temp] = []
                    self.nlls_nc[p_marker][temp] = []
                    self.accs_global[p_marker][temp] = []
                    self.temps_global[p_marker][temp] = []
                    leng = min(len(preds_p_marker_with_t), max_enslen)
                    for l in range(1, leng + 1):
                        accs, c_nlls, nc_nlls, temps = [], [], [], []
                        if l < leng // 2 + 2:
                            count = min(len(preds_p_marker_with_t) // l,
                                        max_std)
                            for j in range(count):
                                ret = self.get_ens_quality2g(
                                    preds_p_marker_with_t[j * l:(j + 1) * l],
                                    targets)
                                if self.regime == "optimal":
                                    acc, nc_nll, c_nll, predictions, temps_ = ret
                                else:  # "grid"
                                    acc, c_nll, predictions = ret
                                if acc > 0.15:
                                    accs.append(acc)
                                    c_nlls.append(c_nll)
                                    if self.regime == "optimal":
                                        nc_nlls.append(nc_nll)
                                        temps.append(temps_)
                        else:
                            ret = self.get_ens_quality_cumulative2g(
                                predictions, l - 1,
                                preds_p_marker_with_t[l - 1], targets)
                            if self.regime == "optimal":
                                acc, nc_nll, c_nll, predictions, temps_ = ret
                            else:  # "grid"
                                acc, c_nll, predictions = ret
                            if acc > 0.15:
                                accs.append(acc)
                                c_nlls.append(c_nll)
                                if self.regime == "optimal":
                                    nc_nlls.append(nc_nll)
                                    temps.append(temps_)
                        self.nlls_c[p_marker][temp].append(c_nlls)
                        self.nlls_nc[p_marker][temp].append(nc_nlls)
                        self.accs_global[p_marker][temp].append(accs)
                        self.temps_global[p_marker][temp].append(temps)
        self.save(model_name, num_classes, setting)
    except:
        log.print("Except save")
        self.save(model_name, num_classes, setting)
    return self.nlls_c, self.nlls_nc, self.accs_global, self.temps_global
os.makedirs(args.dir, exist_ok=True)
with open(os.path.join(args.dir, 'command.sh'), 'w') as f:
    f.write(' '.join(sys.argv))
    f.write('\n')

if args.cuda:
    torch.backends.cudnn.benchmark = True
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)

loaders, num_classes = data.loaders(
    args.dataset,
    args.data_path,
    args.batch_size,
    args.num_workers,
    args.transform,
    train_random=True,
    shuffle_train=True,
)

architecture = getattr(models, args.model)
num_classes = int(num_classes)
model = architecture.base(num_classes=num_classes, **architecture.kwargs)
if args.cuda:
    model.cuda()


def learning_rate_schedule(base_lr, epoch, total_epochs):
checkpoint = torch.load(args.ckpt)
# start_epoch = checkpoint['epoch'] + 1
start_epoch = checkpoint['epoch']
model.load_state_dict(checkpoint['model_state'])
model.cuda()

loaders, num_classes = data.loaders(
    args.dataset,
    args.data_path,
    args.batch_size,
    args.num_workers,
    args.transform,
    args.use_test,
    shuffle_train=True,
    weights_generator=regularization.dataset_weights_generator(
        model,
        args.weight_coef,
        func_type=args.weighted_samples,
        normalize=True,
        batch_size=args.batch_size),
    logits_generator=regularization.dataset_logits_generator(
        model,
        transform=getattr(getattr(data.Transforms, args.dataset),
                          args.transform).train,
        batch_size=args.batch_size),
)

# Max = 0
# Min = 0
# for (_, _, logits) in loaders['train']:
#     Max = max(Max, logits.max().item())
#     Min = min(Min, logits.min().item())
def main():
    """Main entry point"""
    args = parse_args()
    utils.torch_settings(seed=args.seed, benchmark=True)

    os.makedirs(args.dir, exist_ok=True)
    store_command(args)

    loaders, num_classes = data.loaders(args.dataset, args.data_path,
                                        args.batch_size, args.num_workers,
                                        args.transform, args.use_test)

    model = init_model(args, num_classes)

    criterion = F.cross_entropy
    regularizer = None if args.curve is None else curves.l2_regularizer(
        args.wd)
    optimizer = torch.optim.SGD(
        filter(lambda param: param.requires_grad, model.parameters()),
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=args.wd if args.curve is None else 0.0)

    start_epoch = 1
    if args.resume is not None:
        print("Resume training from %s" % args.resume)
        checkpoint = torch.load(args.resume)
        start_epoch = checkpoint["epoch"] + 1
        model.load_state_dict(checkpoint["model_state"])
        optimizer.load_state_dict(checkpoint["optimizer_state"])

    utils.save_checkpoint(args.dir,
                          start_epoch - 1,
                          model_state=model.state_dict(),
                          optimizer_state=optimizer.state_dict())

    has_bn = utils.check_bn(model)
    # test_res = {"loss": None, "accuracy": None, "nll": None}
    print("Training")
    for epoch in range(start_epoch, args.epochs + 1):
        lr = learning_rate_schedule(args.lr, epoch, args.epochs)
        utils.adjust_learning_rate(optimizer, lr)

        train_res, test_res, epoch_duration = train_epoch(
            model=model,
            loaders=loaders,
            optimizer=optimizer,
            criterion=criterion,
            regularizer=regularizer,
            args=args,
            has_bn=has_bn)
        save_model(epoch, args.save_freq, args.dir, model, optimizer)
        print_epoch(train_res,
                    test_res,
                    lr=lr,
                    epoch=epoch,
                    start_epoch=start_epoch,
                    epoch_duration=epoch_duration)

    if args.epochs % args.save_freq != 0:
        utils.save_checkpoint(args.dir,
                              args.epochs,
                              model_state=model.state_dict(),
                              optimizer_state=optimizer.state_dict())
dtype = torch.cuda.FloatTensor if args.cuda else torch.FloatTensor
path = args.data
dataset = data.TxtLoader(path)

params = {
    'nhid': args.nhid,
    'nlayers': args.nlayers,
    'dropout': args.dropout,
    'batch': args.batch_size,
    'seq': args.seq,
    'type': dtype,
    'alphabet_size': len(dataset.alphabet)
}

dataloaders = data.loaders(dataset, params)

model = lstm.LSTM(params).type(params['type'])
optimizer = optim.Adam(model.parameters(), lr=args.lr)
criterion = nn.CrossEntropyLoss()


def sequence_to_one_hot(sequence):
    """Turns a sequence of chars into one-hot Tensor"""
    batch_size = params['batch'] * (params['seq'] + 1)
    assert len(sequence) == batch_size, 'Sequence must be a batch'
    tensor = torch.zeros(len(sequence),
                         params['alphabet_size']).type(params['type'])
    for i, c in enumerate(sequence):
parser.add_argument('--wd', type=float, default=1e-4, metavar='WD',
                    help='weight decay (default: 1e-4)')

args = parser.parse_args()

os.makedirs(args.dir, exist_ok=True)

torch.backends.cudnn.benchmark = True

loaders, num_classes = data.loaders(args.dataset, args.data_path,
                                    args.batch_size, args.num_workers,
                                    args.transform, args.use_test,
                                    shuffle_train=False)

architecture = getattr(models, args.model)
curve = getattr(curves, args.curve)
curve_model = curves.CurveNet(
    num_classes,
    curve,
    architecture.curve,
    args.num_bends,
    architecture_kwargs=architecture.kwargs,
)
curve_model.cuda()
args = parser.parse_args()

device_id = 'cuda:' + str(args.device)
torch.cuda.set_device(device_id)

os.makedirs(args.dir, exist_ok=True)

if args.cuda:
    torch.backends.cudnn.benchmark = True

loaders, num_classes = dateset.loaders(
    args.dataset,
    args.data_path,
    args.batch_size,
    args.num_workers,
    args.transform,
    train_random=False,
    shuffle_train=False,
)
num_classes = int(num_classes)
architecture = getattr(models, args.model)

print('connecting {} models via {} {} method'.format(args.model,
                                                     args.point_finder,
                                                     args.method))
beg_time = time.time()

print('getting loaders')
finder_loaders, _ = dateset.loaders(
    args.dataset,
def main():
    parser = argparse.ArgumentParser(description='DNN curve training')
    parser.add_argument('--dir', type=str, default='/tmp/curve/', metavar='DIR',
                        help='training directory (default: /tmp/curve/)')
    parser.add_argument('--dataset', type=str, default='CIFAR10', metavar='DATASET',
                        help='dataset name (default: CIFAR10)')
    parser.add_argument(
        '--use_test',
        action='store_true',
        help='switches between validation and test set (default: validation)')
    parser.add_argument('--transform', type=str, default='VGG', metavar='TRANSFORM',
                        help='transform name (default: VGG)')
    parser.add_argument('--data_path', type=str, default=None, metavar='PATH',
                        help='path to datasets location (default: None)')
    parser.add_argument('--batch_size', type=int, default=128, metavar='N',
                        help='input batch size (default: 128)')
    parser.add_argument('--num-workers', type=int, default=4, metavar='N',
                        help='number of workers (default: 4)')
    parser.add_argument('--model', type=str, default=None, metavar='MODEL',
                        required=True, help='model name (default: None)')
    parser.add_argument('--comment', type=str, default="", metavar='T',
                        help='comment to the experiment')
    parser.add_argument(
        '--resume',
        type=str,
        default=None,
        metavar='CKPT',
        help='checkpoint to resume training from (default: None)')
    parser.add_argument('--epochs', type=int, default=200, metavar='N',
                        help='number of epochs to train (default: 200)')
    parser.add_argument('--save_freq', type=int, default=50, metavar='N',
                        help='save frequency (default: 50)')
    parser.add_argument('--print_freq', type=int, default=1, metavar='N',
                        help='print frequency (default: 1)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='initial learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                        help='SGD momentum (default: 0.9)')
    parser.add_argument('--wd', type=float, default=1e-4, metavar='WD',
                        help='weight decay (default: 1e-4)')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--width', type=int, default=64, metavar='N',
                        help='width of one network')
    parser.add_argument('--num-nets', type=int, default=8, metavar='N',
                        help='number of networks in ensemble')
    parser.add_argument('--num-exps', type=int, default=3, metavar='N',
                        help='number of times for executing the whole script')
    parser.add_argument('--not-random-dir', action='store_true',
                        help='do not randomize dir')
    parser.add_argument('--dropout', type=float, default=0.5, metavar='WD',
                        help='dropout rate for fully-connected layers')
    parser.add_argument('--not-save-weights', action='store_true',
                        help='do not save weights')
    parser.add_argument('--lr-shed', type=str, default='standard', metavar='LRSHED',
                        help='lr schedule name (default: standard)')
    parser.add_argument('--shorten_dataset', action='store_true',
                        help='same train set of size N/num_nets for each net')

    args = parser.parse_args()

    letters = string.ascii_lowercase
    exp_label = "%s_%s/%s" % (args.dataset, args.model, args.comment)
    if args.num_exps > 1:
        if not args.not_random_dir:
            exp_label += "_%s/" % ''.join(
                random.choice(letters) for i in range(5))
        else:
            exp_label += "/"

    np.random.seed(args.seed)
    for exp_num in range(args.num_exps):
        args.seed = np.random.randint(1000)
        fmt_list = [('lr', "3.4e"), ('tr_loss', "3.3e"), ('tr_acc', '9.4f'),
                    ('te_nll', "3.3e"), ('te_acc', '9.4f'),
                    ('ens_acc', '9.4f'), ('ens_nll', '3.3e'), ('time', ".3f")]
        fmt = dict(fmt_list)
        log = logger.Logger(exp_label, fmt=fmt, base=args.dir)
        log.print(" ".join(sys.argv))
        log.print(args)

        torch.backends.cudnn.benchmark = True
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed(args.seed)

        loaders, num_classes = data.loaders(args.dataset, args.data_path,
                                            args.batch_size,
                                            args.num_workers, args.transform,
                                            args.use_test)
        if args.shorten_dataset:
            loaders["train"].dataset.targets = \
                loaders["train"].dataset.targets[:5000]
            loaders["train"].dataset.data = \
                loaders["train"].dataset.data[:5000]

        architecture = getattr(models, args.model)()
        architecture.kwargs["k"] = args.width
        if "VGG" in args.model or "WideResNet" in args.model:
            architecture.kwargs["p"] = args.dropout

        if args.lr_shed == "standard":
            def learning_rate_schedule(base_lr, epoch, total_epochs):
                alpha = epoch / total_epochs
                if alpha <= 0.5:
                    factor = 1.0
                elif alpha <= 0.9:
                    factor = 1.0 - (alpha - 0.5) / 0.4 * 0.99
                else:
                    factor = 0.01
                return factor * base_lr
        elif args.lr_shed == "stair":
            def learning_rate_schedule(base_lr, epoch, total_epochs):
                if epoch < total_epochs / 2:
                    factor = 1.0
                else:
                    factor = 0.1
                return factor * base_lr
        elif args.lr_shed == "exp":
            def learning_rate_schedule(base_lr, epoch, total_epochs):
                factor = 0.9885**epoch
                return factor * base_lr

        criterion = F.cross_entropy
        regularizer = None

        ensemble_size = 0
        predictions_sum = np.zeros((len(loaders['test'].dataset),
                                    num_classes))

        for num_model in range(args.num_nets):
            model = architecture.base(num_classes=num_classes,
                                      **architecture.kwargs)
            model.cuda()

            optimizer = torch.optim.SGD(
                filter(lambda param: param.requires_grad, model.parameters()),
                lr=args.lr,
                momentum=args.momentum,
                weight_decay=args.wd)

            start_epoch = 1
            if args.resume is not None:
                print('Resume training from %s' % args.resume)
                checkpoint = torch.load(args.resume)
                start_epoch = checkpoint['epoch'] + 1
                model.load_state_dict(checkpoint['model_state'])
                optimizer.load_state_dict(checkpoint['optimizer_state'])

            has_bn = utils.check_bn(model)
            test_res = {'loss': None, 'accuracy': None, 'nll': None}
            for epoch in range(start_epoch, args.epochs + 1):
                time_ep = time.time()

                lr = learning_rate_schedule(args.lr, epoch, args.epochs)
                utils.adjust_learning_rate(optimizer, lr)

                train_res = utils.train(loaders['train'], model, optimizer,
                                        criterion, regularizer)

                ens_acc = None
                ens_nll = None
                if epoch == args.epochs:
                    predictions_logits, targets = utils.predictions(
                        loaders['test'], model)
                    predictions = F.softmax(
                        torch.from_numpy(predictions_logits), dim=1).numpy()
                    predictions_sum = ensemble_size / (ensemble_size + 1) \
                        * predictions_sum \
                        + predictions / (ensemble_size + 1)
                    ensemble_size += 1
                    ens_acc = 100.0 * np.mean(
                        np.argmax(predictions_sum, axis=1) == targets)
                    predictions_sum_log = np.log(predictions_sum + 1e-15)
                    ens_nll = -metrics.metrics_kfold(
                        predictions_sum_log, targets, n_splits=2, n_runs=5,
                        verbose=False, temp_scale=True)["ll"]
                    np.save(log.path + '/predictions_run%d' % num_model,
                            predictions_logits)

                if not args.not_save_weights and epoch % args.save_freq == 0:
                    utils.save_checkpoint(
                        log.get_checkpoint(epoch),
                        epoch,
                        model_state=model.state_dict(),
                        optimizer_state=optimizer.state_dict())

                time_ep = time.time() - time_ep

                if epoch % args.print_freq == 0:
                    test_res = utils.test(loaders['test'], model, criterion,
                                          regularizer)
                    values = [
                        lr, train_res['loss'], train_res['accuracy'],
                        test_res['nll'], test_res['accuracy'], ens_acc,
                        ens_nll, time_ep
                    ]
                    for (k, _), v in zip(fmt_list, values):
                        log.add(epoch, **{k: v})
                    log.iter_info()
                    log.save(silent=True)

            if not args.not_save_weights:
                utils.save_checkpoint(
                    log.path + '/model_run%d.cpt' % num_model,
                    args.epochs,
                    model_state=model.state_dict(),
                    optimizer_state=optimizer.state_dict())

    return log.path
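# Note on the running ensemble average inside the epoch loop of main() above:
# with ensemble_size = n members already averaged, the update
#     predictions_sum = n / (n + 1) * predictions_sum + predictions / (n + 1)
# keeps predictions_sum equal to the arithmetic mean of all n + 1 softmax
# outputs, so ens_acc and ens_nll are always computed on the current ensemble.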
# Sorry, this is hardcoded for now.
if args.dataset == 'CIFAR10.1':
    # from torchvision import transforms
    import sys
    sys.path.append('/home/wm326/CIFAR-10.1/code')
    from cifar10_1_dataset import cifar10_1

    dataset = cifar10_1(transform=model_cfg.transform_test)
    test_data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=args.batch_size,
        num_workers=args.num_workers)

    loaders, num_classes = data.loaders(args.dataset[:-2],
                                        args.data_path,
                                        args.batch_size,
                                        args.num_workers,
                                        model_cfg.transform_train,
                                        model_cfg.transform_test,
                                        use_validation=not args.use_test,
                                        split_classes=None)
    loaders['test'] = test_data_loader
else:
    print('Loading dataset %s from %s' % (args.dataset, args.data_path))
    loaders, num_classes = data.loaders(args.dataset,
                                        args.data_path,
                                        args.batch_size,
                                        args.num_workers,
                                        model_cfg.transform_train,
                                        model_cfg.transform_test,
                                        use_validation=not args.use_test,
                                        split_classes=None)