def __init__(self, input_shape, architecture_args, pretrained_arg=None,
             device='cuda', loss_function='ce', add_noise=False,
             noise_type='Gaussian', noise_std=0.0, loss_function_param=None,
             load_from=None, **kwargs):
    """Build a standard classifier: a (possibly pretrained) representation
    network followed by a feed-forward classification head.

    :param input_shape: shape of one example, must have exactly 3 dimensions
        (presumably (C, H, W) — TODO confirm channel order against datasets).
    :param architecture_args: dict; its 'classifier' entry describes the head.
    :param pretrained_arg: spec passed to pretrained_models.get_pretrained_model.
    :param device: torch device string the classifier is moved to.
    :param loss_function: name of the training loss (e.g. 'ce').
    :param add_noise: whether gradient noise is enabled.
    :param noise_type: gradient-noise distribution name ('Gaussian' here).
    :param noise_std: standard deviation of the gradient noise.
    :param loss_function_param: optional extra scalar for the loss.
    :param load_from: optional checkpoint path to copy classifier weights from.
    """
    super(StandardClassifier, self).__init__(**kwargs)

    # Record all constructor arguments so the model can be re-instantiated
    # from a saved checkpoint.
    self.args = {
        'input_shape': input_shape,
        'architecture_args': architecture_args,
        'pretrained_arg': pretrained_arg,
        'device': device,
        'loss_function': loss_function,
        'add_noise': add_noise,
        'noise_type': noise_type,
        'noise_std': noise_std,
        'loss_function_param': loss_function_param,
        'load_from': load_from,
        'class': 'StandardClassifier'
    }

    assert len(input_shape) == 3
    # Prepend None as a placeholder for the batch dimension.
    self.input_shape = [None] + list(input_shape)
    self.architecture_args = architecture_args
    self.pretrained_arg = pretrained_arg
    self.device = device
    self.loss_function = loss_function
    self.add_noise = add_noise
    self.noise_type = noise_type
    self.noise_std = noise_std
    self.loss_function_param = loss_function_param
    self.load_from = load_from

    # initialize the network: frozen/pretrained feature extractor first
    self.repr_net = pretrained_models.get_pretrained_model(
        self.pretrained_arg, self.input_shape, self.device)
    self.repr_shape = self.repr_net.output_shape
    # The classification head is parsed from the architecture description;
    # its output width defines the number of classes.
    self.classifier, output_shape = nn_utils.parse_feed_forward(
        args=self.architecture_args['classifier'],
        input_shape=self.repr_shape)
    self.num_classes = output_shape[-1]
    self.classifier = self.classifier.to(self.device)

    # Autograd hook class used to inject noise into gradients when enabled.
    self.grad_noise_class = nn_utils.get_grad_noise_class(
        standard_dev=noise_std, q_dist=noise_type)

    if self.load_from is not None:
        print("Loading the classifier model from {}".format(load_from))
        # Load on CPU first, then copy each parameter tensor to the target
        # device; keys are matched by parameter name.
        stored_net = utils.load(load_from, device='cpu')
        stored_net_params = dict(stored_net.classifier.named_parameters())
        for key, param in self.classifier.named_parameters():
            param.data = stored_net_params[key].data.to(self.device)
def __init__(self, path, device):
    """Wrap a pretrained VAE loaded from a checkpoint and freeze its weights.

    :param path: checkpoint path understood by ``utils.load``.
    :param device: device the stored VAE is loaded onto.
    """
    super(PretrainedVAE, self).__init__()
    self.vae = utils.load(path, device=device)
    # Latent representation shape; None stands for the batch dimension.
    # NOTE(review): 128 is hard-coded — presumably the stored VAE's latent
    # size; confirm against the checkpoint architecture.
    self.output_shape = [None, 128]

    # freeze weights
    # (The original built an intermediate name->param dict and indexed it
    # inside a loop over named_parameters(); iterating the parameters
    # directly is equivalent and avoids the redundant dictionary.)
    for param in self.vae.parameters():
        param.requires_grad = False
def __init__(self, num_classes=2, pretrained=True, device="cuda",
             loss_function="ce", add_noise=False, noise_type="Gaussian",
             noise_std=0.0, loss_function_param=None, load_from=None,
             **kwargs):
    """Cover classifier: ResNet-50 backbone plus a bias-free linear head.

    :param num_classes: number of output classes (binary by default).
    :param pretrained: forwarded to ``resnet.resnet50`` (pretrained weights).
    :param device: torch device string the classifier is moved to.
    :param loss_function: name of the training loss (e.g. "ce").
    :param add_noise: whether gradient noise is enabled.
    :param noise_type: gradient-noise distribution ("Gaussian"/"Laplace").
    :param noise_std: standard deviation of the gradient noise.
    :param loss_function_param: optional extra scalar for the loss.
    :param load_from: optional checkpoint path to copy classifier weights from.
    """
    super(CoverModel, self).__init__(**kwargs)

    # Record constructor arguments so the model can be re-instantiated
    # from a saved checkpoint.
    self.args = {
        "num_classes": num_classes,
        "pretrained": pretrained,
        "device": device,
        "loss_function": loss_function,
        "add_noise": add_noise,
        "noise_type": noise_type,
        "noise_std": noise_std,
        "loss_function_param": loss_function_param,
        "load_from": load_from,
        "class": "CoverModel",
    }

    self.device = device
    self.loss_function = loss_function
    self.add_noise = add_noise
    self.noise_type = noise_type
    self.noise_std = noise_std
    self.loss_function_param = loss_function_param
    self.load_from = load_from
    self.num_classes = num_classes

    # initialize the network
    # FIX: renamed misspelled local 'feaure_channels' -> 'feature_channels'.
    feature_channels = 2048  # 2048 for resnet50, 512 for resnet18
    self.classifier = nn.ModuleDict({
        "backbone": resnet.resnet50(pretrained),
        "fc": nn.Linear(feature_channels, num_classes, bias=False),
    })
    self.classifier = self.classifier.to(self.device)

    # Autograd hook class used to inject noise into gradients when enabled.
    self.grad_noise_class = nn_utils.get_grad_noise_class(
        standard_dev=noise_std, q_dist=noise_type)

    if self.load_from is not None:
        print("Loading the classifier model from {}".format(load_from))
        # Load on CPU first, then copy each parameter tensor to the target
        # device; keys are matched by parameter name.
        stored_net = utils.load(load_from, device="cpu")
        stored_net_params = dict(stored_net.classifier.named_parameters())
        for key, param in self.classifier.named_parameters():
            param.data = stored_net_params[key].data.to(self.device)
def main():
    """Evaluate a saved model on the test split; optionally dump predictions
    and accuracy to --output_dir."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--device', '-d', default='cuda')
    parser.add_argument('--batch_size', '-b', type=int, default=256)
    parser.add_argument('--seed', type=int, default=42)
    # Dataset/noise flags below are consumed by datasets.load_data_from_arguments.
    parser.add_argument('--dataset', '-D', type=str, default='mnist',
                        choices=['mnist', 'cifar10', 'cifar100', 'clothing1m', 'imagenet'])
    parser.add_argument('--data_augmentation', '-A', action='store_true',
                        dest='data_augmentation')
    parser.set_defaults(data_augmentation=False)
    parser.add_argument('--num_train_examples', type=int, default=None)
    parser.add_argument('--label_noise_level', '-n', type=float, default=0.0)
    parser.add_argument('--label_noise_type', type=str, default='flip',
                        choices=['flip', 'error', 'cifar10_custom'])
    parser.add_argument('--transform_function', type=str, default=None,
                        choices=[None, 'remove_random_chunks'])
    parser.add_argument('--clean_validation', dest='clean_validation', action='store_true')
    parser.set_defaults(clean_validation=False)
    parser.add_argument('--remove_prob', type=float, default=0.5)
    parser.add_argument('--load_from', type=str, default=None, required=True)
    parser.add_argument('--output_dir', '-o', type=str, default=None)
    args = parser.parse_args()
    print(args)

    # Load data; only the test loader is used here.
    _, _, test_loader = datasets.load_data_from_arguments(args)

    print(f"Testing the model saved at {args.load_from}")
    model = utils.load(args.load_from, device=args.device)

    # Run the model over the whole test set, collecting predictions and labels.
    ret = utils.apply_on_dataset(model, test_loader.dataset,
                                 batch_size=args.batch_size,
                                 output_keys_regexp='pred|label',
                                 description='Testing')
    pred = ret['pred']
    labels = ret['label']

    if args.output_dir is not None:
        with open(os.path.join(args.output_dir, 'test_predictions.pkl'), 'wb') as f:
            pickle.dump({'pred': pred, 'labels': labels}, f)

    # Top-1 accuracy over the test set.
    accuracy = torch.mean((pred.argmax(dim=1) == labels).float())
    print(accuracy)
    if args.output_dir is not None:
        with open(os.path.join(args.output_dir, 'test_accuracy.txt'), 'w') as f:
            f.write("{}\n".format(accuracy))
def estimate_transition(load_from, data_loader, device="cpu", batch_size=256):
    """Estimates the label noise transition matrix.

    The code is adapted from the original implementation.
    Source: https://github.com/giorgiop/loss-correction/.

    :param load_from: path of the model checkpoint to load (required).
    :param data_loader: loader whose dataset the matrix is estimated on.
    :param device: device the resulting matrix tensor is placed on.
    :param batch_size: batch size used when applying the model.
    :return: (c x c) torch.float tensor, rows normalized to sum to 1.
    """
    assert load_from is not None
    model = utils.load(load_from, device=device)
    pred = utils.apply_on_dataset(
        model=model,
        dataset=data_loader.dataset,
        batch_size=batch_size,
        cpu=True,
        description="Estimating transition matrix",
        output_keys_regexp="pred",
    )["pred"]
    pred = torch.softmax(pred, dim=1)
    pred = utils.to_numpy(pred)

    c = model.num_classes
    T = compute_transition_matrix(pred, c)
    T = torch.tensor(T, dtype=torch.float).to(device)
    print(T)
    return T


def compute_transition_matrix(pred, c, filter_outlier=True):
    """Build the (c x c) transition matrix from an (N x c) softmax array.

    For each class i a 'perfect example' is chosen (the sample with the
    highest class-i probability, optionally ignoring the top 3% as outliers)
    and its full prediction row becomes row i of T; rows are normalized.

    :param pred: (N, c) numpy array of softmax predictions; NOT mutated.
    :param c: number of classes.
    :param filter_outlier: drop the top 3% of class-i scores before argmax.
    :return: (c, c) numpy array with rows summing to 1.
    """
    T = np.zeros((c, c))

    # find a 'perfect example' for each class
    for i in range(c):
        if not filter_outlier:
            idx_best = np.argmax(pred[:, i])
        else:
            thresh = _percentile_higher(pred[:, i], 97)
            # BUG FIX: the original assigned the column *view* and zeroed it
            # in place, silently mutating `pred` and corrupting the rows read
            # as T[i, j] = pred[idx_best, j] for subsequent classes. Work on
            # a copy so `pred` stays intact.
            robust_eta = pred[:, i].copy()
            robust_eta[robust_eta >= thresh] = 0.0
            idx_best = np.argmax(robust_eta)
        for j in range(c):
            T[i, j] = pred[idx_best, j]

    # row normalize
    row_sums = T.sum(axis=1, keepdims=True)
    T /= row_sums
    return T


def _percentile_higher(values, q):
    """Percentile with the 'higher' selection rule, across NumPy versions.

    NumPy renamed ``interpolation=`` to ``method=`` in 1.22 and removed the
    old keyword in 2.0; try the new spelling first and fall back for old
    NumPy releases.
    """
    try:
        return np.percentile(values, q, method="higher")
    except TypeError:
        return np.percentile(values, q, interpolation="higher")
def __init__(
    self,
    input_shape,
    architecture_args,
    pretrained_arg=None,
    device="cuda",
    loss_function="ce",
    add_noise=False,
    noise_type="Gaussian",
    noise_std=0.0,
    loss_function_param=None,
    load_from=None,
    **kwargs
):
    """Build a standard classifier: a (possibly pretrained) representation
    network followed by a feed-forward classification head.

    :param input_shape: shape of one example, must have exactly 3 dimensions
        (presumably (C, H, W) — TODO confirm channel order against datasets).
    :param architecture_args: dict; its "classifier" entry describes the head.
    :param pretrained_arg: spec passed to pretrained_models.get_pretrained_model.
    :param device: torch device string the classifier is moved to.
    :param loss_function: name of the training loss (e.g. "ce").
    :param add_noise: whether gradient noise is enabled.
    :param noise_type: gradient-noise distribution name.
    :param noise_std: standard deviation of the gradient noise.
    :param loss_function_param: optional extra scalar for the loss.
    :param load_from: optional checkpoint path to copy classifier weights from.
    """
    super(StandardClassifier, self).__init__(**kwargs)

    # Record all constructor arguments so the model can be re-instantiated
    # from a saved checkpoint.
    self.args = {
        "input_shape": input_shape,
        "architecture_args": architecture_args,
        "pretrained_arg": pretrained_arg,
        "device": device,
        "loss_function": loss_function,
        "add_noise": add_noise,
        "noise_type": noise_type,
        "noise_std": noise_std,
        "loss_function_param": loss_function_param,
        "load_from": load_from,
        "class": "StandardClassifier",
    }

    assert len(input_shape) == 3
    # Prepend None as a placeholder for the batch dimension.
    self.input_shape = [None] + list(input_shape)
    self.architecture_args = architecture_args
    self.pretrained_arg = pretrained_arg
    self.device = device
    self.loss_function = loss_function
    self.add_noise = add_noise
    self.noise_type = noise_type
    self.noise_std = noise_std
    self.loss_function_param = loss_function_param
    self.load_from = load_from

    # initialize the network: pretrained feature extractor first
    self.repr_net = pretrained_models.get_pretrained_model(
        self.pretrained_arg, self.input_shape, self.device
    )
    self.repr_shape = self.repr_net.output_shape
    # The classification head is parsed from the architecture description;
    # its output width defines the number of classes.
    self.classifier, output_shape = nn_utils.parse_feed_forward(
        args=self.architecture_args["classifier"], input_shape=self.repr_shape
    )
    self.num_classes = output_shape[-1]
    self.classifier = self.classifier.to(self.device)

    # Autograd hook class used to inject noise into gradients when enabled.
    self.grad_noise_class = nn_utils.get_grad_noise_class(
        standard_dev=noise_std, q_dist=noise_type
    )

    if self.load_from is not None:
        print("Loading the classifier model from {}".format(load_from))
        # Load on CPU first, then copy each parameter tensor to the target
        # device; keys are matched by parameter name.
        stored_net = utils.load(load_from, device="cpu")
        stored_net_params = dict(stored_net.classifier.named_parameters())
        for key, param in self.classifier.named_parameters():
            param.data = stored_net_params[key].data.to(self.device)
def main():
    """Train a classifier (optionally with predicted/noisy gradients) from a
    JSON architecture config, then evaluate the best-validation checkpoint on
    the test split and write predictions/accuracy into --log_dir."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', '-c', type=str, required=True)
    parser.add_argument('--device', '-d', default='cuda')
    parser.add_argument('--batch_size', '-b', type=int, default=256)
    parser.add_argument('--epochs', '-e', type=int, default=400)
    parser.add_argument('--stopping_param', type=int, default=50)
    parser.add_argument('--save_iter', '-s', type=int, default=10)
    parser.add_argument('--vis_iter', '-v', type=int, default=10)
    parser.add_argument('--log_dir', '-l', type=str, default=None)
    parser.add_argument('--seed', type=int, default=42)
    # Dataset/noise flags are consumed by datasets.load_data_from_arguments.
    parser.add_argument(
        '--dataset', '-D', type=str, default='mnist',
        choices=['mnist', 'cifar10', 'cifar100', 'clothing1m', 'imagenet'])
    parser.add_argument('--data_augmentation', '-A', action='store_true',
                        dest='data_augmentation')
    parser.set_defaults(data_augmentation=False)
    parser.add_argument('--num_train_examples', type=int, default=None)
    parser.add_argument('--label_noise_level', '-n', type=float, default=0.0)
    parser.add_argument('--label_noise_type', type=str, default='error',
                        choices=['error', 'cifar10_custom'])
    parser.add_argument('--transform_function', type=str, default=None,
                        choices=[None, 'remove_random_chunks'])
    parser.add_argument('--clean_validation', dest='clean_validation',
                        action='store_true')
    parser.set_defaults(clean_validation=False)
    parser.add_argument('--remove_prob', type=float, default=0.5)
    # Model/optimization flags, forwarded to the model constructor below.
    parser.add_argument('--model_class', '-m', type=str,
                        default='StandardClassifier')
    parser.add_argument(
        '--loss_function', type=str, default='ce',
        choices=['ce', 'mse', 'mae', 'gce', 'dmi', 'fw', 'none'])
    parser.add_argument('--loss_function_param', type=float, default=1.0)
    parser.add_argument('--load_from', type=str, default=None)
    parser.add_argument('--grad_weight_decay', '-L', type=float, default=0.0)
    parser.add_argument('--grad_l1_penalty', '-S', type=float, default=0.0)
    parser.add_argument('--lamb', type=float, default=1.0)
    parser.add_argument('--pretrained_arg', '-r', type=str, default=None)
    parser.add_argument('--sample_from_q', action='store_true',
                        dest='sample_from_q')
    parser.set_defaults(sample_from_q=False)
    parser.add_argument('--q_dist', type=str, default='Gaussian',
                        choices=['Gaussian', 'Laplace', 'dot'])
    parser.add_argument('--no-detach', dest='detach', action='store_false')
    parser.set_defaults(detach=True)
    parser.add_argument('--warm_up', type=int, default=0,
                        help='Number of epochs to skip before '
                             'starting to train using predicted gradients')
    parser.add_argument('--weight_decay', type=float, default=0.0)
    parser.add_argument(
        '--add_noise', action='store_true', dest='add_noise',
        help='add noise to the gradients of a standard classifier.')
    parser.set_defaults(add_noise=False)
    parser.add_argument('--noise_type', type=str, default='Gaussian',
                        choices=['Gaussian', 'Laplace'])
    parser.add_argument('--noise_std', type=float, default=0.0)
    parser.add_argument('--lr', type=float, default=1e-3,
                        help='Learning rate')
    args = parser.parse_args()
    print(args)

    # Load data
    train_loader, val_loader, test_loader = datasets.load_data_from_arguments(
        args)

    # Options
    optimization_args = {
        'optimizer': {
            'name': 'adam',
            'lr': args.lr,
            'weight_decay': args.weight_decay
        }
    }
    # Alternative (SGD + step scheduler), kept for reference:
    # optimization_args = {
    #     'optimizer': {
    #         'name': 'sgd',
    #         'lr': 1e-3,
    #     },
    #     'scheduler': {
    #         'step_size': 15,
    #         'gamma': 1.25
    #     }
    # }

    with open(args.config, 'r') as f:
        architecture_args = json.load(f)

    # Resolve the model class by name from the methods module and build it;
    # input_shape is taken from the first training example.
    model_class = getattr(methods, args.model_class)
    model = model_class(input_shape=train_loader.dataset[0][0].shape,
                        architecture_args=architecture_args,
                        pretrained_arg=args.pretrained_arg,
                        device=args.device,
                        grad_weight_decay=args.grad_weight_decay,
                        grad_l1_penalty=args.grad_l1_penalty,
                        lamb=args.lamb,
                        sample_from_q=args.sample_from_q,
                        q_dist=args.q_dist,
                        load_from=args.load_from,
                        loss_function=args.loss_function,
                        loss_function_param=args.loss_function_param,
                        add_noise=args.add_noise,
                        noise_type=args.noise_type,
                        noise_std=args.noise_std,
                        detach=args.detach,
                        warm_up=args.warm_up)

    metrics_list = []
    # ImageNet additionally reports top-5 accuracy.
    if args.dataset == 'imagenet':
        metrics_list.append(metrics.TopKAccuracy(k=5, output_key='pred'))

    training.train(model=model,
                   train_loader=train_loader,
                   val_loader=val_loader,
                   epochs=args.epochs,
                   save_iter=args.save_iter,
                   vis_iter=args.vis_iter,
                   optimization_args=optimization_args,
                   log_dir=args.log_dir,
                   args_to_log=args,
                   stopping_param=args.stopping_param,
                   metrics=metrics_list)

    # if training finishes successfully, compute the test score
    print("Testing the best validation model...")
    model = utils.load(os.path.join(args.log_dir, 'checkpoints',
                                    'best_val.mdl'),
                       device=args.device)
    pred = utils.apply_on_dataset(model, test_loader.dataset,
                                  batch_size=args.batch_size,
                                  output_keys_regexp='pred',
                                  description='Testing')['pred']
    # Ground-truth labels are read directly from the dataset tuples.
    labels = [p[1] for p in test_loader.dataset]
    labels = torch.tensor(labels, dtype=torch.long)
    labels = utils.to_cpu(labels)

    with open(os.path.join(args.log_dir, 'test_predictions.pkl'), 'wb') as f:
        pickle.dump({'pred': pred, 'labels': labels}, f)

    # Top-1 test accuracy.
    accuracy = torch.mean((pred.argmax(dim=1) == labels).float())
    with open(os.path.join(args.log_dir, 'test_accuracy.txt'), 'w') as f:
        f.write("{}\n".format(accuracy))
from modules.DataAndVisualization.vizualiser import (
    plot_scatter as scatter,
    plot_parallel as parallel,
)
from modules.utils import load

# Visualizing the two bar truss problem.
# You can use the parallel plot with any dimension, but the scatter plot only
# works with 2 and 3 dimensions.

# Objectives as a reminder:
# weight, stress, buckling stress and deflection

# First load the solution. For more details on the solution, check the readme
# file in modules/DataAndVisualization.
obj, var, nadir, ideal = load("tb4")

# This problem is 3 dimensional as it is only optimizing weight, stress and
# buckling stress, so we can use a scatter or a parallel plot to visualize it.
# We will use a scatter plot.

# Set the axis names accordingly.
axis_names = ["weight", "Stress", "buckling stress"]

# (3D) Scatter plot
scatter(obj, axis_names)

# An example on parallel plots:
# a parallel plot will open a new browser window and display the plot there.
# Only 1000 random samples are chosen for the plot.
# You can choose axis ranges interactively to highlight solutions that fall
# in those ranges.
def main():
    """Evaluate a saved model on the test split; optionally dump predictions
    and accuracy to --output_dir."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--device", "-d", default="cuda")
    parser.add_argument("--batch_size", "-b", type=int, default=256)
    parser.add_argument("--seed", type=int, default=42)
    # Dataset/noise flags below are consumed by datasets.load_data_from_arguments.
    parser.add_argument(
        "--dataset",
        "-D",
        type=str,
        default="mnist",
        choices=["mnist", "cifar10", "cifar100", "clothing1m", "imagenet"],
    )
    parser.add_argument(
        "--data_augmentation", "-A", action="store_true", dest="data_augmentation"
    )
    parser.set_defaults(data_augmentation=False)
    parser.add_argument("--num_train_examples", type=int, default=None)
    parser.add_argument("--label_noise_level", "-n", type=float, default=0.0)
    parser.add_argument(
        "--label_noise_type",
        type=str,
        default="flip",
        choices=["flip", "error", "cifar10_custom"],
    )
    parser.add_argument(
        "--transform_function",
        type=str,
        default=None,
        choices=[None, "remove_random_chunks"],
    )
    parser.add_argument(
        "--clean_validation", dest="clean_validation", action="store_true"
    )
    parser.set_defaults(clean_validation=False)
    parser.add_argument("--remove_prob", type=float, default=0.5)
    parser.add_argument("--load_from", type=str, default=None, required=True)
    parser.add_argument("--output_dir", "-o", type=str, default=None)
    args = parser.parse_args()
    print(args)

    # Load data; only the test loader is used here.
    _, _, test_loader = datasets.load_data_from_arguments(args)

    print(f"Testing the model saved at {args.load_from}")
    model = utils.load(args.load_from, device=args.device)

    # Run the model over the whole test set, collecting predictions and labels.
    ret = utils.apply_on_dataset(
        model,
        test_loader.dataset,
        batch_size=args.batch_size,
        output_keys_regexp="pred|label",
        description="Testing",
    )
    pred = ret["pred"]
    labels = ret["label"]

    if args.output_dir is not None:
        with open(os.path.join(args.output_dir, "test_predictions.pkl"), "wb") as f:
            pickle.dump({"pred": pred, "labels": labels}, f)

    # Top-1 accuracy over the test set.
    accuracy = torch.mean((pred.argmax(dim=1) == labels).float())
    print(accuracy)
    if args.output_dir is not None:
        with open(os.path.join(args.output_dir, "test_accuracy.txt"), "w") as f:
            f.write("{}\n".format(accuracy))
def main():
    """Train a ResNet18-k style model for double-descent experiments and test
    both the best-validation and the final checkpoints.

    FIX: the width parameter ``k`` is now propagated to the q-network based on
    the *q-network's* own ``net`` spec; the original checked
    ``architecture_args["classifier"]`` in both conditions (copy-paste bug),
    so a double-descent q-network never received ``k`` unless the classifier
    also happened to be a double-descent net.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", "-c", type=str, required=True)
    parser.add_argument("--device", "-d", default="cuda")
    parser.add_argument("--batch_size", "-b", type=int, default=128)
    parser.add_argument("--epochs", "-e", type=int, default=4000)
    parser.add_argument("--stopping_param", type=int, default=2**30)
    parser.add_argument("--save_iter", "-s", type=int, default=100)
    parser.add_argument("--vis_iter", "-v", type=int, default=10)
    parser.add_argument("--log_dir", "-l", type=str, default=None)
    parser.add_argument("--seed", type=int, default=42)
    # Dataset/noise flags are consumed by datasets.load_data_from_arguments.
    parser.add_argument(
        "--dataset",
        "-D",
        type=str,
        default="cifar10",
        choices=["mnist", "cifar10", "cifar100", "clothing1m", "imagenet"],
    )
    parser.add_argument("--data_augmentation", "-A", action="store_true",
                        dest="data_augmentation")
    parser.set_defaults(data_augmentation=False)
    parser.add_argument("--num_train_examples", type=int, default=None)
    parser.add_argument("--label_noise_level", "-n", type=float, default=0.0)
    parser.add_argument(
        "--label_noise_type",
        type=str,
        default="error",
        choices=["error", "cifar10_custom"],
    )
    parser.add_argument(
        "--transform_function",
        type=str,
        default=None,
        choices=[None, "remove_random_chunks"],
    )
    parser.add_argument("--clean_validation", dest="clean_validation",
                        action="store_true")
    parser.set_defaults(clean_validation=False)
    parser.add_argument("--remove_prob", type=float, default=0.5)
    # Model/optimization flags, forwarded to the model constructor below.
    parser.add_argument("--model_class", "-m", type=str,
                        default="StandardClassifier")
    parser.add_argument("--load_from", type=str, default=None)
    parser.add_argument("--grad_weight_decay", "-L", type=float, default=0.0)
    parser.add_argument("--lamb", type=float, default=1.0)
    parser.add_argument("--pretrained_arg", "-r", type=str, default=None)
    parser.add_argument("--sample_from_q", action="store_true",
                        dest="sample_from_q")
    parser.set_defaults(sample_from_q=False)
    parser.add_argument("--q_dist", type=str, default="Gaussian",
                        choices=["Gaussian", "Laplace", "dot"])
    parser.add_argument("--weight_decay", type=float, default=0.0)
    parser.add_argument("--lr", type=float, default=1e-4, help="Learning rate")
    parser.add_argument(
        "--k",
        "-k",
        type=int,
        required=False,
        default=10,
        help="width parameter of ResNet18-k",
    )
    parser.add_argument("--exclude_percent", type=float, default=0.0)
    args = parser.parse_args()
    print(args)

    # Load data
    train_loader, val_loader, test_loader = datasets.load_data_from_arguments(
        args)

    # Options
    optimization_args = {
        "optimizer": {
            "name": "adam",
            "lr": args.lr,
            "weight_decay": args.weight_decay
        }
    }

    with open(args.config, "r") as f:
        architecture_args = json.load(f)

    # set the width parameter k on each double-descent sub-network
    if ("classifier" in architecture_args
            and architecture_args["classifier"].get(
                "net", "").find("double-descent") != -1):
        architecture_args["classifier"]["k"] = args.k
    # FIX: inspect the q-network's own spec (was architecture_args["classifier"]).
    if ("q-network" in architecture_args
            and architecture_args["q-network"].get(
                "net", "").find("double-descent") != -1):
        architecture_args["q-network"]["k"] = args.k

    # Resolve the model class by name and build it; input_shape is taken
    # from the first training example.
    model_class = getattr(methods, args.model_class)
    model = model_class(
        input_shape=train_loader.dataset[0][0].shape,
        architecture_args=architecture_args,
        pretrained_arg=args.pretrained_arg,
        device=args.device,
        grad_weight_decay=args.grad_weight_decay,
        lamb=args.lamb,
        sample_from_q=args.sample_from_q,
        q_dist=args.q_dist,
        load_from=args.load_from,
        loss_function="ce",
    )

    training.train(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        epochs=args.epochs,
        save_iter=args.save_iter,
        vis_iter=args.vis_iter,
        optimization_args=optimization_args,
        log_dir=args.log_dir,
        args_to_log=args,
        stopping_param=args.stopping_param,
    )

    # test the last model and best model
    models_to_test = [
        {
            "name": "best",
            "file": "best_val.mdl"
        },
        {
            "name": "final",
            "file": "final.mdl"
        },
    ]
    for spec in models_to_test:
        print("Testing the {} model...".format(spec["name"]))
        model = utils.load(os.path.join(args.log_dir, "checkpoints",
                                        spec["file"]),
                           device=args.device)
        pred = utils.apply_on_dataset(
            model,
            test_loader.dataset,
            batch_size=args.batch_size,
            output_keys_regexp="pred",
            description="Testing",
        )["pred"]
        # Ground-truth labels are read directly from the dataset tuples.
        labels = [p[1] for p in test_loader.dataset]
        labels = torch.tensor(labels, dtype=torch.long)
        labels = utils.to_cpu(labels)

        with open(
                os.path.join(args.log_dir,
                             "{}_test_predictions.pkl".format(spec["name"])),
                "wb",
        ) as f:
            pickle.dump({"pred": pred, "labels": labels}, f)

        # Top-1 test accuracy for this checkpoint.
        accuracy = torch.mean((pred.argmax(dim=1) == labels).float())
        with open(
                os.path.join(args.log_dir,
                             "{}_test_accuracy.txt".format(spec["name"])),
                "w") as f:
            f.write("{}\n".format(accuracy))
def main():
    """Train a classifier (including the CoverModel variant) on one of the
    supported datasets, then evaluate the best-validation checkpoint on the
    test split and write predictions/accuracy into --log_dir."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", "-c", type=str, default=None)
    parser.add_argument("--device", "-d", default="cuda")
    parser.add_argument("--batch_size", "-b", type=int, default=256)
    parser.add_argument("--epochs", "-e", type=int, default=400)
    parser.add_argument("--stopping_param", type=int, default=50)
    parser.add_argument("--save_iter", "-s", type=int, default=10)
    parser.add_argument("--vis_iter", "-v", type=int, default=10)
    parser.add_argument("--log_dir", "-l", type=str, default=None)
    parser.add_argument("--seed", type=int, default=42)
    # Dataset/noise flags are consumed by datasets.load_data_from_arguments.
    parser.add_argument(
        "--dataset",
        "-D",
        type=str,
        default="mnist",
        choices=[
            "mnist", "cifar10", "cifar100", "clothing1m", "imagenet", "cover"
        ],
    )
    # parser.add_argument("--image-root", metavar="DIR", help="path to images")
    # parser.add_argument("--label", metavar="DIR", help="path to label file")
    parser.add_argument("--data_augmentation", "-A", action="store_true",
                        dest="data_augmentation")
    parser.set_defaults(data_augmentation=False)
    parser.add_argument("--num_train_examples", type=int, default=None)
    parser.add_argument("--label_noise_level", "-n", type=float, default=0.0)
    parser.add_argument(
        "--label_noise_type",
        type=str,
        default="error",
        choices=["error", "cifar10_custom"],
    )
    parser.add_argument(
        "--transform_function",
        type=str,
        default=None,
        choices=[None, "remove_random_chunks"],
    )
    parser.add_argument("--clean_validation", dest="clean_validation",
                        action="store_true")
    parser.set_defaults(clean_validation=False)
    parser.add_argument("--remove_prob", type=float, default=0.5)
    # Model/optimization flags, forwarded to the model constructor below.
    parser.add_argument("--model_class", "-m", type=str,
                        default="StandardClassifier")
    parser.add_argument(
        "--loss_function",
        type=str,
        default="ce",
        choices=["ce", "mse", "mae", "gce", "dmi", "fw", "none"],
    )
    parser.add_argument("--loss_function_param", type=float, default=1.0)
    parser.add_argument("--load_from", type=str, default=None)
    parser.add_argument("--grad_weight_decay", "-L", type=float, default=0.0)
    parser.add_argument("--grad_l1_penalty", "-S", type=float, default=0.0)
    parser.add_argument("--lamb", type=float, default=1.0)
    parser.add_argument("--pretrained_arg", "-r", type=str, default=None)
    parser.add_argument("--sample_from_q", action="store_true",
                        dest="sample_from_q")
    parser.set_defaults(sample_from_q=False)
    parser.add_argument("--q_dist", type=str, default="Gaussian",
                        choices=["Gaussian", "Laplace", "dot"])
    parser.add_argument("--no-detach", dest="detach", action="store_false")
    parser.set_defaults(detach=True)
    parser.add_argument(
        "--warm_up",
        type=int,
        default=0,
        help="Number of epochs to skip before "
        "starting to train using predicted gradients",
    )
    parser.add_argument("--weight_decay", type=float, default=0.0)
    parser.add_argument(
        "--add_noise",
        action="store_true",
        dest="add_noise",
        help="add noise to the gradients of a standard classifier.",
    )
    parser.set_defaults(add_noise=False)
    parser.add_argument("--noise_type", type=str, default="Gaussian",
                        choices=["Gaussian", "Laplace"])
    parser.add_argument("--noise_std", type=float, default=0.0)
    parser.add_argument("--lr", type=float, default=1e-3,
                        help="Learning rate")
    args = parser.parse_args()
    print(args)

    # Load data
    # NOTE(review): image roots and label files are hard-coded machine-local
    # paths injected into args; consider making these CLI flags (the
    # commented-out --image-root/--label arguments above suggest that intent).
    args.image_root = [
        "/data/chenlong.1024/5198852-tiktok-1w_images",
        "/data/chenlong.1024/5205046-tiktok-10w_images",
        "/data/chenlong.1024/5599074-tiktok-impr_cnt20_images",
        "/data/chenlong.1024/5600297-tiktok-impr_cnt10_images",
    ]
    args.label = [
        "/data/chenlong.1024/5198852-tiktok-1w.csv",
        "/data/chenlong.1024/5205046-tiktok-10w.csv",
        "/data/chenlong.1024/5599074-tiktok-impr_cnt20.csv",
        "/data/chenlong.1024/5600297-tiktok-impr_cnt10.csv",
    ]
    train_loader, val_loader, test_loader = datasets.load_data_from_arguments(
        args)

    # Options
    optimization_args = {
        "optimizer": {
            "name": "adam",
            "lr": args.lr,
            "weight_decay": args.weight_decay
        },
        "scheduler": {
            "step_size": 20,
            "gamma": 0.3
        },
    }
    # Alternative (SGD + step scheduler), kept for reference:
    # optimization_args = {
    #     'optimizer': {
    #         'name': 'sgd',
    #         'lr': 1e-3,
    #     },
    #     'scheduler': {
    #         'step_size': 15,
    #         'gamma': 1.25
    #     }
    # }

    # Resolve the model class by name. CoverModel builds its own ResNet-50
    # backbone and does not need an architecture config; other classes parse
    # the JSON config and take input_shape from the first training example.
    model_class = getattr(methods, args.model_class)
    if "CoverModel" in args.model_class:
        model = model_class(
            num_classes=2,
            pretrained=True,
            device=args.device,
            grad_weight_decay=args.grad_weight_decay,
            grad_l1_penalty=args.grad_l1_penalty,
            lamb=args.lamb,
            sample_from_q=args.sample_from_q,
            q_dist=args.q_dist,
            load_from=args.load_from,
            loss_function=args.loss_function,
            loss_function_param=args.loss_function_param,
            add_noise=args.add_noise,
            noise_type=args.noise_type,
            noise_std=args.noise_std,
            detach=args.detach,
            warm_up=args.warm_up,
        )
    else:
        with open(args.config, "r") as f:
            architecture_args = json.load(f)
        model = model_class(
            input_shape=train_loader.dataset[0][0].shape,
            architecture_args=architecture_args,
            pretrained_arg=args.pretrained_arg,
            device=args.device,
            grad_weight_decay=args.grad_weight_decay,
            grad_l1_penalty=args.grad_l1_penalty,
            lamb=args.lamb,
            sample_from_q=args.sample_from_q,
            q_dist=args.q_dist,
            load_from=args.load_from,
            loss_function=args.loss_function,
            loss_function_param=args.loss_function_param,
            add_noise=args.add_noise,
            noise_type=args.noise_type,
            noise_std=args.noise_std,
            detach=args.detach,
            warm_up=args.warm_up,
        )

    metrics_list = []
    # ImageNet additionally reports top-5 accuracy.
    if args.dataset == "imagenet":
        metrics_list.append(metrics.TopKAccuracy(k=5, output_key="pred"))

    training.train(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        epochs=args.epochs,
        save_iter=args.save_iter,
        vis_iter=args.vis_iter,
        optimization_args=optimization_args,
        log_dir=args.log_dir,
        args_to_log=args,
        stopping_param=args.stopping_param,
        metrics=metrics_list,
    )

    # if training finishes successfully, compute the test score
    print("Testing the best validation model...")
    model = utils.load(os.path.join(args.log_dir, "checkpoints",
                                    "best_val.mdl"),
                       device=args.device)
    pred = utils.apply_on_dataset(
        model,
        test_loader.dataset,
        batch_size=args.batch_size,
        output_keys_regexp="pred",
        description="Testing",
    )["pred"]
    # Ground-truth labels are read directly from the dataset tuples.
    labels = [p[1] for p in test_loader.dataset]
    labels = torch.tensor(labels, dtype=torch.long)
    labels = utils.to_cpu(labels)

    with open(os.path.join(args.log_dir, "test_predictions.pkl"), "wb") as f:
        pickle.dump({"pred": pred, "labels": labels}, f)

    # Top-1 test accuracy.
    accuracy = torch.mean((pred.argmax(dim=1) == labels).float())
    with open(os.path.join(args.log_dir, "test_accuracy.txt"), "w") as f:
        f.write("{}\n".format(accuracy))
def main():
    """Train a ResNet18-k style model for double-descent experiments and test
    both the best-validation and the final checkpoints."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', '-c', type=str, required=True)
    parser.add_argument('--device', '-d', default='cuda')
    parser.add_argument('--batch_size', '-b', type=int, default=128)
    parser.add_argument('--epochs', '-e', type=int, default=4000)
    parser.add_argument('--stopping_param', type=int, default=2**30)
    parser.add_argument('--save_iter', '-s', type=int, default=100)
    parser.add_argument('--vis_iter', '-v', type=int, default=10)
    parser.add_argument('--log_dir', '-l', type=str, default=None)
    parser.add_argument('--seed', type=int, default=42)
    # Dataset/noise flags are consumed by datasets.load_data_from_arguments.
    parser.add_argument(
        '--dataset', '-D', type=str, default='cifar10',
        choices=['mnist', 'cifar10', 'cifar100', 'clothing1m', 'imagenet'])
    parser.add_argument('--data_augmentation', '-A', action='store_true',
                        dest='data_augmentation')
    parser.set_defaults(data_augmentation=False)
    parser.add_argument('--num_train_examples', type=int, default=None)
    parser.add_argument('--label_noise_level', '-n', type=float, default=0.0)
    parser.add_argument('--label_noise_type', type=str, default='error',
                        choices=['error', 'cifar10_custom'])
    parser.add_argument('--transform_function', type=str, default=None,
                        choices=[None, 'remove_random_chunks'])
    parser.add_argument('--clean_validation', dest='clean_validation',
                        action='store_true')
    parser.set_defaults(clean_validation=False)
    parser.add_argument('--remove_prob', type=float, default=0.5)
    # Model/optimization flags, forwarded to the model constructor below.
    parser.add_argument('--model_class', '-m', type=str,
                        default='StandardClassifier')
    parser.add_argument('--load_from', type=str, default=None)
    parser.add_argument('--grad_weight_decay', '-L', type=float, default=0.0)
    parser.add_argument('--lamb', type=float, default=1.0)
    parser.add_argument('--pretrained_arg', '-r', type=str, default=None)
    parser.add_argument('--sample_from_q', action='store_true',
                        dest='sample_from_q')
    parser.set_defaults(sample_from_q=False)
    parser.add_argument('--q_dist', type=str, default='Gaussian',
                        choices=['Gaussian', 'Laplace', 'dot'])
    parser.add_argument('--weight_decay', type=float, default=0.0)
    parser.add_argument('--lr', type=float, default=1e-4,
                        help='Learning rate')
    parser.add_argument('--k', '-k', type=int, required=True, default=10,
                        help='width parameter of ResNet18-k')
    args = parser.parse_args()
    print(args)

    # Load data
    train_loader, val_loader, test_loader = datasets.load_data_from_arguments(
        args)

    # Options
    optimization_args = {
        'optimizer': {
            'name': 'adam',
            'lr': args.lr,
            'weight_decay': args.weight_decay
        }
    }

    with open(args.config, 'r') as f:
        architecture_args = json.load(f)

    # set the width parameter k on each double-descent sub-network
    if ('classifier' in architecture_args
            and architecture_args['classifier'].get(
                'net', '') == 'double-descent-cifar10-resnet18'):
        architecture_args['classifier']['k'] = args.k
    if ('q-network' in architecture_args
            and architecture_args['q-network'].get(
                'net', '') == 'double-descent-cifar10-resnet18'):
        architecture_args['q-network']['k'] = args.k

    # Resolve the model class by name and build it; input_shape is taken
    # from the first training example.
    model_class = getattr(methods, args.model_class)
    model = model_class(input_shape=train_loader.dataset[0][0].shape,
                        architecture_args=architecture_args,
                        pretrained_arg=args.pretrained_arg,
                        device=args.device,
                        grad_weight_decay=args.grad_weight_decay,
                        lamb=args.lamb,
                        sample_from_q=args.sample_from_q,
                        q_dist=args.q_dist,
                        load_from=args.load_from,
                        loss_function='ce')

    training.train(model=model,
                   train_loader=train_loader,
                   val_loader=val_loader,
                   epochs=args.epochs,
                   save_iter=args.save_iter,
                   vis_iter=args.vis_iter,
                   optimization_args=optimization_args,
                   log_dir=args.log_dir,
                   args_to_log=args,
                   stopping_param=args.stopping_param)

    # test the last model and best model
    models_to_test = [{
        'name': 'best',
        'file': 'best_val.mdl'
    }, {
        'name': 'final',
        'file': 'final.mdl'
    }]
    for spec in models_to_test:
        print("Testing the {} model...".format(spec['name']))
        model = utils.load(os.path.join(args.log_dir, 'checkpoints',
                                        spec['file']),
                           device=args.device)
        pred = utils.apply_on_dataset(model,
                                      test_loader.dataset,
                                      batch_size=args.batch_size,
                                      output_keys_regexp='pred',
                                      description='Testing')['pred']
        # Ground-truth labels are read directly from the dataset tuples.
        labels = [p[1] for p in test_loader.dataset]
        labels = torch.tensor(labels, dtype=torch.long)
        labels = utils.to_cpu(labels)

        with open(
                os.path.join(args.log_dir,
                             '{}_test_predictions.pkl'.format(spec['name'])),
                'wb') as f:
            pickle.dump({'pred': pred, 'labels': labels}, f)

        # Top-1 test accuracy for this checkpoint.
        accuracy = torch.mean((pred.argmax(dim=1) == labels).float())
        with open(
                os.path.join(args.log_dir,
                             '{}_test_accuracy.txt'.format(spec['name'])),
                'w') as f:
            f.write("{}\n".format(accuracy))
from modules.DataAndVisualization.vizualiser import (
    visualize as interactive_scatter,
    plot_parallel as parallel,
)
from modules.utils import load

# Visualizing the geometry design problem.
# Objectives as a reminder:
# surface area, volume, min height and floor area

# First load the solution. For more details on the solution, check the readme
# file in modules/DataAndVisualization.
obj, var, _nadir, _ideal = load("gd1")

# You can ignore this part:
# make sure all values are positive, as some of the objectives may be flipped
# (setting pfront to true in the problem-creation method will flip some values).
obj = abs(obj)

# Axis names for the plot.
axis_names = ["Surface area", "Volume", "Min height", "Floor area"]

# As this is a 4d solution we'll have to use a parallel plot:
# parallel(obj, axis_names)

# Geometry design problems support 2 and 3 dimensional interactive scatter plots:
# click a point => the corresponding tent will be plotted and its values printed
# to the console => close the tent plot => back to the objectives plot.

# Load a 3d problem.
obj, var, _nadir, _ideal = load("gd2")
def __init__(self, input_shape, architecture_args, pretrained_arg=None,
             device='cuda', grad_weight_decay=0.0, grad_l1_penalty=0.0,
             lamb=1.0, sample_from_q=False, q_dist='Gaussian',
             loss_function='ce', detach=True, load_from=None, warm_up=0,
             **kwargs):
    """Build a classifier plus a q-network that predicts output gradients.

    :param input_shape: shape of one input example; must have length 3.
    :param architecture_args: dict describing the 'classifier' (and, when no
        pretrained base is given, the 'q-network') architectures.
    :param pretrained_arg: optional spec of a pretrained model used as the
        base of the q-network.
    :param device: torch device string.
    :param grad_weight_decay: L2 penalty coefficient on predicted gradients.
    :param grad_l1_penalty: L1 penalty coefficient on predicted gradients.
    :param lamb: coefficient in front of the H(p,q) term; controls the
        variance of the predicted gradients.
    :param sample_from_q: whether gradients are sampled from q.
    :param q_dist: one of 'Gaussian', 'Laplace', 'dot'.
    :param loss_function: classification loss name; 'none' adds a trainable
        q_loss head so the predicted gradient has a general form.
    :param detach: stored flag controlling gradient detachment (used
        elsewhere in the class).
    :param load_from: optional checkpoint path whose classifier parameters
        initialize the q-network.
    :param warm_up: stored warm-up parameter (used elsewhere in the class).
    """
    super(PredictGradOutput, self).__init__(**kwargs)

    # Keep the full constructor configuration for checkpointing.
    self.args = {
        'input_shape': input_shape,
        'architecture_args': architecture_args,
        'pretrained_arg': pretrained_arg,
        'device': device,
        'grad_weight_decay': grad_weight_decay,
        'grad_l1_penalty': grad_l1_penalty,
        'lamb': lamb,
        'sample_from_q': sample_from_q,
        'q_dist': q_dist,
        'loss_function': loss_function,
        'detach': detach,
        'load_from': load_from,
        'warm_up': warm_up,
        'class': 'PredictGradOutput'
    }

    assert len(input_shape) == 3
    self.input_shape = [None] + list(input_shape)
    self.architecture_args = architecture_args
    self.pretrained_arg = pretrained_arg
    self.device = device
    self.grad_weight_decay = grad_weight_decay
    self.grad_l1_penalty = grad_l1_penalty
    self.lamb = lamb
    self.sample_from_q = sample_from_q
    self.q_dist = q_dist
    self.detach = detach
    self.loss_function = loss_function
    self.load_from = load_from
    self.warm_up = warm_up

    # lamb is the coefficient in front of the H(p,q) term; it controls the
    # variance of the predicted gradients. Pick the replacement class that
    # matches the chosen q-distribution.
    if self.q_dist == 'Gaussian':
        spread = np.sqrt(1.0 / 2.0 / (self.lamb + 1e-12))
        self.grad_replacement_class = nn_utils.get_grad_replacement_class(
            sample=self.sample_from_q, standard_dev=spread,
            q_dist=self.q_dist)
    elif self.q_dist == 'Laplace':
        spread = np.sqrt(2.0) / (self.lamb + 1e-6)
        self.grad_replacement_class = nn_utils.get_grad_replacement_class(
            sample=self.sample_from_q, standard_dev=spread,
            q_dist=self.q_dist)
    elif self.q_dist == 'dot':
        assert not self.sample_from_q
        self.grad_replacement_class = nn_utils.get_grad_replacement_class(
            sample=False)
    else:
        raise NotImplementedError()

    # Main classifier network.
    self.classifier, output_shape = nn_utils.parse_feed_forward(
        args=self.architecture_args['classifier'],
        input_shape=self.input_shape)
    self.classifier = self.classifier.to(self.device)
    self.num_classes = output_shape[-1]

    if self.pretrained_arg is not None:
        # Pretrained base plus a small trainable head forms the q-network.
        q_base = pretrained_models.get_pretrained_model(
            self.pretrained_arg, self.input_shape, self.device)
        q_top = torch.nn.Sequential(
            torch.nn.Linear(q_base.output_shape[-1], 128),
            torch.nn.ReLU(inplace=True),
            torch.nn.Linear(128, self.num_classes)).to(self.device)
        self.q_network = torch.nn.Sequential(q_base, q_top)
    else:
        self.q_network, _ = nn_utils.parse_feed_forward(
            args=self.architecture_args['q-network'],
            input_shape=self.input_shape)
        self.q_network = self.q_network.to(self.device)

    if self.load_from is not None:
        # Initialize the q-network from the classifier of a stored model;
        # assumes matching parameter names between the two — TODO confirm.
        print("Loading the gradient predictor model from {}".format(
            load_from))
        stored_net = utils.load(load_from, device='cpu')
        stored_net_params = dict(stored_net.classifier.named_parameters())
        for key, param in self.q_network.named_parameters():
            param.data = stored_net_params[key].data.to(self.device)

    self.q_loss = None
    if self.loss_function == 'none':
        # With no fixed loss, the predicted gradient takes a general form
        # produced by this small trainable head.
        self.q_loss = torch.nn.Sequential(
            torch.nn.Linear(2 * self.num_classes, 128),
            torch.nn.ReLU(inplace=True),
            torch.nn.Linear(128, self.num_classes)).to(self.device)
from modules.DataAndVisualization.vizualiser import (
    plot_scatter as scatter,
    visualize as interactive_scatter,
    plot_parallel as parallel,
)
from modules.utils import load

# Visualizing the geometry design problem with a constant floor.

# First load the solution. For more details on the solution, check the
# readme file in modules/DataAndVisualization.
obj, var, nadir, ideal = load("gdcExample1")

# You can ignore this part:
# make sure all values are positive, as some of the objectives may be flipped
# (setting pfront to true in the problem-creation method flips some values).
obj = abs(obj)

# Axis names for the plot.
axis_names = ["Surface area", "Volume"]

# The constant-floor geometry design problem supports an interactive scatter
# plot, as it is only 2-dimensional: click a point => the corresponding tent
# is plotted and its values are printed to the console => close the tent plot
# => back to the objectives plot.
interactive_scatter(obj, var, axis_names)

# You can also use other plots:
# scatter
scatter(obj, axis_names)

# parallel — this one is actually quite interesting.
def __init__(self, input_shape, architecture_args, pretrained_arg=None,
             device="cuda", grad_weight_decay=0.0, grad_l1_penalty=0.0,
             lamb=1.0, sample_from_q=False, q_dist="Gaussian",
             loss_function="ce", detach=True, load_from=None, warm_up=0,
             **kwargs):
    """Set up the classifier and the gradient-predicting q-network.

    The constructor records every argument in ``self.args`` (for
    checkpointing), builds the classifier from ``architecture_args``,
    builds the q-network either on top of a pretrained base or from the
    'q-network' architecture spec, optionally initializes the q-network
    from a stored model's classifier, and — when ``loss_function`` is
    "none" — adds a trainable ``q_loss`` head so the predicted gradient
    can take a general form.

    ``lamb`` is the coefficient in front of the H(p,q) term and controls
    the variance of the predicted gradients.
    """
    super(PredictGradOutput, self).__init__(**kwargs)

    self.args = {
        "input_shape": input_shape,
        "architecture_args": architecture_args,
        "pretrained_arg": pretrained_arg,
        "device": device,
        "grad_weight_decay": grad_weight_decay,
        "grad_l1_penalty": grad_l1_penalty,
        "lamb": lamb,
        "sample_from_q": sample_from_q,
        "q_dist": q_dist,
        "loss_function": loss_function,
        "detach": detach,
        "load_from": load_from,
        "warm_up": warm_up,
        "class": "PredictGradOutput",
    }

    assert len(input_shape) == 3
    self.input_shape = [None] + list(input_shape)
    self.architecture_args = architecture_args
    self.pretrained_arg = pretrained_arg
    self.device = device
    self.grad_weight_decay = grad_weight_decay
    self.grad_l1_penalty = grad_l1_penalty
    self.lamb = lamb
    self.sample_from_q = sample_from_q
    self.q_dist = q_dist
    self.detach = detach
    self.loss_function = loss_function
    self.load_from = load_from
    self.warm_up = warm_up

    # Choose how gradients get replaced. The 'dot' distribution never
    # samples; the other two derive a spread from lamb.
    if self.q_dist == "dot":
        assert not self.sample_from_q
        self.grad_replacement_class = nn_utils.get_grad_replacement_class(
            sample=False)
    else:
        if self.q_dist == "Gaussian":
            standard_dev = np.sqrt(1.0 / 2.0 / (self.lamb + 1e-12))
        elif self.q_dist == "Laplace":
            standard_dev = np.sqrt(2.0) / (self.lamb + 1e-6)
        else:
            raise NotImplementedError()
        self.grad_replacement_class = nn_utils.get_grad_replacement_class(
            sample=self.sample_from_q,
            standard_dev=standard_dev,
            q_dist=self.q_dist)

    # Build the classifier; its output width gives the number of classes.
    self.classifier, output_shape = nn_utils.parse_feed_forward(
        args=self.architecture_args["classifier"],
        input_shape=self.input_shape)
    self.classifier = self.classifier.to(self.device)
    self.num_classes = output_shape[-1]

    if self.pretrained_arg is None:
        self.q_network, _ = nn_utils.parse_feed_forward(
            args=self.architecture_args["q-network"],
            input_shape=self.input_shape)
        self.q_network = self.q_network.to(self.device)
    else:
        # A pretrained base followed by a small trainable head.
        q_base = pretrained_models.get_pretrained_model(
            self.pretrained_arg, self.input_shape, self.device)
        q_top = torch.nn.Sequential(
            torch.nn.Linear(q_base.output_shape[-1], 128),
            torch.nn.ReLU(inplace=True),
            torch.nn.Linear(128, self.num_classes),
        ).to(self.device)
        self.q_network = torch.nn.Sequential(q_base, q_top)

    if self.load_from is not None:
        # Copy classifier parameters from a stored model into the
        # q-network; presumably the two share parameter names — verify.
        print("Loading the gradient predictor model from {}".format(
            load_from))
        stored_net = utils.load(load_from, device="cpu")
        stored_net_params = dict(stored_net.classifier.named_parameters())
        for key, param in self.q_network.named_parameters():
            param.data = stored_net_params[key].data.to(self.device)

    self.q_loss = None
    if self.loss_function == "none":
        # General-form predicted gradient: a small trainable head.
        self.q_loss = torch.nn.Sequential(
            torch.nn.Linear(2 * self.num_classes, 128),
            torch.nn.ReLU(inplace=True),
            torch.nn.Linear(128, self.num_classes),
        ).to(self.device)