Example #1
    def __init__(self,
                 input_shape,
                 architecture_args,
                 pretrained_arg=None,
                 device='cuda',
                 loss_function='ce',
                 add_noise=False,
                 noise_type='Gaussian',
                 noise_std=0.0,
                 loss_function_param=None,
                 load_from=None,
                 **kwargs):
        super(StandardClassifier, self).__init__(**kwargs)

        self.args = {
            'input_shape': input_shape,
            'architecture_args': architecture_args,
            'pretrained_arg': pretrained_arg,
            'device': device,
            'loss_function': loss_function,
            'add_noise': add_noise,
            'noise_type': noise_type,
            'noise_std': noise_std,
            'loss_function_param': loss_function_param,
            'load_from': load_from,
            'class': 'StandardClassifier'
        }

        assert len(input_shape) == 3
        self.input_shape = [None] + list(input_shape)
        self.architecture_args = architecture_args
        self.pretrained_arg = pretrained_arg
        self.device = device
        self.loss_function = loss_function
        self.add_noise = add_noise
        self.noise_type = noise_type
        self.noise_std = noise_std
        self.loss_function_param = loss_function_param
        self.load_from = load_from

        # initialize the network
        self.repr_net = pretrained_models.get_pretrained_model(
            self.pretrained_arg, self.input_shape, self.device)
        self.repr_shape = self.repr_net.output_shape
        self.classifier, output_shape = nn_utils.parse_feed_forward(
            args=self.architecture_args['classifier'],
            input_shape=self.repr_shape)
        self.num_classes = output_shape[-1]
        self.classifier = self.classifier.to(self.device)
        self.grad_noise_class = nn_utils.get_grad_noise_class(
            standard_dev=noise_std, q_dist=noise_type)

        if self.load_from is not None:
            print("Loading the classifier model from {}".format(load_from))
            stored_net = utils.load(load_from, device='cpu')
            stored_net_params = dict(stored_net.classifier.named_parameters())
            for key, param in self.classifier.named_parameters():
                param.data = stored_net_params[key].data.to(self.device)
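
A minimal usage sketch for the constructor above. Hedged: the `methods` module path, the config file name, and the exact config schema are assumptions inferred from this example, not confirmed by it.

import json

import methods  # assumed: the module that exposes StandardClassifier

# hypothetical config file; it must provide the 'classifier' spec that
# nn_utils.parse_feed_forward consumes
with open('configs/mnist-classifier.json') as f:
    architecture_args = json.load(f)

model = methods.StandardClassifier(
    input_shape=(1, 28, 28),  # (C, H, W); the constructor asserts length 3
    architecture_args=architecture_args,
    device='cpu',
    loss_function='ce',
)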
Example #2
    def __init__(self, path, device):
        super(PretrainedVAE, self).__init__()
        self.vae = utils.load(path, device=device)
        self.output_shape = [None, 128]

        # freeze the VAE weights so it acts as a fixed feature extractor
        for param in self.vae.parameters():
            param.requires_grad = False
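
An equivalent, slightly shorter idiom uses torch.nn.Module.requires_grad_; a sketch that also switches the module to eval mode so dropout and batch-norm statistics stay fixed:

import torch

def freeze(module: torch.nn.Module) -> torch.nn.Module:
    """Freeze all parameters of `module` and put it in eval mode."""
    module.requires_grad_(False)  # sets requires_grad=False on every parameter
    module.eval()                 # disables dropout / batch-norm updates
    return module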
Example #3
    def __init__(self,
                 num_classes=2,
                 pretrained=True,
                 device="cuda",
                 loss_function="ce",
                 add_noise=False,
                 noise_type="Gaussian",
                 noise_std=0.0,
                 loss_function_param=None,
                 load_from=None,
                 **kwargs):
        super(CoverModel, self).__init__(**kwargs)

        self.args = {
            "num_classes": num_classes,
            "pretrained": pretrained,
            "device": device,
            "loss_function": loss_function,
            "add_noise": add_noise,
            "noise_type": noise_type,
            "noise_std": noise_std,
            "loss_function_param": loss_function_param,
            "load_from": load_from,
            "class": "CoverModel",
        }

        self.device = device
        self.loss_function = loss_function
        self.add_noise = add_noise
        self.noise_type = noise_type
        self.noise_std = noise_std
        self.loss_function_param = loss_function_param
        self.load_from = load_from
        self.num_classes = num_classes

        # initialize the network
        feature_channels = 2048  # 2048 for resnet50, 512 for resnet18
        self.classifier = nn.ModuleDict({
            "backbone": resnet.resnet50(pretrained),
            "fc": nn.Linear(feature_channels, num_classes, bias=False),
        })
        self.classifier = self.classifier.to(self.device)

        self.grad_noise_class = nn_utils.get_grad_noise_class(
            standard_dev=noise_std, q_dist=noise_type)

        if self.load_from is not None:
            print("Loading the classifier model from {}".format(load_from))
            stored_net = utils.load(load_from, device="cpu")
            stored_net_params = dict(stored_net.classifier.named_parameters())
            for key, param in self.classifier.named_parameters():
                param.data = stored_net_params[key].data.to(self.device)
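
Rather than hardcoding feature_channels, the width can usually be read off the backbone itself. A sketch, assuming a torchvision-style ResNet whose final fully connected layer exposes in_features (the `resnet` module used above may differ):

import torch.nn as nn
from torchvision import models

backbone = models.resnet50(pretrained=True)
feature_channels = backbone.fc.in_features  # 2048 for resnet50, 512 for resnet18
backbone.fc = nn.Identity()                 # strip the original classification head
fc = nn.Linear(feature_channels, 2, bias=False)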
Example #4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--device', '-d', default='cuda')

    parser.add_argument('--batch_size', '-b', type=int, default=256)
    parser.add_argument('--seed', type=int, default=42)

    parser.add_argument('--dataset', '-D', type=str, default='mnist',
                        choices=['mnist', 'cifar10', 'cifar100', 'clothing1m', 'imagenet'])
    parser.add_argument('--data_augmentation', '-A', action='store_true', dest='data_augmentation')
    parser.set_defaults(data_augmentation=False)
    parser.add_argument('--num_train_examples', type=int, default=None)
    parser.add_argument('--label_noise_level', '-n', type=float, default=0.0)
    parser.add_argument('--label_noise_type', type=str, default='flip',
                        choices=['flip', 'error', 'cifar10_custom'])
    parser.add_argument('--transform_function', type=str, default=None,
                        choices=[None, 'remove_random_chunks'])
    parser.add_argument('--clean_validation', dest='clean_validation', action='store_true')
    parser.set_defaults(clean_validation=False)
    parser.add_argument('--remove_prob', type=float, default=0.5)

    parser.add_argument('--load_from', type=str, default=None, required=True)
    parser.add_argument('--output_dir', '-o', type=str, default=None)

    args = parser.parse_args()
    print(args)

    # Load data
    _, _, test_loader = datasets.load_data_from_arguments(args)

    print(f"Testing the model saved at {args.load_from}")
    model = utils.load(args.load_from, device=args.device)
    ret = utils.apply_on_dataset(model, test_loader.dataset, batch_size=args.batch_size,
                                 output_keys_regexp='pred|label', description='Testing')
    pred = ret['pred']
    labels = ret['label']
    if args.output_dir is not None:
        with open(os.path.join(args.output_dir, 'test_predictions.pkl'), 'wb') as f:
            pickle.dump({'pred': pred, 'labels': labels}, f)

    accuracy = torch.mean((pred.argmax(dim=1) == labels).float())
    print(accuracy)
    if args.output_dir is not None:
        with open(os.path.join(args.output_dir, 'test_accuracy.txt'), 'w') as f:
            f.write("{}\n".format(accuracy))
Example #5
def estimate_transition(load_from, data_loader, device="cpu", batch_size=256):
    """ Estimates the label noise matrix. The code is adapted form the original implementation.
    Source: https://github.com/giorgiop/loss-correction/.
    """
    assert load_from is not None
    model = utils.load(load_from, device=device)
    pred = utils.apply_on_dataset(
        model=model,
        dataset=data_loader.dataset,
        batch_size=batch_size,
        cpu=True,
        description="Estimating transition matrix",
        output_keys_regexp="pred",
    )["pred"]
    pred = torch.softmax(pred, dim=1)
    pred = utils.to_numpy(pred)

    c = model.num_classes
    T = np.zeros((c, c))
    filter_outlier = True

    # find a 'perfect example' for each class
    for i in range(c):
        if not filter_outlier:
            idx_best = np.argmax(pred[:, i])
        else:
            thresh = np.percentile(pred[:, i], 97, interpolation="higher")
            # copy the column so that zeroing outliers does not mutate `pred`,
            # whose values are read again when filling later rows of T
            robust_eta = pred[:, i].copy()
            robust_eta[robust_eta >= thresh] = 0.0
            idx_best = np.argmax(robust_eta)

        for j in range(c):
            T[i, j] = pred[idx_best, j]

    # row normalize
    row_sums = T.sum(axis=1, keepdims=True)
    T /= row_sums

    T = torch.tensor(T, dtype=torch.float).to(device)
    print(T)

    return T
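
The returned matrix T is typically used for forward loss correction (the 'fw' option among the loss choices in the training scripts): the model's softmax output is multiplied by T before taking the negative log-likelihood. A minimal sketch, assuming logits of shape (batch, c) and T of shape (c, c):

import torch
import torch.nn.functional as F

def forward_corrected_ce(logits, labels, T):
    """Cross-entropy computed on T-corrected probabilities (sketch)."""
    probs = torch.softmax(logits, dim=1)           # (batch, c)
    corrected = torch.clamp(probs @ T, min=1e-12)  # avoid log(0)
    return F.nll_loss(torch.log(corrected), labels)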
Example #6
    def __init__(
        self,
        input_shape,
        architecture_args,
        pretrained_arg=None,
        device="cuda",
        loss_function="ce",
        add_noise=False,
        noise_type="Gaussian",
        noise_std=0.0,
        loss_function_param=None,
        load_from=None,
        **kwargs
    ):
        super(StandardClassifier, self).__init__(**kwargs)

        self.args = {
            "input_shape": input_shape,
            "architecture_args": architecture_args,
            "pretrained_arg": pretrained_arg,
            "device": device,
            "loss_function": loss_function,
            "add_noise": add_noise,
            "noise_type": noise_type,
            "noise_std": noise_std,
            "loss_function_param": loss_function_param,
            "load_from": load_from,
            "class": "StandardClassifier",
        }

        assert len(input_shape) == 3
        self.input_shape = [None] + list(input_shape)
        self.architecture_args = architecture_args
        self.pretrained_arg = pretrained_arg
        self.device = device
        self.loss_function = loss_function
        self.add_noise = add_noise
        self.noise_type = noise_type
        self.noise_std = noise_std
        self.loss_function_param = loss_function_param
        self.load_from = load_from

        # initialize the network
        self.repr_net = pretrained_models.get_pretrained_model(
            self.pretrained_arg, self.input_shape, self.device
        )
        self.repr_shape = self.repr_net.output_shape
        self.classifier, output_shape = nn_utils.parse_feed_forward(
            args=self.architecture_args["classifier"], input_shape=self.repr_shape
        )
        self.num_classes = output_shape[-1]
        self.classifier = self.classifier.to(self.device)
        self.grad_noise_class = nn_utils.get_grad_noise_class(
            standard_dev=noise_std, q_dist=noise_type
        )

        if self.load_from is not None:
            print("Loading the classifier model from {}".format(load_from))
            stored_net = utils.load(load_from, device="cpu")
            stored_net_params = dict(stored_net.classifier.named_parameters())
            for key, param in self.classifier.named_parameters():
                param.data = stored_net_params[key].data.to(self.device)
Example #7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', '-c', type=str, required=True)
    parser.add_argument('--device', '-d', default='cuda')

    parser.add_argument('--batch_size', '-b', type=int, default=256)
    parser.add_argument('--epochs', '-e', type=int, default=400)
    parser.add_argument('--stopping_param', type=int, default=50)
    parser.add_argument('--save_iter', '-s', type=int, default=10)
    parser.add_argument('--vis_iter', '-v', type=int, default=10)
    parser.add_argument('--log_dir', '-l', type=str, default=None)
    parser.add_argument('--seed', type=int, default=42)

    parser.add_argument(
        '--dataset',
        '-D',
        type=str,
        default='mnist',
        choices=['mnist', 'cifar10', 'cifar100', 'clothing1m', 'imagenet'])
    parser.add_argument('--data_augmentation',
                        '-A',
                        action='store_true',
                        dest='data_augmentation')
    parser.set_defaults(data_augmentation=False)
    parser.add_argument('--num_train_examples', type=int, default=None)
    parser.add_argument('--label_noise_level', '-n', type=float, default=0.0)
    parser.add_argument('--label_noise_type',
                        type=str,
                        default='error',
                        choices=['error', 'cifar10_custom'])
    parser.add_argument('--transform_function',
                        type=str,
                        default=None,
                        choices=[None, 'remove_random_chunks'])
    parser.add_argument('--clean_validation',
                        dest='clean_validation',
                        action='store_true')
    parser.set_defaults(clean_validation=False)
    parser.add_argument('--remove_prob', type=float, default=0.5)

    parser.add_argument('--model_class',
                        '-m',
                        type=str,
                        default='StandardClassifier')
    parser.add_argument(
        '--loss_function',
        type=str,
        default='ce',
        choices=['ce', 'mse', 'mae', 'gce', 'dmi', 'fw', 'none'])
    parser.add_argument('--loss_function_param', type=float, default=1.0)
    parser.add_argument('--load_from', type=str, default=None)
    parser.add_argument('--grad_weight_decay', '-L', type=float, default=0.0)
    parser.add_argument('--grad_l1_penalty', '-S', type=float, default=0.0)
    parser.add_argument('--lamb', type=float, default=1.0)
    parser.add_argument('--pretrained_arg', '-r', type=str, default=None)
    parser.add_argument('--sample_from_q',
                        action='store_true',
                        dest='sample_from_q')
    parser.set_defaults(sample_from_q=False)
    parser.add_argument('--q_dist',
                        type=str,
                        default='Gaussian',
                        choices=['Gaussian', 'Laplace', 'dot'])
    parser.add_argument('--no-detach', dest='detach', action='store_false')
    parser.set_defaults(detach=True)
    parser.add_argument('--warm_up',
                        type=int,
                        default=0,
                        help='Number of epochs to skip before '
                        'starting to train using predicted gradients')
    parser.add_argument('--weight_decay', type=float, default=0.0)

    parser.add_argument(
        '--add_noise',
        action='store_true',
        dest='add_noise',
        help='add noise to the gradients of a standard classifier.')
    parser.set_defaults(add_noise=False)
    parser.add_argument('--noise_type',
                        type=str,
                        default='Gaussian',
                        choices=['Gaussian', 'Laplace'])
    parser.add_argument('--noise_std', type=float, default=0.0)

    parser.add_argument('--lr', type=float, default=1e-3, help='Learning rate')
    args = parser.parse_args()
    print(args)

    # Load data
    train_loader, val_loader, test_loader = datasets.load_data_from_arguments(
        args)

    # Options
    optimization_args = {
        'optimizer': {
            'name': 'adam',
            'lr': args.lr,
            'weight_decay': args.weight_decay
        }
    }

    # optimization_args = {
    #     'optimizer': {
    #         'name': 'sgd',
    #         'lr': 1e-3,
    #     },
    #     'scheduler': {
    #         'step_size': 15,
    #         'gamma': 1.25
    #     }
    # }

    with open(args.config, 'r') as f:
        architecture_args = json.load(f)

    model_class = getattr(methods, args.model_class)

    model = model_class(input_shape=train_loader.dataset[0][0].shape,
                        architecture_args=architecture_args,
                        pretrained_arg=args.pretrained_arg,
                        device=args.device,
                        grad_weight_decay=args.grad_weight_decay,
                        grad_l1_penalty=args.grad_l1_penalty,
                        lamb=args.lamb,
                        sample_from_q=args.sample_from_q,
                        q_dist=args.q_dist,
                        load_from=args.load_from,
                        loss_function=args.loss_function,
                        loss_function_param=args.loss_function_param,
                        add_noise=args.add_noise,
                        noise_type=args.noise_type,
                        noise_std=args.noise_std,
                        detach=args.detach,
                        warm_up=args.warm_up)

    metrics_list = []
    if args.dataset == 'imagenet':
        metrics_list.append(metrics.TopKAccuracy(k=5, output_key='pred'))

    training.train(model=model,
                   train_loader=train_loader,
                   val_loader=val_loader,
                   epochs=args.epochs,
                   save_iter=args.save_iter,
                   vis_iter=args.vis_iter,
                   optimization_args=optimization_args,
                   log_dir=args.log_dir,
                   args_to_log=args,
                   stopping_param=args.stopping_param,
                   metrics=metrics_list)

    # if training finishes successfully, compute the test score
    print("Testing the best validation model...")
    model = utils.load(os.path.join(args.log_dir, 'checkpoints',
                                    'best_val.mdl'),
                       device=args.device)
    pred = utils.apply_on_dataset(model,
                                  test_loader.dataset,
                                  batch_size=args.batch_size,
                                  output_keys_regexp='pred',
                                  description='Testing')['pred']
    labels = [p[1] for p in test_loader.dataset]
    labels = torch.tensor(labels, dtype=torch.long)
    labels = utils.to_cpu(labels)
    with open(os.path.join(args.log_dir, 'test_predictions.pkl'), 'wb') as f:
        pickle.dump({'pred': pred, 'labels': labels}, f)

    accuracy = torch.mean((pred.argmax(dim=1) == labels).float())
    with open(os.path.join(args.log_dir, 'test_accuracy.txt'), 'w') as f:
        f.write("{}\n".format(accuracy))
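
The --config file is read with json.load and must at least provide a 'classifier' entry (plus a 'q-network' entry for the gradient-prediction model classes). Its exact schema depends on nn_utils.parse_feed_forward, which is not shown here; the following is only a hypothetical illustration of its shape, with the 'net' value borrowed from the ResNet18-k examples below:

{
    "classifier": {"net": "double-descent-cifar10-resnet18", "k": 10},
    "q-network": {"net": "double-descent-cifar10-resnet18", "k": 10}
}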
Example #8
from modules.DataAndVisualization.vizualiser import (
    plot_scatter as scatter,
    plot_parallel as parallel,
)
from modules.utils import load
# Visualizing the two bar truss problem

# You can use the parallel plot with any number of dimensions, but the scatter plot only works with 2 or 3 dimensions.

# objectives as a reminder:
# weight, stress, buckling stress and deflection

# First load the solution. For more details on the solution, check the readme file in modules/DataAndVisualization
obj, var, nadir, ideal = load("tb4")

# This problem is 3-dimensional, as it only optimizes weight, stress, and buckling stress,
# so we can use either a scatter or a parallel plot to visualize it. We will use a scatter plot.

# Set the axis names accordingly
axis_names = ["weight", "Stress", "buckling stress"]

# (3D) Scatter plot
scatter(obj, axis_names)


# An example of a parallel plot

# The parallel plot will open a new browser window and display the plot there.
# Only 1000 random samples are chosen for the plot.
# You can interactively select axis ranges to highlight the solutions that fall inside them.
parallel(obj, axis_names)
Example #9
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--device", "-d", default="cuda")

    parser.add_argument("--batch_size", "-b", type=int, default=256)
    parser.add_argument("--seed", type=int, default=42)

    parser.add_argument(
        "--dataset",
        "-D",
        type=str,
        default="mnist",
        choices=["mnist", "cifar10", "cifar100", "clothing1m", "imagenet"],
    )
    parser.add_argument(
        "--data_augmentation", "-A", action="store_true", dest="data_augmentation"
    )
    parser.set_defaults(data_augmentation=False)
    parser.add_argument("--num_train_examples", type=int, default=None)
    parser.add_argument("--label_noise_level", "-n", type=float, default=0.0)
    parser.add_argument(
        "--label_noise_type",
        type=str,
        default="flip",
        choices=["flip", "error", "cifar10_custom"],
    )
    parser.add_argument(
        "--transform_function",
        type=str,
        default=None,
        choices=[None, "remove_random_chunks"],
    )
    parser.add_argument(
        "--clean_validation", dest="clean_validation", action="store_true"
    )
    parser.set_defaults(clean_validation=False)
    parser.add_argument("--remove_prob", type=float, default=0.5)

    parser.add_argument("--load_from", type=str, default=None, required=True)
    parser.add_argument("--output_dir", "-o", type=str, default=None)

    args = parser.parse_args()
    print(args)

    # Load data
    _, _, test_loader = datasets.load_data_from_arguments(args)

    print(f"Testing the model saved at {args.load_from}")
    model = utils.load(args.load_from, device=args.device)
    ret = utils.apply_on_dataset(
        model,
        test_loader.dataset,
        batch_size=args.batch_size,
        output_keys_regexp="pred|label",
        description="Testing",
    )
    pred = ret["pred"]
    labels = ret["label"]
    if args.output_dir is not None:
        with open(os.path.join(args.output_dir, "test_predictions.pkl"), "wb") as f:
            pickle.dump({"pred": pred, "labels": labels}, f)

    accuracy = torch.mean((pred.argmax(dim=1) == labels).float())
    print(accuracy)
    if args.output_dir is not None:
        with open(os.path.join(args.output_dir, "test_accuracy.txt"), "w") as f:
            f.write("{}\n".format(accuracy))
Example #10
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", "-c", type=str, required=True)
    parser.add_argument("--device", "-d", default="cuda")

    parser.add_argument("--batch_size", "-b", type=int, default=128)
    parser.add_argument("--epochs", "-e", type=int, default=4000)
    parser.add_argument("--stopping_param", type=int, default=2**30)
    parser.add_argument("--save_iter", "-s", type=int, default=100)
    parser.add_argument("--vis_iter", "-v", type=int, default=10)
    parser.add_argument("--log_dir", "-l", type=str, default=None)
    parser.add_argument("--seed", type=int, default=42)

    parser.add_argument(
        "--dataset",
        "-D",
        type=str,
        default="cifar10",
        choices=["mnist", "cifar10", "cifar100", "clothing1m", "imagenet"],
    )
    parser.add_argument("--data_augmentation",
                        "-A",
                        action="store_true",
                        dest="data_augmentation")
    parser.set_defaults(data_augmentation=False)
    parser.add_argument("--num_train_examples", type=int, default=None)
    parser.add_argument("--label_noise_level", "-n", type=float, default=0.0)
    parser.add_argument(
        "--label_noise_type",
        type=str,
        default="error",
        choices=["error", "cifar10_custom"],
    )
    parser.add_argument(
        "--transform_function",
        type=str,
        default=None,
        choices=[None, "remove_random_chunks"],
    )
    parser.add_argument("--clean_validation",
                        dest="clean_validation",
                        action="store_true")
    parser.set_defaults(clean_validation=False)
    parser.add_argument("--remove_prob", type=float, default=0.5)

    parser.add_argument("--model_class",
                        "-m",
                        type=str,
                        default="StandardClassifier")
    parser.add_argument("--load_from", type=str, default=None)
    parser.add_argument("--grad_weight_decay", "-L", type=float, default=0.0)
    parser.add_argument("--lamb", type=float, default=1.0)
    parser.add_argument("--pretrained_arg", "-r", type=str, default=None)
    parser.add_argument("--sample_from_q",
                        action="store_true",
                        dest="sample_from_q")
    parser.set_defaults(sample_from_q=False)
    parser.add_argument("--q_dist",
                        type=str,
                        default="Gaussian",
                        choices=["Gaussian", "Laplace", "dot"])
    parser.add_argument("--weight_decay", type=float, default=0.0)

    parser.add_argument("--lr", type=float, default=1e-4, help="Learning rate")

    parser.add_argument(
        "--k",
        "-k",
        type=int,
        required=False,
        default=10,
        help="width parameter of ResNet18-k",
    )
    parser.add_argument("--exclude_percent", type=float, default=0.0)
    args = parser.parse_args()
    print(args)

    # Load data
    train_loader, val_loader, test_loader = datasets.load_data_from_arguments(
        args)

    # Options
    optimization_args = {
        "optimizer": {
            "name": "adam",
            "lr": args.lr,
            "weight_decay": args.weight_decay
        }
    }

    with open(args.config, "r") as f:
        architecture_args = json.load(f)

        # set the width parameter k
        if ("classifier" in architecture_args
                and architecture_args["classifier"].get(
                    "net", "").find("double-descent") != -1):
            architecture_args["classifier"]["k"] = args.k
        if ("q-network" in architecture_args
                and architecture_args["classifier"].get(
                    "net", "").find("double-descent") != -1):
            architecture_args["q-network"]["k"] = args.k

    model_class = getattr(methods, args.model_class)

    model = model_class(
        input_shape=train_loader.dataset[0][0].shape,
        architecture_args=architecture_args,
        pretrained_arg=args.pretrained_arg,
        device=args.device,
        grad_weight_decay=args.grad_weight_decay,
        lamb=args.lamb,
        sample_from_q=args.sample_from_q,
        q_dist=args.q_dist,
        load_from=args.load_from,
        loss_function="ce",
    )

    training.train(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        epochs=args.epochs,
        save_iter=args.save_iter,
        vis_iter=args.vis_iter,
        optimization_args=optimization_args,
        log_dir=args.log_dir,
        args_to_log=args,
        stopping_param=args.stopping_param,
    )

    # test the last model and best model
    models_to_test = [
        {
            "name": "best",
            "file": "best_val.mdl"
        },
        {
            "name": "final",
            "file": "final.mdl"
        },
    ]
    for spec in models_to_test:
        print("Testing the {} model...".format(spec["name"]))
        model = utils.load(os.path.join(args.log_dir, "checkpoints",
                                        spec["file"]),
                           device=args.device)
        pred = utils.apply_on_dataset(
            model,
            test_loader.dataset,
            batch_size=args.batch_size,
            output_keys_regexp="pred",
            description="Testing",
        )["pred"]
        labels = [p[1] for p in test_loader.dataset]
        labels = torch.tensor(labels, dtype=torch.long)
        labels = utils.to_cpu(labels)
        with open(
                os.path.join(args.log_dir,
                             "{}_test_predictions.pkl".format(spec["name"])),
                "wb",
        ) as f:
            pickle.dump({"pred": pred, "labels": labels}, f)

        accuracy = torch.mean((pred.argmax(dim=1) == labels).float())
        with open(
                os.path.join(args.log_dir,
                             "{}_test_accuracy.txt".format(spec["name"])),
                "w") as f:
            f.write("{}\n".format(accuracy))
Example #11
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", "-c", type=str, default=None)
    parser.add_argument("--device", "-d", default="cuda")

    parser.add_argument("--batch_size", "-b", type=int, default=256)
    parser.add_argument("--epochs", "-e", type=int, default=400)
    parser.add_argument("--stopping_param", type=int, default=50)
    parser.add_argument("--save_iter", "-s", type=int, default=10)
    parser.add_argument("--vis_iter", "-v", type=int, default=10)
    parser.add_argument("--log_dir", "-l", type=str, default=None)
    parser.add_argument("--seed", type=int, default=42)

    parser.add_argument(
        "--dataset",
        "-D",
        type=str,
        default="mnist",
        choices=[
            "mnist", "cifar10", "cifar100", "clothing1m", "imagenet", "cover"
        ],
    )
    # parser.add_argument("--image-root", metavar="DIR", help="path to images")
    # parser.add_argument("--label", metavar="DIR", help="path to label file")
    parser.add_argument("--data_augmentation",
                        "-A",
                        action="store_true",
                        dest="data_augmentation")
    parser.set_defaults(data_augmentation=False)
    parser.add_argument("--num_train_examples", type=int, default=None)
    parser.add_argument("--label_noise_level", "-n", type=float, default=0.0)
    parser.add_argument(
        "--label_noise_type",
        type=str,
        default="error",
        choices=["error", "cifar10_custom"],
    )
    parser.add_argument(
        "--transform_function",
        type=str,
        default=None,
        choices=[None, "remove_random_chunks"],
    )
    parser.add_argument("--clean_validation",
                        dest="clean_validation",
                        action="store_true")
    parser.set_defaults(clean_validation=False)
    parser.add_argument("--remove_prob", type=float, default=0.5)

    parser.add_argument("--model_class",
                        "-m",
                        type=str,
                        default="StandardClassifier")
    parser.add_argument(
        "--loss_function",
        type=str,
        default="ce",
        choices=["ce", "mse", "mae", "gce", "dmi", "fw", "none"],
    )
    parser.add_argument("--loss_function_param", type=float, default=1.0)
    parser.add_argument("--load_from", type=str, default=None)
    parser.add_argument("--grad_weight_decay", "-L", type=float, default=0.0)
    parser.add_argument("--grad_l1_penalty", "-S", type=float, default=0.0)
    parser.add_argument("--lamb", type=float, default=1.0)
    parser.add_argument("--pretrained_arg", "-r", type=str, default=None)
    parser.add_argument("--sample_from_q",
                        action="store_true",
                        dest="sample_from_q")
    parser.set_defaults(sample_from_q=False)
    parser.add_argument("--q_dist",
                        type=str,
                        default="Gaussian",
                        choices=["Gaussian", "Laplace", "dot"])
    parser.add_argument("--no-detach", dest="detach", action="store_false")
    parser.set_defaults(detach=True)
    parser.add_argument(
        "--warm_up",
        type=int,
        default=0,
        help="Number of epochs to skip before "
        "starting to train using predicted gradients",
    )
    parser.add_argument("--weight_decay", type=float, default=0.0)

    parser.add_argument(
        "--add_noise",
        action="store_true",
        dest="add_noise",
        help="add noise to the gradients of a standard classifier.",
    )
    parser.set_defaults(add_noise=False)
    parser.add_argument("--noise_type",
                        type=str,
                        default="Gaussian",
                        choices=["Gaussian", "Laplace"])
    parser.add_argument("--noise_std", type=float, default=0.0)

    parser.add_argument("--lr", type=float, default=1e-3, help="Learning rate")
    args = parser.parse_args()
    print(args)

    # Load data
    args.image_root = [
        "/data/chenlong.1024/5198852-tiktok-1w_images",
        "/data/chenlong.1024/5205046-tiktok-10w_images",
        "/data/chenlong.1024/5599074-tiktok-impr_cnt20_images",
        "/data/chenlong.1024/5600297-tiktok-impr_cnt10_images",
    ]
    args.label = [
        "/data/chenlong.1024/5198852-tiktok-1w.csv",
        "/data/chenlong.1024/5205046-tiktok-10w.csv",
        "/data/chenlong.1024/5599074-tiktok-impr_cnt20.csv",
        "/data/chenlong.1024/5600297-tiktok-impr_cnt10.csv",
    ]
    train_loader, val_loader, test_loader = datasets.load_data_from_arguments(
        args)

    # Options
    optimization_args = {
        "optimizer": {
            "name": "adam",
            "lr": args.lr,
            "weight_decay": args.weight_decay
        },
        "scheduler": {
            "step_size": 20,
            "gamma": 0.3
        },
    }

    # optimization_args = {
    #     'optimizer': {
    #         'name': 'sgd',
    #         'lr': 1e-3,
    #     },
    #     'scheduler': {
    #         'step_size': 15,
    #         'gamma': 1.25
    #     }
    # }

    model_class = getattr(methods, args.model_class)

    if "CoverModel" in args.model_class:
        model = model_class(
            num_classes=2,
            pretrained=True,
            device=args.device,
            grad_weight_decay=args.grad_weight_decay,
            grad_l1_penalty=args.grad_l1_penalty,
            lamb=args.lamb,
            sample_from_q=args.sample_from_q,
            q_dist=args.q_dist,
            load_from=args.load_from,
            loss_function=args.loss_function,
            loss_function_param=args.loss_function_param,
            add_noise=args.add_noise,
            noise_type=args.noise_type,
            noise_std=args.noise_std,
            detach=args.detach,
            warm_up=args.warm_up,
        )
    else:
        with open(args.config, "r") as f:
            architecture_args = json.load(f)
        model = model_class(
            input_shape=train_loader.dataset[0][0].shape,
            architecture_args=architecture_args,
            pretrained_arg=args.pretrained_arg,
            device=args.device,
            grad_weight_decay=args.grad_weight_decay,
            grad_l1_penalty=args.grad_l1_penalty,
            lamb=args.lamb,
            sample_from_q=args.sample_from_q,
            q_dist=args.q_dist,
            load_from=args.load_from,
            loss_function=args.loss_function,
            loss_function_param=args.loss_function_param,
            add_noise=args.add_noise,
            noise_type=args.noise_type,
            noise_std=args.noise_std,
            detach=args.detach,
            warm_up=args.warm_up,
        )

    metrics_list = []
    if args.dataset == "imagenet":
        metrics_list.append(metrics.TopKAccuracy(k=5, output_key="pred"))

    training.train(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        epochs=args.epochs,
        save_iter=args.save_iter,
        vis_iter=args.vis_iter,
        optimization_args=optimization_args,
        log_dir=args.log_dir,
        args_to_log=args,
        stopping_param=args.stopping_param,
        metrics=metrics_list,
    )

    # if training finishes successfully, compute the test score
    print("Testing the best validation model...")
    model = utils.load(os.path.join(args.log_dir, "checkpoints",
                                    "best_val.mdl"),
                       device=args.device)
    pred = utils.apply_on_dataset(
        model,
        test_loader.dataset,
        batch_size=args.batch_size,
        output_keys_regexp="pred",
        description="Testing",
    )["pred"]
    labels = [p[1] for p in test_loader.dataset]
    labels = torch.tensor(labels, dtype=torch.long)
    labels = utils.to_cpu(labels)
    with open(os.path.join(args.log_dir, "test_predictions.pkl"), "wb") as f:
        pickle.dump({"pred": pred, "labels": labels}, f)

    accuracy = torch.mean((pred.argmax(dim=1) == labels).float())
    with open(os.path.join(args.log_dir, "test_accuracy.txt"), "w") as f:
        f.write("{}\n".format(accuracy))
Example #12
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', '-c', type=str, required=True)
    parser.add_argument('--device', '-d', default='cuda')

    parser.add_argument('--batch_size', '-b', type=int, default=128)
    parser.add_argument('--epochs', '-e', type=int, default=4000)
    parser.add_argument('--stopping_param', type=int, default=2**30)
    parser.add_argument('--save_iter', '-s', type=int, default=100)
    parser.add_argument('--vis_iter', '-v', type=int, default=10)
    parser.add_argument('--log_dir', '-l', type=str, default=None)
    parser.add_argument('--seed', type=int, default=42)

    parser.add_argument(
        '--dataset',
        '-D',
        type=str,
        default='cifar10',
        choices=['mnist', 'cifar10', 'cifar100', 'clothing1m', 'imagenet'])
    parser.add_argument('--data_augmentation',
                        '-A',
                        action='store_true',
                        dest='data_augmentation')
    parser.set_defaults(data_augmentation=False)
    parser.add_argument('--num_train_examples', type=int, default=None)
    parser.add_argument('--label_noise_level', '-n', type=float, default=0.0)
    parser.add_argument('--label_noise_type',
                        type=str,
                        default='error',
                        choices=['error', 'cifar10_custom'])
    parser.add_argument('--transform_function',
                        type=str,
                        default=None,
                        choices=[None, 'remove_random_chunks'])
    parser.add_argument('--clean_validation',
                        dest='clean_validation',
                        action='store_true')
    parser.set_defaults(clean_validation=False)
    parser.add_argument('--remove_prob', type=float, default=0.5)

    parser.add_argument('--model_class',
                        '-m',
                        type=str,
                        default='StandardClassifier')
    parser.add_argument('--load_from', type=str, default=None)
    parser.add_argument('--grad_weight_decay', '-L', type=float, default=0.0)
    parser.add_argument('--lamb', type=float, default=1.0)
    parser.add_argument('--pretrained_arg', '-r', type=str, default=None)
    parser.add_argument('--sample_from_q',
                        action='store_true',
                        dest='sample_from_q')
    parser.set_defaults(sample_from_q=False)
    parser.add_argument('--q_dist',
                        type=str,
                        default='Gaussian',
                        choices=['Gaussian', 'Laplace', 'dot'])
    parser.add_argument('--weight_decay', type=float, default=0.0)

    parser.add_argument('--lr', type=float, default=1e-4, help='Learning rate')

    parser.add_argument('--k',
                        '-k',
                        type=int,
                        required=True,
                        default=10,
                        help='width parameter of ResNet18-k')
    args = parser.parse_args()
    print(args)

    # Load data
    train_loader, val_loader, test_loader = datasets.load_data_from_arguments(
        args)

    # Options
    optimization_args = {
        'optimizer': {
            'name': 'adam',
            'lr': args.lr,
            'weight_decay': args.weight_decay
        }
    }

    with open(args.config, 'r') as f:
        architecture_args = json.load(f)

        # set the width parameter k
        if ('classifier' in architecture_args
                and architecture_args['classifier'].get(
                    'net', '') == 'double-descent-cifar10-resnet18'):
            architecture_args['classifier']['k'] = args.k
        if ('q-network' in architecture_args
                and architecture_args['q-network'].get(
                    'net', '') == 'double-descent-cifar10-resnet18'):
            architecture_args['q-network']['k'] = args.k

    model_class = getattr(methods, args.model_class)

    model = model_class(input_shape=train_loader.dataset[0][0].shape,
                        architecture_args=architecture_args,
                        pretrained_arg=args.pretrained_arg,
                        device=args.device,
                        grad_weight_decay=args.grad_weight_decay,
                        lamb=args.lamb,
                        sample_from_q=args.sample_from_q,
                        q_dist=args.q_dist,
                        load_from=args.load_from,
                        loss_function='ce')

    training.train(model=model,
                   train_loader=train_loader,
                   val_loader=val_loader,
                   epochs=args.epochs,
                   save_iter=args.save_iter,
                   vis_iter=args.vis_iter,
                   optimization_args=optimization_args,
                   log_dir=args.log_dir,
                   args_to_log=args,
                   stopping_param=args.stopping_param)

    # test the last model and best model
    models_to_test = [{
        'name': 'best',
        'file': 'best_val.mdl'
    }, {
        'name': 'final',
        'file': 'final.mdl'
    }]
    for spec in models_to_test:
        print("Testing the {} model...".format(spec['name']))
        model = utils.load(os.path.join(args.log_dir, 'checkpoints',
                                        spec['file']),
                           device=args.device)
        pred = utils.apply_on_dataset(model,
                                      test_loader.dataset,
                                      batch_size=args.batch_size,
                                      output_keys_regexp='pred',
                                      description='Testing')['pred']
        labels = [p[1] for p in test_loader.dataset]
        labels = torch.tensor(labels, dtype=torch.long)
        labels = utils.to_cpu(labels)
        with open(
                os.path.join(args.log_dir,
                             '{}_test_predictions.pkl'.format(spec['name'])),
                'wb') as f:
            pickle.dump({'pred': pred, 'labels': labels}, f)

        accuracy = torch.mean((pred.argmax(dim=1) == labels).float())
        with open(
                os.path.join(args.log_dir,
                             '{}_test_accuracy.txt'.format(spec['name'])),
                'w') as f:
            f.write("{}\n".format(accuracy))
Example #13
from modules.DataAndVisualization.vizualiser import (
    visualize as interactive_scatter,
    plot_parallel as parallel,
)
from modules.utils import load

# Visualizing the geometry design problem

# Objectives as a reminder
# surface area, volume, min height and floor area

# First load the solution. For more details on the solution, check the readme file in modules/DataAndVisualization
obj, var, _nadir, _ideal = load("gd1")

# You can ignore this part
# Make sure all values are positive, as some of the objectives may be flipped:
# setting pfront to true when creating the problem will flip some values.
obj = abs(obj)

# Axis names for the plot
axis_names = ["Surface area", "Volume", "Min height", "Floor area"]

# As this is a 4-dimensional solution, we'll have to use a parallel plot:
parallel(obj, axis_names)

# The geometry design problem supports 2- and 3-dimensional interactive scatter plots:
# Click a point => the corresponding tent will be plotted and its values printed to the console
# => close the tent plot => back to the objectives plot

# Load a 3d problem
obj, var, _nadir, _ideal = load("gd2")
Example #14
    def __init__(self,
                 input_shape,
                 architecture_args,
                 pretrained_arg=None,
                 device='cuda',
                 grad_weight_decay=0.0,
                 grad_l1_penalty=0.0,
                 lamb=1.0,
                 sample_from_q=False,
                 q_dist='Gaussian',
                 loss_function='ce',
                 detach=True,
                 load_from=None,
                 warm_up=0,
                 **kwargs):
        super(PredictGradOutput, self).__init__(**kwargs)

        self.args = {
            'input_shape': input_shape,
            'architecture_args': architecture_args,
            'pretrained_arg': pretrained_arg,
            'device': device,
            'grad_weight_decay': grad_weight_decay,
            'grad_l1_penalty': grad_l1_penalty,
            'lamb': lamb,
            'sample_from_q': sample_from_q,
            'q_dist': q_dist,
            'loss_function': loss_function,
            'detach': detach,
            'load_from': load_from,
            'warm_up': warm_up,
            'class': 'PredictGradOutput'
        }

        assert len(input_shape) == 3
        self.input_shape = [None] + list(input_shape)
        self.architecture_args = architecture_args
        self.pretrained_arg = pretrained_arg
        self.device = device
        self.grad_weight_decay = grad_weight_decay
        self.grad_l1_penalty = grad_l1_penalty
        self.lamb = lamb
        self.sample_from_q = sample_from_q
        self.q_dist = q_dist
        self.detach = detach
        self.loss_function = loss_function
        self.load_from = load_from
        self.warm_up = warm_up

        # lamb is the coefficient in front of the H(p,q) term. It controls the variance of predicted gradients.
        if self.q_dist == 'Gaussian':
            self.grad_replacement_class = nn_utils.get_grad_replacement_class(
                sample=self.sample_from_q,
                standard_dev=np.sqrt(1.0 / 2.0 / (self.lamb + 1e-12)),
                q_dist=self.q_dist)
        elif self.q_dist == 'Laplace':
            self.grad_replacement_class = nn_utils.get_grad_replacement_class(
                sample=self.sample_from_q,
                standard_dev=np.sqrt(2.0) / (self.lamb + 1e-6),
                q_dist=self.q_dist)
        elif self.q_dist == 'dot':
            assert not self.sample_from_q
            self.grad_replacement_class = nn_utils.get_grad_replacement_class(
                sample=False)
        else:
            raise NotImplementedError()

        # initialize the network
        self.classifier, output_shape = nn_utils.parse_feed_forward(
            args=self.architecture_args['classifier'],
            input_shape=self.input_shape)
        self.classifier = self.classifier.to(self.device)
        self.num_classes = output_shape[-1]

        if self.pretrained_arg is not None:
            q_base = pretrained_models.get_pretrained_model(
                self.pretrained_arg, self.input_shape, self.device)

            # create the trainable part of the q_network
            q_top = torch.nn.Sequential(
                torch.nn.Linear(q_base.output_shape[-1], 128),
                torch.nn.ReLU(inplace=True),
                torch.nn.Linear(128, self.num_classes)).to(self.device)

            self.q_network = torch.nn.Sequential(q_base, q_top)
        else:
            self.q_network, _ = nn_utils.parse_feed_forward(
                args=self.architecture_args['q-network'],
                input_shape=self.input_shape)
            self.q_network = self.q_network.to(self.device)

            if self.load_from is not None:
                print("Loading the gradient predictor model from {}".format(
                    load_from))
                stored_net = utils.load(load_from, device='cpu')
                stored_net_params = dict(
                    stored_net.classifier.named_parameters())
                for key, param in self.q_network.named_parameters():
                    param.data = stored_net_params[key].data.to(self.device)

        self.q_loss = None
        if self.loss_function == 'none':  # predicted gradient has general form
            self.q_loss = torch.nn.Sequential(
                torch.nn.Linear(2 * self.num_classes, 128),
                torch.nn.ReLU(inplace=True),
                torch.nn.Linear(128, self.num_classes)).to(self.device)
Example #15
from modules.DataAndVisualization.vizualiser import (
    plot_scatter as scatter,
    visualize as interactive_scatter,
    plot_parallel as parallel,
)
from modules.utils import load

# Visualizing the geometry design problem with constant floor

# First load the solution. For more details on the solution, check the readme file in modules/DataAndVisualization
obj, var, nadir, ideal = load("gdcExample1")

# You can ignore this part
# Make sure all values are positive, as some of the objectives may be flipped:
# setting pfront to true when creating the problem will flip some values.
obj = abs(obj)

# Axis names for the plot
axis_names = ["Surface area", "Volume"]

# The constant-floor geometry design problem supports an interactive scatter plot, as it is only 2-dimensional:
# Click a point => the corresponding tent will be plotted and its values printed to the console
# => close the tent plot => back to the objectives plot
interactive_scatter(obj, var, axis_names)

# You can also use other plots:

# Scatter
scatter(obj, axis_names)

# Parallel plot (this one is actually quite interesting):
parallel(obj, axis_names)
Example #16
    def __init__(self,
                 input_shape,
                 architecture_args,
                 pretrained_arg=None,
                 device="cuda",
                 grad_weight_decay=0.0,
                 grad_l1_penalty=0.0,
                 lamb=1.0,
                 sample_from_q=False,
                 q_dist="Gaussian",
                 loss_function="ce",
                 detach=True,
                 load_from=None,
                 warm_up=0,
                 **kwargs):
        super(PredictGradOutput, self).__init__(**kwargs)

        self.args = {
            "input_shape": input_shape,
            "architecture_args": architecture_args,
            "pretrained_arg": pretrained_arg,
            "device": device,
            "grad_weight_decay": grad_weight_decay,
            "grad_l1_penalty": grad_l1_penalty,
            "lamb": lamb,
            "sample_from_q": sample_from_q,
            "q_dist": q_dist,
            "loss_function": loss_function,
            "detach": detach,
            "load_from": load_from,
            "warm_up": warm_up,
            "class": "PredictGradOutput",
        }

        assert len(input_shape) == 3
        self.input_shape = [None] + list(input_shape)
        self.architecture_args = architecture_args
        self.pretrained_arg = pretrained_arg
        self.device = device
        self.grad_weight_decay = grad_weight_decay
        self.grad_l1_penalty = grad_l1_penalty
        self.lamb = lamb
        self.sample_from_q = sample_from_q
        self.q_dist = q_dist
        self.detach = detach
        self.loss_function = loss_function
        self.load_from = load_from
        self.warm_up = warm_up

        # lamb is the coefficient in front of the H(p,q) term. It controls the variance of predicted gradients.
        if self.q_dist == "Gaussian":
            self.grad_replacement_class = nn_utils.get_grad_replacement_class(
                sample=self.sample_from_q,
                standard_dev=np.sqrt(1.0 / 2.0 / (self.lamb + 1e-12)),
                q_dist=self.q_dist,
            )
        elif self.q_dist == "Laplace":
            self.grad_replacement_class = nn_utils.get_grad_replacement_class(
                sample=self.sample_from_q,
                standard_dev=np.sqrt(2.0) / (self.lamb + 1e-6),
                q_dist=self.q_dist,
            )
        elif self.q_dist == "dot":
            assert not self.sample_from_q
            self.grad_replacement_class = nn_utils.get_grad_replacement_class(
                sample=False)
        else:
            raise NotImplementedError()

        # initialize the network
        self.classifier, output_shape = nn_utils.parse_feed_forward(
            args=self.architecture_args["classifier"],
            input_shape=self.input_shape)
        self.classifier = self.classifier.to(self.device)
        self.num_classes = output_shape[-1]

        if self.pretrained_arg is not None:
            q_base = pretrained_models.get_pretrained_model(
                self.pretrained_arg, self.input_shape, self.device)

            # create the trainable part of the q_network
            q_top = torch.nn.Sequential(
                torch.nn.Linear(q_base.output_shape[-1], 128),
                torch.nn.ReLU(inplace=True),
                torch.nn.Linear(128, self.num_classes),
            ).to(self.device)

            self.q_network = torch.nn.Sequential(q_base, q_top)
        else:
            self.q_network, _ = nn_utils.parse_feed_forward(
                args=self.architecture_args["q-network"],
                input_shape=self.input_shape)
            self.q_network = self.q_network.to(self.device)

            if self.load_from is not None:
                print("Loading the gradient predictor model from {}".format(
                    load_from))
                stored_net = utils.load(load_from, device="cpu")
                stored_net_params = dict(
                    stored_net.classifier.named_parameters())
                for key, param in self.q_network.named_parameters():
                    param.data = stored_net_params[key].data.to(self.device)

        self.q_loss = None
        if self.loss_function == "none":  # predicted gradient has general form
            self.q_loss = torch.nn.Sequential(
                torch.nn.Linear(2 * self.num_classes, 128),
                torch.nn.ReLU(inplace=True),
                torch.nn.Linear(128, self.num_classes),
            ).to(self.device)
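
A note on the Gaussian branch above: for q = N(mu, sigma^2 I), the negative log-density is ||g - mu||^2 / (2 * sigma^2) plus a constant, so a penalty of lamb * ||g - mu||^2 corresponds to sigma = sqrt(1 / (2 * lamb)), which is the standard_dev passed to get_grad_replacement_class. This reading of lamb is inferred from the code comment, not stated explicitly; a quick numeric check:

import numpy as np

lamb = 4.0
sigma = np.sqrt(1.0 / (2.0 * lamb))  # standard_dev used in the Gaussian branch
assert np.isclose(lamb, 1.0 / (2.0 * sigma ** 2))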