Example 1
    def __init__(self,
                 model,
                 dataset,
                 datadir,
                 optim,
                 scheduler,
                 optim2=None,
                 scheduler2=None,
                 batch_size=128,
                 seed=None,
                 num_gpus=1,
                 verbose=False):
        # Parameters like learning rate and momentum can be specified by the
        # config search space. If not specified, fall back to the args.
        self.dataset = dataset
        self.datadir = datadir
        self.num_gpus = num_gpus
        self.verbose = verbose
        self.model = model
        self.optimizer = optim
        self.scheduler = scheduler
        self.optimizer2 = optim2
        self.scheduler2 = scheduler2
        # `use_cuda` is referenced below but never set in the original
        # snippet; deriving it from the runtime here is an assumption.
        self.use_cuda = torch.cuda.is_available()

        num_workers = 4
        if seed is not None:
            np.random.seed(seed)
            random.seed(seed)
            torch.manual_seed(seed)
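            # Use single-process data loading (num_workers=0) below so batch
            # order stays deterministic under a fixed seed.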
            num_workers = 0
            if self.use_cuda:
                torch.cuda.manual_seed(seed)
        else:
            seed = random.randint(0, 10000)

        # ######################################################################
        #  Data
        kwargs = {}
        if self.use_cuda:
            kwargs = {'num_workers': num_workers, 'pin_memory': True}
        if dataset.startswith('cifar'):
            self.train_loader, self.test_loader = cifar.get_data(
                32, datadir, dataset=dataset, batch_size=batch_size, **kwargs)
        elif dataset == 'tiny_imagenet':
            self.train_loader, self.test_loader = tiny_imagenet.get_data(
                64, datadir, val_only=False, batch_size=batch_size, **kwargs)
        else:
            raise ValueError('Unknown dataset: {}'.format(dataset))

        # ######################################################################
        # Split across GPUs
        if torch.cuda.device_count() > 1 and num_gpus > 1:
            self.model = nn.DataParallel(self.model)
            model = self.model.module
        else:
            model = self.model
        if self.use_cuda:
            self.model.cuda()
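A note on the seeding block: the same few lines recur in each example below. As a self-contained sketch of that pattern, with a helper name (`set_seed`) that is ours rather than the original project's:

import random

import numpy as np
import torch

def set_seed(seed, use_cuda=False):
    # Seed every RNG the training code touches so runs are repeatable.
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    if use_cuda:
        torch.cuda.manual_seed(seed)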
Example 2
    def _setup(self, config):
        args = config.pop("args")
        vars(args).update(config)
        type_ = config.get('type', 'gainA')
        use_dwt = config.get('dwt', False)
        C = config.get('num_channels', 64)
        dataset = config.get('dataset', args.dataset)
        if hasattr(args, 'verbose'):
            self._verbose = args.verbose

        num_workers = 4
        if args.seed is not None:
            np.random.seed(args.seed)
            random.seed(args.seed)
            torch.manual_seed(args.seed)
            num_workers = 0
            if self.use_cuda:
                torch.cuda.manual_seed(args.seed)

        # ######################################################################
        #  Data
        kwargs = {'num_workers': num_workers, 'pin_memory': True} if self.use_cuda else {}
        if args.dataset.startswith('cifar'):
            self.train_loader, self.test_loader = cifar.get_data(
                32, args.datadir, dataset=dataset,
                batch_size=args.batch_size, trainsize=args.trainsize,
                **kwargs)
        elif args.dataset == 'tiny_imagenet':
            self.train_loader, self.test_loader = tiny_imagenet.get_data(
                64, args.datadir, val_only=False,
                batch_size=args.batch_size, trainsize=args.trainsize,
                distributed=False, **kwargs)

        # ######################################################################
        # Build the network based on the type parameter. θ holds the optimal
        # hyperparameters (lr, momentum, weight decay) found by cross-validation.
        if type_.startswith('ref'):
            θ = (0.1, 0.9, 1e-4)
        else:
            θ = (0.45, 0.8, 1e-4)
            #  raise ValueError('Unknown type')
        lr, mom, wd = θ
        # If the parameters were provided as an option, use them
        lr = config.get('lr', lr)
        mom = config.get('mom', mom)
        wd = config.get('wd', wd)
        wd1 = config.get('wd1', wd)
        std = config.get('std', 1.0)

        # Build the network
        self.model = MixedNet(args.dataset, type_, use_dwt, C, wd, wd1)
        self.model.apply(lambda x: net_init(x, std))

        # Split across GPUs
        if torch.cuda.device_count() > 1 and config.get('num_gpus', 0) > 1:
            self.model = nn.DataParallel(self.model)
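            # Keep a reference to the wrapped module so the parameter-group
            # code below indexes the real layers, not the DataParallel wrapper.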
            model = self.model.module
        else:
            model = self.model
        if self.use_cuda:
            self.model.cuda()

        # ######################################################################
        # Build the optimizer - use separate parameter groups for the gain
        # and convolutional layers
        default_params = list(model.fc1.parameters())
        gain_params = []
        for name, module in model.net.named_children():
            params = [p for p in module.parameters() if p.requires_grad]
            if name.startswith('gain'):
                gain_params += params
            else:
                default_params += params

        self.optimizer, self.scheduler = optim.get_optim(
            'sgd', default_params, init_lr=lr,
            steps=args.steps, wd=wd, gamma=0.2, momentum=mom,
            max_epochs=args.epochs)

        if len(gain_params) > 0:
            # Get special optimizer parameters
            lr1 = config.get('lr1', lr)
            gamma1 = config.get('gamma1', 0.2)
            mom1 = config.get('mom1', mom)
            opt1 = config.get('opt1', 'sgd')
            if lr1 is None:
                lr1 = lr
            if mom1 is None:
                mom1 = mom

            # Do not use the optimizer's weight decay; a separate method
            # applies it instead.
            self.optimizer1, self.scheduler1 = optim.get_optim(
                opt1, gain_params, init_lr=lr1,
                steps=args.steps, wd=0, gamma=gamma1, momentum=mom1,
                max_epochs=args.epochs)

        if self.verbose:
            print(self.model)
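The split into default_params and gain_params above keys off child-module names. A self-contained sketch with a toy model, and with plain torch.optim standing in for the project's optim.get_optim wrapper (whose code is not shown in these snippets):

import torch
import torch.nn as nn

# Toy model mirroring the structure the split assumes: a `net` container
# whose children are named 'gain*' or otherwise, plus a classifier `fc1`.
class ToyNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential()
        self.net.add_module('gain0', nn.Conv2d(3, 16, 1))
        self.net.add_module('conv1', nn.Conv2d(16, 16, 3, padding=1))
        self.fc1 = nn.Linear(16, 10)

model = ToyNet()
default_params = list(model.fc1.parameters())
gain_params = []
for name, module in model.net.named_children():
    params = [p for p in module.parameters() if p.requires_grad]
    if name.startswith('gain'):
        gain_params += params
    else:
        default_params += params

# Weight decay on the standard layers, none on the gain layers (the
# snippets note that decay for the gains is applied by a separate method).
optimizer = torch.optim.SGD(default_params, lr=0.1, momentum=0.9,
                            weight_decay=1e-4)
optimizer1 = torch.optim.SGD(gain_params, lr=0.1, momentum=0.9)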
Example 3
    def _setup(self, config):
        args = config.pop("args")
        vars(args).update(config)
        # Parameters like learning rate and momentum can be specified by the
        # config search space. If not specified, fall back to the args.
        type_ = config.get('type', 'ref')
        lr = config.get('lr', args.lr)
        mom = config.get('mom', args.mom)
        wd = config.get('wd', args.wd)
        C = config.get('num_channels', args.C)
        dataset = config.get('dataset', args.dataset)
        num_gpus = config.get('num_gpus', args.num_gpus)

        # Get optimizer parameters for gainlayer
        mom1 = config.get('mom1', args.mom1)
        lr1 = config.get('lr1', args.lr1)
        wd1 = config.get('wd1', args.wd1)
        opt1 = config.get('opt1', args.opt1)
        gamma1 = config.get('gamma1', 0.2)
        if mom1 is None:
            mom1 = mom
        if lr1 is None:
            lr1 = lr

        # Get nonlinearity options
        pixel_nl = config.get('pixel_nl', args.pixel_nl)
        lp_nl = config.get('lp_nl', args.lp_nl)
        bp_nl = config.get('bp_nl', args.bp_nl)
        lp_q = config.get('lp_q', args.lp_q)
        bp_q = config.get('bp_q', args.bp_q)
        lp_thresh = config.get('lp_thresh', 1)
        bp_thresh = config.get('bp_thresh', 1)

        if hasattr(args, 'verbose'):
            self._verbose = args.verbose

        num_workers = 4
        if args.seed is not None:
            np.random.seed(args.seed)
            random.seed(args.seed)
            torch.manual_seed(args.seed)
            num_workers = 0
            if self.use_cuda:
                torch.cuda.manual_seed(args.seed)
        else:
            args.seed = random.randint(0, 10000)

        # ######################################################################
        #  Data
        kwargs = {
            'num_workers': num_workers,
            'pin_memory': True
        } if self.use_cuda else {}
        if dataset.startswith('cifar'):
            self.train_loader, self.test_loader = cifar.get_data(
                32,
                args.datadir,
                dataset=dataset,
                batch_size=args.batch_size,
                trainsize=args.trainsize,
                **kwargs)
        elif dataset == 'tiny_imagenet':
            self.train_loader, self.test_loader = tiny_imagenet.get_data(
                64,
                args.datadir,
                val_only=False,
                batch_size=args.batch_size,
                trainsize=args.trainsize,
                distributed=False,
                **kwargs)

        # ######################################################################
        # Build the network based on the type parameter
        self.model = MixedNet(dataset,
                              type_,
                              C,
                              wd,
                              wd1,
                              pixel_nl=pixel_nl,
                              lp_nl=lp_nl,
                              bp_nl=bp_nl,
                              lp_nl_kwargs=dict(q=lp_q, thresh=lp_thresh),
                              bp_nl_kwargs=dict(q=bp_q, thresh=bp_thresh))
        self.model.apply(lambda x: net_init(x, 1.0))

        # Split across GPUs
        if torch.cuda.device_count() > 1 and num_gpus > 1:
            self.model = nn.DataParallel(self.model)
            model = self.model.module
        else:
            model = self.model
        if self.use_cuda:
            self.model.cuda()

        # ######################################################################
        # Build the optimizer - use separate parameter groups for the gain
        # and convolutional layers
        default_params = list(model.fc1.parameters())
        gain_params = []
        for name, module in model.net.named_children():
            params = [p for p in module.parameters() if p.requires_grad]
            if name.startswith('wave'):
                gain_params += params
            else:
                default_params += params

        self.optimizer, self.scheduler = optim.get_optim(
            'sgd',
            default_params,
            init_lr=lr,
            steps=args.steps,
            wd=0,
            gamma=0.2,
            momentum=mom,
            max_epochs=args.epochs)

        if len(gain_params) > 0:
            # Do not use the optimizer's weight decay; a separate method
            # applies it instead.
            self.optimizer1, self.scheduler1 = optim.get_optim(
                opt1,
                gain_params,
                init_lr=lr1,
                steps=args.steps,
                wd=0,
                gamma=gamma1,
                momentum=mom1,
                max_epochs=args.epochs)

        if self.verbose:
            print(self.model)
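Finally, the config.pop("args") / vars(args).update(config) pattern at the top of each _setup merges a hyperparameter-search config over parsed command-line defaults (the _setup(self, config) signature looks like Ray Tune's older Trainable API). A minimal illustration with made-up values:

import argparse

# Stand-in for parsed command-line arguments; every value is illustrative.
args = argparse.Namespace(lr=0.1, mom=0.9, wd=1e-4, dataset='cifar10')

# A search config overrides some of those defaults and carries `args` along.
config = {'args': args, 'lr': 0.05, 'type': 'ref'}

args = config.pop('args')          # remove the namespace from the config
vars(args).update(config)          # search values override the defaults
lr = config.get('lr', args.lr)     # -> 0.05, the config wins
mom = config.get('mom', args.mom)  # -> 0.9, falls back to the args default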