    def get_H_config(self, dataset, will_train=True):
        print("Preparing training D1+D2 (H)")
        print("Mixture size: %s"%colored('%d'%len(dataset), 'green'))

        # 80%, 20% for local train+test
        train_ds, valid_ds = dataset.split_dataset(0.8)

        if self.args.D1 in Global.mirror_augment:
            print(colored("Mirror augmenting %s"%self.args.D1, 'green'))
            new_train_ds = train_ds + MirroredDataset(train_ds)
            train_ds = new_train_ds

        # Initialize the multi-threaded loaders.
        train_loader = DataLoader(train_ds, batch_size=self.args.batch_size, shuffle=True, num_workers=self.args.workers, pin_memory=True)
        valid_loader = DataLoader(valid_ds, batch_size=self.args.batch_size, shuffle=True, num_workers=self.args.workers, pin_memory=True)

        # For the threshold learning to actually learn a threshold,
        # the margin must be set to 0.
        criterion = SVMLoss(margin=0.0).to(self.args.device)

        # Set up the model
        model = MCDropoutModelWrapper(self.base_model).to(self.args.device)

        old_valid_loader = valid_loader

        # By definition, this approach is uncacheable :(

        # Set up the config
        config = IterativeTrainerConfig()

        base_model_name = self.base_model.__class__.__name__
        if hasattr(self.base_model, 'preferred_name'):
            base_model_name = self.base_model.preferred_name()

        config.name = '_%s[%s](%s-%s)'%(self.__class__.__name__, base_model_name, self.args.D1, self.args.D2)
        config.train_loader = train_loader
        config.valid_loader = valid_loader
        config.phases = {
                        'train':   {'dataset' : train_loader,  'backward': True},
                        'test':    {'dataset' : valid_loader,  'backward': False},
                        'testU':   {'dataset' : old_valid_loader, 'backward': False},
                        }
        config.criterion = criterion
        config.classification = True
        config.cast_float_label = True
        config.stochastic_gradient = True
        config.visualize = not self.args.no_visualize
        config.model = model
        config.optim = optim.Adagrad(model.H.parameters(), lr=1e-1, weight_decay=0)
        config.scheduler = optim.lr_scheduler.ReduceLROnPlateau(config.optim, patience=10, threshold=1e-1, min_lr=1e-8, factor=0.1, verbose=True)
        h_path = path.join(self.args.experiment_path, '%s' % (self.__class__.__name__),
                           '%d' % (self.default_model),
                           '%s-%s.pth' % (self.args.D1, self.args.D2))
        h_parent = path.dirname(h_path)
        config.logger = Logger(h_parent)
        config.max_epoch = 100

        return config
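
The snippets on this page are methods excerpted from larger detector classes, so their imports are not shown. Below is a minimal sketch of the surrounding imports they rely on; the torch/os/termcolor lines are standard, while the commented project-local paths are assumptions about the repository layout.

import torch
import torch.optim as optim
from os import path
from termcolor import colored
from torch.utils.data import DataLoader

# Project-local names used by the snippets; import paths are assumed and
# therefore left commented out:
# from utils.iterative_trainer import IterativeTrainerConfig
# from utils.logger import Logger
# from datasets import MirroredDataset
# import global_vars as Global
# SVMLoss and the *ModelWrapper classes are likewise project-specific.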
Example 2
    def get_H_config(self, dataset, will_train=True):
        print("Preparing training D1+D2 (H)")
        print("Mixture size: %s" % colored('%d' % len(dataset), 'green'))

        # 80%, 20% for local train+test
        train_ds, valid_ds = dataset.split_dataset(0.8)

        if self.args.D1 in Global.mirror_augment:
            print(colored("Mirror augmenting %s" % self.args.D1, 'green'))
            new_train_ds = train_ds + MirroredDataset(train_ds)
            train_ds = new_train_ds

        # Initialize the multi-threaded loaders.
        train_loader = DataLoader(train_ds,
                                  batch_size=self.args.batch_size,
                                  shuffle=True,
                                  num_workers=self.args.workers,
                                  pin_memory=True)
        valid_loader = DataLoader(valid_ds,
                                  batch_size=self.args.batch_size,
                                  shuffle=True,
                                  num_workers=self.args.workers,
                                  pin_memory=True)

        # For the threshold learning to actually learn a threshold,
        # the margin must be set to 0.
        criterion = SVMLoss(margin=0.0).to(self.args.device)

        # Set up the model
        model = DeepEnsembleModelWrapper(self.base_model).to(self.args.device)

        old_valid_loader = valid_loader
        if will_train:
            # cache the subnetwork for faster optimization.
            from methods import get_cached
            from torch.utils.data.dataset import TensorDataset

            trainX, trainY = get_cached(model, train_loader, self.args.device)
            validX, validY = get_cached(model, valid_loader, self.args.device)

            new_train_ds = TensorDataset(trainX, trainY)
            new_valid_ds = TensorDataset(validX, validY)

            # Initialize the new multi-threaded loaders.
            train_loader = DataLoader(new_train_ds,
                                      batch_size=2048,
                                      shuffle=True,
                                      num_workers=0,
                                      pin_memory=False)
            valid_loader = DataLoader(new_valid_ds,
                                      batch_size=2048,
                                      shuffle=True,
                                      num_workers=0,
                                      pin_memory=False)

            # Set model to direct evaluation (for cached data)
            model.set_eval_direct(True)

        # Set up the config
        config = IterativeTrainerConfig()

        base_model_name = self.base_model.preferred_name()

        config.name = '_%s[%s](%s->%s)' % (self.__class__.__name__,
                                           base_model_name, self.args.D1,
                                           self.args.D2)
        config.train_loader = train_loader
        config.valid_loader = valid_loader
        config.phases = {
            'train': {
                'dataset': train_loader,
                'backward': True
            },
            'test': {
                'dataset': valid_loader,
                'backward': False
            },
            'testU': {
                'dataset': old_valid_loader,
                'backward': False
            },
        }
        config.criterion = criterion
        config.classification = True
        config.cast_float_label = True
        config.stochastic_gradient = True
        config.visualize = not self.args.no_visualize
        config.model = model
        config.optim = optim.Adagrad(model.H.parameters(),
                                     lr=1e-1,
                                     weight_decay=0)
        config.scheduler = optim.lr_scheduler.ReduceLROnPlateau(config.optim,
                                                                patience=10,
                                                                threshold=1e-1,
                                                                min_lr=1e-8,
                                                                factor=0.1,
                                                                verbose=True)
        config.logger = Logger()
        config.max_epoch = 100

        return config
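
For reference, a rough sketch of what the get_cached helper imported above might do: run the frozen wrapper once over a loader and collect the per-sample scores and labels as tensors. The exact call that produces the H-head input is an assumption (written here as a plain forward pass); the project's real helper may differ.

def get_cached_sketch(model, loader, device):
    # Collect the wrapper's outputs and the (float) labels in one pass,
    # so the H head can later be trained on cached tensors.
    model.eval()
    xs, ys = [], []
    with torch.no_grad():
        for x, y in loader:
            out = model(x.to(device))  # assumed to yield the H-head input
            xs.append(out.detach().cpu())
            ys.append(y.float().cpu())
    return torch.cat(xs, dim=0), torch.cat(ys, dim=0)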
Example 3
    def get_H_config(self,
                     train_ds,
                     valid_ds,
                     will_train=True,
                     epsilon=0.0012,
                     temperature=1000):
        print("Preparing training D1+D2 (H)")

        # Initialize the multi-threaded loaders.
        train_loader = DataLoader(train_ds,
                                  batch_size=self.args.batch_size,
                                  shuffle=True,
                                  num_workers=self.args.workers,
                                  pin_memory=True)
        valid_loader = DataLoader(valid_ds,
                                  batch_size=self.args.batch_size,
                                  shuffle=True,
                                  num_workers=self.args.workers,
                                  pin_memory=True)

        # Set up the criterion
        # For the threshold learning to actually learn a threshold,
        # the margin must be set to 0.
        criterion = SVMLoss(margin=0.0).to(self.args.device)

        # Set up the model
        model = ODINModelWrapper(self.base_model,
                                 epsilon=epsilon,
                                 temperature=temperature).to(self.args.device)

        old_valid_loader = valid_loader
        if will_train:
            # cache the subnetwork for faster optimization.
            from methods import get_cached
            from torch.utils.data.dataset import TensorDataset

            trainX, trainY = get_cached(model, train_loader, self.args.device)
            validX, validY = get_cached(model, valid_loader, self.args.device)

            new_train_ds = TensorDataset(trainX, trainY)
            # Initialize the threshold halfway between the mean cached scores
            # of the two classes (labels 0 and 1).
            x_center = trainX[trainY == 0].mean()
            y_center = trainX[trainY == 1].mean()
            init_value = (x_center + y_center) / 2
            model.H.threshold.data = init_value.to(
                model.H.threshold.device).view((1, ))
            print("Initializing threshold to %.2f" % (init_value.item()))

            new_valid_ds = TensorDataset(validX, validY)

            # Initialize the new multi-threaded loaders.
            train_loader = DataLoader(new_train_ds,
                                      batch_size=2048,
                                      shuffle=True,
                                      num_workers=0,
                                      pin_memory=False)
            valid_loader = DataLoader(new_valid_ds,
                                      batch_size=2048,
                                      shuffle=True,
                                      num_workers=0,
                                      pin_memory=False)

            # Set model to direct evaluation (for cached data)
            model.set_eval_direct(True)

        # Set up the config
        config = IterativeTrainerConfig()

        base_model_name = self.base_model.__class__.__name__
        if hasattr(self.base_model, 'preferred_name'):
            base_model_name = self.base_model.preferred_name()

        config.name = '_%s[%s](%s-%s)' % (self.__class__.__name__,
                                          base_model_name, self.args.D1,
                                          self.args.D2)
        config.train_loader = train_loader
        config.valid_loader = valid_loader
        config.phases = {
            'train': {
                'dataset': train_loader,
                'backward': True
            },
            'test': {
                'dataset': valid_loader,
                'backward': False
            },
            'testU': {
                'dataset': old_valid_loader,
                'backward': False
            },
        }
        config.criterion = criterion
        config.classification = True
        config.cast_float_label = True
        config.stochastic_gradient = True
        config.visualize = not self.args.no_visualize
        config.model = model
        config.optim = optim.Adagrad(model.H.parameters(),
                                     lr=1e-2,
                                     weight_decay=0)
        config.scheduler = optim.lr_scheduler.ReduceLROnPlateau(config.optim,
                                                                patience=5,
                                                                threshold=1e-1,
                                                                min_lr=1e-8,
                                                                factor=0.1,
                                                                verbose=True)
        h_path = path.join(self.args.experiment_path,
                           '%s' % (self.__class__.__name__),
                           '%d' % (self.default_model),
                           '%s-%s.pth' % (self.args.D1, self.args.D2))
        h_parent = path.dirname(h_path)
        config.logger = Logger(h_parent)
        config.max_epoch = 30

        return config
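
The margin comments above refer to the hinge-style criterion: with labels mapped to -1/+1, a margin of 0 makes the loss vanish exactly when the score has the correct sign, so only the decision threshold is effectively learned. Below is a minimal sketch of such a loss under that label convention; the project's actual SVMLoss may differ in details.

import torch
import torch.nn as nn

class SVMLossSketch(nn.Module):
    def __init__(self, margin=1.0):
        super().__init__()
        self.margin = margin

    def forward(self, score, label):
        # Assumes labels arrive as 0/1 floats (cast_float_label=True above)
        # and maps them to -1/+1 for the hinge term.
        target = label.view(-1) * 2.0 - 1.0
        return torch.clamp(self.margin - target * score.view(-1), min=0).mean()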
Example 4
    def get_H_config(self, dataset, will_train=True):
        print("Preparing training D1+D2 (H)")
        print("Mixture size: %s" % colored('%d' % len(dataset), 'green'))

        # 80%, 20% for local train+test
        train_ds, valid_ds = dataset.split_dataset(0.8)

        if self.args.D1 in Global.mirror_augment:
            print(colored("Mirror augmenting %s" % self.args.D1, 'green'))
            new_train_ds = train_ds + MirroredDataset(train_ds)
            train_ds = new_train_ds

        # Initialize the multi-threaded loaders.
        train_loader = DataLoader(train_ds,
                                  batch_size=self.args.batch_size,
                                  shuffle=True,
                                  num_workers=self.args.workers,
                                  pin_memory=True)
        valid_loader = DataLoader(valid_ds,
                                  batch_size=self.args.batch_size,
                                  shuffle=True,
                                  num_workers=self.args.workers,
                                  pin_memory=True)

        # Set up the criterion
        # margin must be non-zero.
        criterion = SVMLoss(margin=1.0).cuda()

        # Set up the model
        model = OTModelWrapper(self.base_model, self.mav,
                               self.weib_models).to(self.args.device)

        old_valid_loader = valid_loader
        if will_train:
            # cache the subnetwork for faster optimization.
            from methods import get_cached
            from torch.utils.data.dataset import TensorDataset

            trainX, trainY = get_cached(model, train_loader, self.args.device)
            validX, validY = get_cached(model, valid_loader, self.args.device)

            # Drop rows whose first cached feature is NaN.
            train_mask = ~torch.isnan(trainX)[:, 0]
            valid_mask = ~torch.isnan(validX)[:, 0]
            trainX_notnan = trainX[train_mask]
            trainY_notnan = trainY[train_mask]
            validX_notnan = validX[valid_mask]
            validY_notnan = validY[valid_mask]
            new_train_ds = TensorDataset(trainX_notnan, trainY_notnan)
            new_valid_ds = TensorDataset(validX_notnan, validY_notnan)

            # Initialize the new multi-threaded loaders.
            train_loader = DataLoader(new_train_ds,
                                      batch_size=2048,
                                      shuffle=True,
                                      num_workers=0,
                                      pin_memory=False)
            valid_loader = DataLoader(new_valid_ds,
                                      batch_size=2048,
                                      shuffle=True,
                                      num_workers=0,
                                      pin_memory=False)

            # Set model to direct evaluation (for cached data)
            model.set_eval_direct(True)

        # Set up the config
        config = IterativeTrainerConfig()

        base_model_name = self.base_model.__class__.__name__
        if hasattr(self.base_model, 'preferred_name'):
            base_model_name = self.base_model.preferred_name()

        config.name = '_%s[%s](%s->%s)' % (self.__class__.__name__,
                                           base_model_name, self.args.D1,
                                           self.args.D2)
        config.train_loader = train_loader
        config.valid_loader = valid_loader
        config.phases = {
            'train': {
                'dataset': train_loader,
                'backward': True
            },
            'test': {
                'dataset': valid_loader,
                'backward': False
            },
            'testU': {
                'dataset': old_valid_loader,
                'backward': False
            },
        }
        config.criterion = criterion
        config.classification = True
        config.cast_float_label = True
        config.stochastic_gradient = True
        config.visualize = not self.args.no_visualize
        config.model = model
        config.optim = optim.SGD(model.H.parameters(),
                                 lr=1e-2,
                                 weight_decay=0.0)  # alternative: 1.0/len(train_ds)
        config.scheduler = optim.lr_scheduler.ReduceLROnPlateau(config.optim,
                                                                patience=10,
                                                                threshold=1e-1,
                                                                min_lr=1e-8,
                                                                factor=0.1,
                                                                verbose=True)
        h_path = path.join(self.args.experiment_path,
                           '%s' % (self.__class__.__name__),
                           '%d' % (self.default_model),
                           '%s-%s.pth' % (self.args.D1, self.args.D2))
        h_parent = path.dirname(h_path)
        config.logger = Logger(h_parent)
        config.max_epoch = 100

        return config
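
Finally, a hedged sketch of how a training loop might consume one of these configs, using only the fields set above (phases, criterion, optim, scheduler, cast_float_label, max_epoch); the project's actual IterativeTrainer presumably also handles logging, checkpointing, and visualization.

def run_config_sketch(config):
    device = next(config.model.parameters()).device
    for epoch in range(config.max_epoch):
        epoch_loss = {}
        for name, phase in config.phases.items():
            config.model.train(phase['backward'])
            total, batches = 0.0, 0
            with torch.set_grad_enabled(phase['backward']):
                for x, y in phase['dataset']:
                    x, y = x.to(device), y.to(device)
                    if config.cast_float_label:
                        y = y.float()
                    loss = config.criterion(config.model(x), y)
                    if phase['backward']:
                        config.optim.zero_grad()
                        loss.backward()
                        config.optim.step()
                    total, batches = total + loss.item(), batches + 1
            epoch_loss[name] = total / max(batches, 1)
        # Reduce the learning rate when the local test loss plateaus.
        config.scheduler.step(epoch_loss['test'])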