def get_H_config(self, dataset, will_train=True):
    """Assemble the trainer configuration for fitting H on the D1+D2 mixture.

    The mixture is split 80/20 into local train/validation subsets, the
    training subset is optionally mirror-augmented, and an
    ``IterativeTrainerConfig`` is populated with loaders, loss, model,
    optimizer, scheduler and logger.

    Args:
        dataset: The D1+D2 mixture. It must support ``len()`` and
            ``split_dataset(fraction)``.
        will_train: Accepted for interface compatibility; this variant does
            not read it because nothing is cached here.

    Returns:
        The populated ``IterativeTrainerConfig``.
    """
    args = self.args

    print("Preparing training D1+D2 (H)")
    print("Mixture size: %s" % colored('%d' % len(dataset), 'green'))

    # 80% / 20% split for local train + test.
    train_ds, valid_ds = dataset.split_dataset(0.8)

    if args.D1 in Global.mirror_augment:
        print(colored("Mirror augmenting %s" % args.D1, 'green'))
        train_ds = train_ds + MirroredDataset(train_ds)

    # Both multi-threaded loaders share the same settings.
    loader_kwargs = dict(
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True,
    )
    train_loader = DataLoader(train_ds, **loader_kwargs)
    valid_loader = DataLoader(valid_ds, **loader_kwargs)

    # For this to be genuine threshold learning the margin must be 0.
    criterion = SVMLoss(margin=0.0).to(args.device)

    # Set up the model.
    model = MCDropoutModelWrapper(self.base_model).to(args.device)

    # By definition, this approach is uncacheable, so the loaders are never
    # swapped for cached ones: 'test' and 'testU' use the same loader.

    config = IterativeTrainerConfig()

    # Prefer the model's own display name when it offers one.
    base_model_name = (
        self.base_model.preferred_name()
        if hasattr(self.base_model, 'preferred_name')
        else self.base_model.__class__.__name__
    )
    own_name = self.__class__.__name__

    config.name = '_%s[%s](%s-%s)' % (own_name, base_model_name, args.D1, args.D2)

    config.train_loader = train_loader
    config.valid_loader = valid_loader
    config.phases = {
        'train': {'dataset': train_loader, 'backward': True},
        'test': {'dataset': valid_loader, 'backward': False},
        'testU': {'dataset': valid_loader, 'backward': False},
    }
    config.criterion = criterion
    config.classification = True
    config.cast_float_label = True
    config.stochastic_gradient = True
    config.visualize = not args.no_visualize
    config.model = model

    # Only the parameters of model.H are handed to the optimizer.
    config.optim = optim.Adagrad(model.H.parameters(), lr=1e-1, weight_decay=0)
    config.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        config.optim,
        patience=10,
        threshold=1e-1,
        min_lr=1e-8,
        factor=0.1,
        verbose=True,
    )

    # The logger writes into the directory that would hold the H checkpoint.
    h_path = path.join(
        args.experiment_path,
        '%s' % own_name,
        '%d' % (self.default_model),
        '%s-%s.pth' % (args.D1, args.D2),
    )
    config.logger = Logger(path.dirname(h_path))
    config.max_epoch = 100

    return config
def get_H_config(self, dataset, will_train=True):
    """Assemble the trainer configuration for fitting H on the D1+D2 mixture.

    The mixture is split 80/20 into local train/validation subsets and the
    training subset is optionally mirror-augmented. When ``will_train`` is
    true, the values returned by ``get_cached`` replace the raw datasets so
    that training runs on cached tensors, and the model is switched to
    direct evaluation.

    Args:
        dataset: The D1+D2 mixture. It must support ``len()`` and
            ``split_dataset(fraction)``.
        will_train: When true, build cached tensor loaders for the 'train'
            and 'test' phases.

    Returns:
        The populated ``IterativeTrainerConfig``.
    """
    print("Preparing training D1+D2 (H)")
    print("Mixture size: %s" % colored('%d' % len(dataset), 'green'))

    # 80%, 20% for local train+test
    train_ds, valid_ds = dataset.split_dataset(0.8)

    if self.args.D1 in Global.mirror_augment:
        print(colored("Mirror augmenting %s" % self.args.D1, 'green'))
        train_ds = train_ds + MirroredDataset(train_ds)

    # Initialize the multi-threaded loaders.
    train_loader = DataLoader(train_ds, batch_size=self.args.batch_size,
                              shuffle=True, num_workers=self.args.workers,
                              pin_memory=True)
    valid_loader = DataLoader(valid_ds, batch_size=self.args.batch_size,
                              shuffle=True, num_workers=self.args.workers,
                              pin_memory=True)

    # To make the threshold learning, actually threshold learning
    # the margin must be set to 0.
    criterion = SVMLoss(margin=0.0).to(self.args.device)

    # Set up the model
    model = DeepEnsembleModelWrapper(self.base_model).to(self.args.device)

    # Keep the un-cached validation loader for the 'testU' phase.
    old_valid_loader = valid_loader

    if will_train:
        # cache the subnetwork for faster optimization.
        from methods import get_cached
        from torch.utils.data.dataset import TensorDataset

        trainX, trainY = get_cached(model, train_loader, self.args.device)
        validX, validY = get_cached(model, valid_loader, self.args.device)

        new_train_ds = TensorDataset(trainX, trainY)
        new_valid_ds = TensorDataset(validX, validY)

        # Initialize the new loaders over the cached tensors.
        train_loader = DataLoader(new_train_ds, batch_size=2048, shuffle=True,
                                  num_workers=0, pin_memory=False)
        valid_loader = DataLoader(new_valid_ds, batch_size=2048, shuffle=True,
                                  num_workers=0, pin_memory=False)

        # Set model to direct evaluation (for cached data)
        model.set_eval_direct(True)

    # Set up the config
    config = IterativeTrainerConfig()

    # Fall back to the class name when the base model does not define
    # preferred_name(); calling it unconditionally raised AttributeError
    # for such models.
    base_model_name = self.base_model.__class__.__name__
    if hasattr(self.base_model, 'preferred_name'):
        base_model_name = self.base_model.preferred_name()

    config.name = '_%s[%s](%s->%s)' % (self.__class__.__name__,
                                       base_model_name,
                                       self.args.D1, self.args.D2)
    config.train_loader = train_loader
    config.valid_loader = valid_loader
    config.phases = {
        'train': {'dataset': train_loader, 'backward': True},
        'test': {'dataset': valid_loader, 'backward': False},
        'testU': {'dataset': old_valid_loader, 'backward': False},
    }
    config.criterion = criterion
    config.classification = True
    config.cast_float_label = True
    config.stochastic_gradient = True
    config.visualize = not self.args.no_visualize
    config.model = model

    # Only the parameters of model.H are handed to the optimizer.
    config.optim = optim.Adagrad(model.H.parameters(), lr=1e-1, weight_decay=0)
    config.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        config.optim, patience=10, threshold=1e-1, min_lr=1e-8, factor=0.1,
        verbose=True)
    config.logger = Logger()
    config.max_epoch = 100

    return config
def get_H_config(self, train_ds, valid_ds, will_train=True, epsilon=0.0012, temperature=1000):
    """Assemble the trainer configuration for fitting H with an ODIN wrapper.

    When ``will_train`` is true, the values returned by ``get_cached``
    replace the raw datasets, the model's ``H.threshold`` is initialised to
    the midpoint between the label-0 and label-1 means of the cached
    training values, and the model is switched to direct evaluation.

    Args:
        train_ds: Dataset used for the 'train' phase.
        valid_ds: Dataset used for the 'test' and 'testU' phases.
        will_train: When true, build cached tensor loaders and initialise
            the threshold.
        epsilon: Forwarded to ``ODINModelWrapper``.
        temperature: Forwarded to ``ODINModelWrapper``.

    Returns:
        The populated ``IterativeTrainerConfig``.
    """
    print("Preparing training D1+D2 (H)")

    # Initialize the multi-threaded loaders.
    train_loader = DataLoader(train_ds, batch_size=self.args.batch_size,
                              shuffle=True, num_workers=self.args.workers,
                              pin_memory=True)
    valid_loader = DataLoader(valid_ds, batch_size=self.args.batch_size,
                              shuffle=True, num_workers=self.args.workers,
                              pin_memory=True)

    # Set up the criterion
    # To make the threshold learning, actually threshold learning
    # the margin must be set to 0.
    criterion = SVMLoss(margin=0.0).to(self.args.device)

    # Set up the model
    model = ODINModelWrapper(self.base_model, epsilon=epsilon,
                             temperature=temperature).to(self.args.device)

    # Keep the un-cached validation loader for the 'testU' phase.
    old_valid_loader = valid_loader

    if will_train:
        # cache the subnetwork for faster optimization.
        from methods import get_cached
        from torch.utils.data.dataset import TensorDataset

        trainX, trainY = get_cached(model, train_loader, self.args.device)
        validX, validY = get_cached(model, valid_loader, self.args.device)

        new_train_ds = TensorDataset(trainX, trainY)

        # Start the threshold halfway between the two class means.
        x_center = trainX[trainY == 0].mean()
        y_center = trainX[trainY == 1].mean()
        init_value = (x_center + y_center) / 2

        # Place the value on whatever device the threshold already lives on.
        # A bare .cuda() targets the current default GPU, which need not be
        # the parameter's device, and the CPU case also needs an explicit
        # move if the cached tensors are on the GPU.
        threshold_device = model.H.threshold.device
        model.H.threshold.data = init_value.to(threshold_device).view((1, ))
        print("Initializing threshold to %.2f" % (init_value.item()))

        new_valid_ds = TensorDataset(validX, validY)

        # Initialize the new loaders over the cached tensors.
        train_loader = DataLoader(new_train_ds, batch_size=2048, shuffle=True,
                                  num_workers=0, pin_memory=False)
        valid_loader = DataLoader(new_valid_ds, batch_size=2048, shuffle=True,
                                  num_workers=0, pin_memory=False)

        # Set model to direct evaluation (for cached data)
        model.set_eval_direct(True)

    # Set up the config
    config = IterativeTrainerConfig()

    base_model_name = self.base_model.__class__.__name__
    if hasattr(self.base_model, 'preferred_name'):
        base_model_name = self.base_model.preferred_name()

    config.name = '_%s[%s](%s-%s)' % (self.__class__.__name__,
                                      base_model_name,
                                      self.args.D1, self.args.D2)
    config.train_loader = train_loader
    config.valid_loader = valid_loader
    config.phases = {
        'train': {'dataset': train_loader, 'backward': True},
        'test': {'dataset': valid_loader, 'backward': False},
        'testU': {'dataset': old_valid_loader, 'backward': False},
    }
    config.criterion = criterion
    config.classification = True
    config.cast_float_label = True
    config.stochastic_gradient = True
    config.visualize = not self.args.no_visualize
    config.model = model

    # Only the parameters of model.H are handed to the optimizer.
    config.optim = optim.Adagrad(model.H.parameters(), lr=1e-2, weight_decay=0)
    config.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        config.optim, patience=5, threshold=1e-1, min_lr=1e-8, factor=0.1,
        verbose=True)

    # The logger writes into the directory that would hold the H checkpoint.
    h_path = path.join(self.args.experiment_path,
                       '%s' % (self.__class__.__name__),
                       '%d' % (self.default_model),
                       '%s-%s.pth' % (self.args.D1, self.args.D2))
    h_parent = path.dirname(h_path)
    config.logger = Logger(h_parent)
    config.max_epoch = 30

    return config
def get_H_config(self, dataset, will_train=True):
    """Assemble the trainer configuration for fitting H with an OT wrapper.

    The mixture is split 80/20 into local train/validation subsets and the
    training subset is optionally mirror-augmented. When ``will_train`` is
    true, the values returned by ``get_cached`` replace the raw datasets,
    rows whose first column is NaN are dropped, and the model is switched to
    direct evaluation.

    Args:
        dataset: The D1+D2 mixture. It must support ``len()`` and
            ``split_dataset(fraction)``.
        will_train: When true, build cached, NaN-filtered tensor loaders for
            the 'train' and 'test' phases.

    Returns:
        The populated ``IterativeTrainerConfig``.
    """
    print("Preparing training D1+D2 (H)")
    print("Mixture size: %s" % colored('%d' % len(dataset), 'green'))

    # 80%, 20% for local train+test
    train_ds, valid_ds = dataset.split_dataset(0.8)

    if self.args.D1 in Global.mirror_augment:
        print(colored("Mirror augmenting %s" % self.args.D1, 'green'))
        train_ds = train_ds + MirroredDataset(train_ds)

    # Initialize the multi-threaded loaders.
    train_loader = DataLoader(train_ds, batch_size=self.args.batch_size,
                              shuffle=True, num_workers=self.args.workers,
                              pin_memory=True)
    valid_loader = DataLoader(valid_ds, batch_size=self.args.batch_size,
                              shuffle=True, num_workers=self.args.workers,
                              pin_memory=True)

    # Set up the criterion
    # margin must be non-zero.
    # Use the configured device instead of a hard-coded .cuda() so the
    # criterion lands on the same device as the model and CPU runs work.
    criterion = SVMLoss(margin=1.0).to(self.args.device)

    # Set up the model
    model = OTModelWrapper(self.base_model, self.mav,
                           self.weib_models).to(self.args.device)

    # Keep the un-cached validation loader for the 'testU' phase.
    old_valid_loader = valid_loader

    if will_train:
        # cache the subnetwork for faster optimization.
        from methods import get_cached
        from torch.utils.data.dataset import TensorDataset

        trainX, trainY = get_cached(model, train_loader, self.args.device)
        validX, validY = get_cached(model, valid_loader, self.args.device)

        # Indices of rows whose first column is not NaN, computed once per
        # split and applied to both the inputs and the labels.
        train_keep = torch.logical_not(
            torch.isnan(trainX)[:, 0]).nonzero().squeeze(1)
        valid_keep = torch.logical_not(
            torch.isnan(validX)[:, 0]).nonzero().squeeze(1)

        new_train_ds = TensorDataset(trainX[train_keep], trainY[train_keep])
        new_valid_ds = TensorDataset(validX[valid_keep], validY[valid_keep])

        # Initialize the new loaders over the cached tensors.
        train_loader = DataLoader(new_train_ds, batch_size=2048, shuffle=True,
                                  num_workers=0, pin_memory=False)
        valid_loader = DataLoader(new_valid_ds, batch_size=2048, shuffle=True,
                                  num_workers=0, pin_memory=False)

        # Set model to direct evaluation (for cached data)
        model.set_eval_direct(True)

    # Set up the config
    config = IterativeTrainerConfig()

    base_model_name = self.base_model.__class__.__name__
    if hasattr(self.base_model, 'preferred_name'):
        base_model_name = self.base_model.preferred_name()

    config.name = '_%s[%s](%s->%s)' % (self.__class__.__name__,
                                       base_model_name,
                                       self.args.D1, self.args.D2)
    config.train_loader = train_loader
    config.valid_loader = valid_loader
    config.phases = {
        'train': {'dataset': train_loader, 'backward': True},
        'test': {'dataset': valid_loader, 'backward': False},
        'testU': {'dataset': old_valid_loader, 'backward': False},
    }
    config.criterion = criterion
    config.classification = True
    config.cast_float_label = True
    config.stochastic_gradient = True
    config.visualize = not self.args.no_visualize
    config.model = model

    # Only the parameters of model.H are handed to the optimizer.
    config.optim = optim.SGD(model.H.parameters(), lr=1e-2, weight_decay=0.0)
    config.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        config.optim, patience=10, threshold=1e-1, min_lr=1e-8, factor=0.1,
        verbose=True)

    # The logger writes into the directory that would hold the H checkpoint.
    h_path = path.join(self.args.experiment_path,
                       '%s' % (self.__class__.__name__),
                       '%d' % (self.default_model),
                       '%s-%s.pth' % (self.args.D1, self.args.D2))
    h_parent = path.dirname(h_path)
    config.logger = Logger(h_parent)
    config.max_epoch = 100

    return config