def fit(self, objective):
    """Run a few training trials and return the best trial/set of parameters"""
    if self.stopped:
        return

    self.metrics.start_train()

    while not self.hpo.is_done():
        configurations = self.hpo.suggest()

        for config in configurations:
            show_dict(config)

            # uid = config.pop('uid')
            epoch = config.pop('epoch')

            new_task = self.task_maker()
            new_task.init(**config)
            new_task.fit(epoch)

            metrics = new_task.metrics.value()
            result = metrics[objective]

            # config['uid'] = uid
            self.hpo.observe(config, result)

    self.metrics.end_train()
    return self.hpo.result()
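# A short usage sketch for the loop above, assuming an HPOptimizer and a task
# factory like the ones used elsewhere in this repo; the space and objective
# below are illustrative, not taken from a real experiment.
hpo = HPOptimizer('hyperband',
                  space=make_task().get_space(),
                  fidelity=Fidelity(1, 30).to_dict())

hpo_task = HPO(hpo, make_task)
best_trial = hpo_task.fit(objective='validation_accuracy')
show_dict(best_trial.params)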
def main(C=1,
         l1_ratio=0.5,
         random_state=1,
         bootstrap_seed=1,
         epoch=0,
         uid=None,
         experiment_name=None,
         client=None):
    C = max(C, 1e-10)

    # Load dataset
    data, target = sklearn.datasets.load_breast_cancer(return_X_y=True)
    dataset_splits = bootstrap(data, target, bootstrap_seed)

    model = LogisticRegression(random_state=random_state)

    # Compute validation and test accuracy
    metrics = [
        Accuracy(name='validation', loader=[dataset_splits['valid']]),
        Accuracy(name='test', loader=[dataset_splits['test']])
    ]

    # Setup the task
    task = SklearnTask(model, metrics)

    # Save the results of your experiment inside a db
    if client is not None:
        task.metrics.append(
            metric_logger(client=client, experiment=experiment_name))

    hyper_parameters = dict(model=dict(C=C, l1_ratio=l1_ratio))
    show_dict(hyper_parameters)

    # Initialize the task with your configuration
    task.init(uid=uid, **hyper_parameters)

    # Train
    x, y = dataset_splits['train']
    # TODO: make sure that we fit on the whole train set and validate on the
    # whole valid and test sets
    task.fit(x, y)

    # Get the stats about this task setup
    stats = task.metrics.value()
    show_dict(stats)

    return float(stats['validation_error_rate'])
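# The objective above can also be called directly; a hedged example of probing
# a few bootstrap seeds by hand (the values here are arbitrary, for illustration):
for seed in (1, 2, 3):
    error = main(C=0.1, l1_ratio=0.5, random_state=1, bootstrap_seed=seed)
    print(f'bootstrap_seed={seed}: validation error rate {error:.4f}')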
def main(**kwargs):
    args = Namespace(**kwargs)
    set_verbose_level(args.verbose)

    device = fetch_device()
    experiment_name = args.experiment_name.format(**kwargs)

    # Save partial results here
    state_storage = StateStorage(
        folder=option('state.storage', f'{base}/detection'))

    def main_task():
        return detection_baseline(device=device,
                                  storage=state_storage,
                                  **kwargs)

    space = main_task().get_space()
    params = {}

    if space:
        show_dict(space)
        hpo = HPOptimizer('hyperband',
                          space=space,
                          fidelity=Fidelity(args.min_epochs,
                                            args.epochs).to_dict())

        hpo_task = HPO(hpo, main_task)
        hpo_task.metrics.append(ElapsedRealTime())

        trial = hpo_task.fit(objective='validation_loss')
        print(f'HPO is done, objective: {trial.objective}')
        params = trial.params
    else:
        print('No hyper-parameters are missing, running the experiment...')

    # -------------------------------------------------
    # Run the experiment with the best hyper-parameters
    # -------------------------------------------------
    if params is not None:
        # Train using train + valid for the final result
        final_task = detection_baseline(device=device, **kwargs, hpo_done=True)
        final_task.init(**params)
        final_task.fit(epochs=args.epochs)

        print('=' * 40)
        print('Final Trial Results')
        show_dict(flatten(params))
        final_task.report(pprint=True, print_fun=print)
        print('=' * 40)
def main(bootstrap_seed,
         random_state,
         hidden_layer_sizes=150,
         alpha=0.001,
         data_path='.',
         epoch=0,
         uid=None,
         experiment_name=None,
         client=None):
    """
    Parameters
    ----------
    bootstrap_seed: int
        seed for controlling which data-points are selected for the
        training/testing splits

    random_state: int
        seed for the generation of weights

    hidden_layer_sizes: int
        size of the single hidden layer, ex: 150 gives (150,)

    alpha: float
        L2 penalty (regularization term) parameter
    """
    hidden_layer_sizes = int(hidden_layer_sizes)

    # Load datasets
    train_data = get_train_dataset(folder=option('data.path', data_path),
                                   task='pan_allele',
                                   min_nb_examples=1000)
    valid_data = get_valid_dataset(option('data.path', data_path))
    test_data = get_test_dataset(option('data.path', data_path))

    # One bootstrap seed for all 3 datasets
    rng = numpy.random.RandomState(bootstrap_seed)
    train_data = bootstrap(train_data, rng)
    valid_data = bootstrap(valid_data, rng)
    test_data = bootstrap(test_data, rng)

    # Compute validation and test AUC
    additional_metrics = [
        AUC(name='validation',
            loader=[([valid_data[:, :-1]], valid_data[:, -1])]),
        AUC(name='test', loader=[([test_data[:, :-1]], test_data[:, -1])])
    ]

    # Setup the task
    task = SklearnTask(MLPRegressor(solver='lbfgs', random_state=random_state),
                       metrics=additional_metrics)

    # Save the results of your experiment inside a db
    if client is not None:
        task.metrics.append(
            metric_logger(client=client, experiment=experiment_name))

    hyper_parameters = dict(
        model=dict(hidden_layer_sizes=(hidden_layer_sizes, ), alpha=alpha))
    show_dict(hyper_parameters)

    # Initialize the task with your configuration
    task.init(uid=uid, **hyper_parameters)

    # Train
    task.fit(train_data[:, :-1], train_data[:, -1])

    stats = task.metrics.value()
    show_dict(stats)

    return float(stats['validation_aac'])
def main(bootstrapping_seed=1,
         sampler_seed=1,
         init_seed=1,
         batch_size=16,
         learning_rate=0.001,
         momentum=0.9,
         weight_decay=1e-4,
         epoch=240,
         half=False,
         hpo_done=False,
         uid=None,
         experiment_name=None,
         client=None,
         clean_on_exit=True,
         _interrupt=0):
    base_folder = options('state.storage', '/tmp')
    storage = StateStorage(folder=base_folder)

    split_method = {
        'split_method': 'bootstrap',
        'ratio': 0.25,  # This means 50% training, 25% valid, 25% test
        'seed': bootstrapping_seed,
        'balanced': False
    }

    task = segmentation_baseline('fcn_resnet18',
                                 'self_init',
                                 'SGD',
                                 dataset='voc-segmentation',
                                 batch_size=batch_size,
                                 device=fetch_device(),
                                 split_method=split_method,
                                 sampler_seed=sampler_seed,
                                 init_seed=init_seed,
                                 storage=storage,
                                 half=half,
                                 hpo_done=hpo_done,
                                 verbose=False,
                                 validate=True)

    hyperparameters = {
        'model': {
            'initializer': {
                'gain': 1.0
            }
        },
        'optimizer': {
            'lr': learning_rate,
            'momentum': momentum,
            'weight_decay': weight_decay
        }
    }
    show_dict(hyperparameters)

    if client is not None:
        task.metrics.append(
            metric_logger(client=client, experiment=experiment_name))

    if _interrupt:
        from studies import InterruptingMetric

        # Will raise an interrupt every `_interrupt` epochs
        task.metrics.append(InterruptingMetric(frequency_epoch=_interrupt))
        storage.time_buffer = 0

    task.init(uid=uid, **hyperparameters)
    task.fit(epochs=epoch)

    # Remove the checkpoint
    if clean_on_exit:
        file_path = storage._file(uid)
        try:
            os.remove(file_path)
            print('Removed checkpoint at', file_path)
        except FileNotFoundError:
            print('No checkpoint at', file_path)

    show_dict(task.metrics.value())
    return float(task.metrics.value()['validation_mean_jaccard_distance'])
import argparse

from olympus.datasets import Dataset, DataLoader, SplitDataset
from olympus.models import Model
from olympus.optimizers import Optimizer
from olympus.utils.stat import StatStream
from olympus.utils import show_dict, fetch_device

parser = argparse.ArgumentParser()
parser.add_argument('--dataset', default='cifar10', type=str)
parser.add_argument('--model', default='vgg11', type=str)
parser.add_argument('--caching', action='store_true', dest='caching')
parser.add_argument('--no-caching', action='store_false', dest='caching')
parser.add_argument('--batch-size', default=128, type=int)
parser.add_argument('--warmup', default=4, type=int)
parser.add_argument('--repeat', default=10, type=int)
args = parser.parse_args()

show_dict(vars(args))

device = fetch_device()
if args.caching:
    args.caching = device

dataset = SplitDataset(
    Dataset(args.dataset, cache=args.caching, transform=False),
    split_method='original')

loaders = DataLoader(dataset, batch_size=args.batch_size, sampler_seed=0)
input_size, target_size = loaders.get_shapes()

model = Model(args.model,
              input_size=input_size,
              output_size=target_size[0]).init()
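# The excerpt above stops once the model is built; the --warmup/--repeat
# arguments and the StatStream import suggest a timing loop follows. Below is
# a minimal sketch of such a loop, assuming batches unpack as (input, target)
# pairs and the model behaves like a standard PyTorch module; the names and
# structure here are illustrative, not the repository's actual benchmark code.
import time

import torch

model = model.to(device)
model.eval()

batch_times = []
with torch.no_grad():
    for i, (batch, _) in enumerate(loaders.train()):
        if i >= args.warmup + args.repeat:
            break

        start = time.perf_counter()
        model(batch.to(device))
        if device.type == 'cuda':
            torch.cuda.synchronize()
        elapsed = time.perf_counter() - start

        # Drop the warmup iterations from the statistics
        if i >= args.warmup:
            batch_times.append(elapsed)

print(f'avg batch time: {sum(batch_times) / len(batch_times):.4f}s')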
        batch = self.preprocessor(self.postprocessor(batch))
        noises = self.get_noise(batch, original_image)
        return batch, noises


builders = {'gradient_ascent': GradientAscentAdversary}


if __name__ == '__main__':
    import torchvision.models as models
    from PIL import Image

    from olympus.dashboard.plots.saliency import imagenet_preprocess, imagenet_postprocessor

    path = '/home/setepenre/work/olympus/docs/_static/images/cat.jpg'
    img = Image.open(path)
    # img = torch.randn((1, 3, 224, 224))

    model = models.vgg19(pretrained=True)
    adversary = GradientAscentAdversary(imagenet_preprocess,
                                        imagenet_postprocessor,
                                        model,
                                        target_class=283)

    samples, noise = adversary.generate([img], min_confidence=0.90, lr=1)

    for s, n in zip(samples, noise):
        n.save('noise.jpg')
        s.save('adversary.jpg')

    show_dict(adversary.report())
def on_end_batch(self, task, step, input=None, context=None):
    self.show_progress()

    if task is not None and self.show_metrics == 'batch':
        show_dict(task.metrics.value(), print_fun=self.print_fun)

def on_end_epoch(self, task, epoch, context):
    self.reset_throttle()
    self.show_progress('', '\n')

    if task is not None and self.show_metrics == 'epoch':
        show_dict(task.metrics.value(), print_fun=self.print_fun)

def on_end_train(self, task, step=None):
    self.print_fun('Completed training')

    if task:
        show_dict(task.metrics.value(), print_fun=self.print_fun)

def on_resume_train(self, task, epoch):
    self.print_fun('Resuming at epoch', epoch)

    if task:
        show_dict(task.metrics.value(), print_fun=self.print_fun)

def on_start_train(self, task, step=None):
    self.print_fun('Starting')

    if task:
        show_dict(task.metrics.value(), print_fun=self.print_fun)
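# For illustration, a minimal observer implementing the same hook protocol as
# the methods above; the TimedObserver class itself is hypothetical and not
# part of the library.
import time

class TimedObserver:
    """Print how long each epoch takes, using the same callback protocol."""

    def __init__(self):
        self.epoch_start = None

    def on_start_train(self, task, step=None):
        self.epoch_start = time.time()

    def on_end_epoch(self, task, epoch, context):
        now = time.time()
        print(f'epoch {epoch} took {now - self.epoch_start:.2f}s')
        self.epoch_start = now

    def on_end_train(self, task, step=None):
        print('training finished')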
def main(task='rte',
         bootstrapping_seed=1,
         sampler_seed=1,
         init_seed=1,
         global_seed=1,
         learning_rate=0.00002,
         beta1=0.9,
         beta2=0.999,
         weight_decay=0.0,
         attention_probs_dropout_prob=0.1,
         hidden_dropout_prob=0.1,
         batch_size=32,
         weight_init='normal',
         warmup=0,
         ratio=0.1,
         init_std=0.2,
         epoch=3,
         half=False,
         hpo_done=False,
         uid=None,
         experiment_name=None,
         client=None,
         clean_on_exit=True,
         _interrupt=0):
    print('seeds: init {} / global {} / sampler {} / bootstrapping {}'.format(
        init_seed, global_seed, sampler_seed, bootstrapping_seed))

    base_folder = options('state.storage', '/tmp/storage')
    storage = StateStorage(folder=base_folder)

    split_method = {
        'split_method': 'bootstrap',
        'ratio': ratio,
        'seed': bootstrapping_seed,
        'balanced': False
    }

    task = classification_baseline("bert-{}".format(task),
                                   'normal',
                                   'adam',
                                   schedule='warmup',
                                   dataset="glue-{}".format(task),
                                   split_method=split_method,
                                   sampler_seed=sampler_seed,
                                   init_seed=init_seed,
                                   batch_size=batch_size,
                                   device=fetch_device(),
                                   storage=storage,
                                   half=half,
                                   hpo_done=hpo_done,
                                   verbose=False,
                                   validate=True)

    hyperparameters = dict(
        model={
            'initializer': {
                'mean': 0.0,
                'std': init_std
            },
            'attention_probs_dropout_prob': attention_probs_dropout_prob,
            'hidden_dropout_prob': hidden_dropout_prob
        },
        optimizer={
            'lr': learning_rate,
            'beta1': beta1,
            'beta2': beta2,
            'weight_decay': weight_decay
        },
        lr_schedule={
            'warmup_steps': warmup,
            'max_steps': epoch * len(task.dataloader),
            'iterations': 'step'
        })
    show_dict(hyperparameters)

    if client is not None:
        task.metrics.append(
            metric_logger(client=client, experiment=experiment_name))

    if _interrupt:
        from studies import InterruptingMetric

        # Will raise an interrupt every `_interrupt` epochs
        task.metrics.append(InterruptingMetric(frequency_epoch=_interrupt))

    task.init(uid=uid, **hyperparameters)

    # NOTE: Seed globally once all special inits are done.
    set_seeds(global_seed)

    task.fit(epochs=epoch)

    # Remove the checkpoint
    if clean_on_exit:
        file_path = storage._file(uid)
        try:
            os.remove(file_path)
            print('Removed checkpoint at', file_path)
        except FileNotFoundError:
            print('No checkpoint at', file_path)

    return task.metrics.value().get('validation_error_rate', None)
    loader = DataLoader(splits, sampler_seed=1, batch_size=32)

    main_task = Classification(
        classifier=model,
        optimizer=optimizer,
        lr_scheduler=lr_schedule,
        dataloader=loader.train(),
        device=device,
        storage=StateStorage(folder=f'{base}/hpo_simple'))

    main_task.metrics.append(
        Accuracy(name='validation', loader=loader.valid(batch_size=64)))

    return main_task


space = make_task().get_space()

hp_optimizer = HPOptimizer('hyperband',
                           fidelity=Fidelity(1, 30).to_dict(),
                           space=space)

hpo_task = HPO(hp_optimizer, make_task)
result = hpo_task.fit(objective='validation_accuracy')

print('Best Params:')
print('-' * 40)
print(f'validation_accuracy: {result.objective}')
show_dict(result.params)
def main(bootstrapping_seed=1,
         sampler_seed=1,
         transform_seed=1,
         init_seed=1,
         learning_rate=0.1,
         momentum=0.9,
         weight_decay=5e-4,
         gamma=0.99,
         weight_init='glorot_uniform',
         epoch=120,
         half=False,
         hpo_done=False,
         uid=None,
         experiment_name=None,
         client=None,
         clean_on_exit=True,
         _interrupt=0):
    base_folder = options('state.storage', '/tmp')
    storage = StateStorage(folder=base_folder, time_buffer=5 * 60)
    print(base_folder)

    sampling_method = {
        'split_method': 'bootstrap',
        'ratio': 0.1666,
        'seed': bootstrapping_seed,
        'balanced': True
    }

    batch_size = 128

    task = classification_baseline('vgg11',
                                   'glorot_uniform',
                                   'sgd',
                                   schedule='exponential',
                                   dataset='cifar10',
                                   batch_size=batch_size,
                                   device=fetch_device(),
                                   data_augment=True,
                                   split_method=sampling_method,
                                   sampler_seed=sampler_seed,
                                   transform_seed=transform_seed,
                                   init_seed=init_seed,
                                   storage=storage,
                                   half=half,
                                   hpo_done=hpo_done,
                                   verbose=False,
                                   validate=True)

    hyperparameters = dict(
        model={'initializer': {'gain': 1.0}},
        optimizer=dict(lr=learning_rate,
                       momentum=momentum,
                       weight_decay=weight_decay),
        lr_schedule=dict(gamma=gamma))
    show_dict(hyperparameters)

    if client is not None:
        task.metrics.append(
            metric_logger(client=client, experiment=experiment_name))

    if _interrupt:
        from studies import InterruptingMetric

        # Will raise an interrupt every `_interrupt` epochs
        task.metrics.append(InterruptingMetric(frequency_epoch=_interrupt))
        storage.time_buffer = 0

    task.init(uid=uid, **hyperparameters)
    task.fit(epochs=epoch)

    # Remove the checkpoint
    if clean_on_exit:
        file_path = storage._file(uid)
        try:
            os.remove(file_path)
            print('Removed checkpoint at', file_path)
        except FileNotFoundError:
            print('No checkpoint at', file_path)

    show_dict(task.metrics.value())
    return float(task.metrics.value()['validation_error_rate'])