def __init__(self, storage: BaseStorage, keep_best: str = None,
             time_buffer=option('state.storage.time', 5 * 60, type=int),
             save_init: bool = False):
    """Checkpointing observer: periodically persists task state to ``storage``.

    Parameters
    ----------
    storage: BaseStorage
        backend used to save/load the task state
    keep_best: str, optional
        name of the metric to track; when given, the best state so far is
        kept via ``IsBest``
    time_buffer: int
        minimum number of seconds between two saves (time throttling).
        NOTE(review): the ``option(...)`` default is evaluated once at import
        time, not per call — confirm this is intended.
    save_init: bool
        whether the initial (pre-training) state should also be saved
    """
    self.storage = storage
    # how often (in epochs) a checkpoint is taken
    self.frequency_epoch: int = option('checkpoint.frequency_epoch', 1, type=int)
    # Keep best state mechanic
    self.best_name: str = None
    self.keep_best: Callable = None
    if keep_best is not None:
        self.keep_best = IsBest(keep_best)
    self.save_init = save_init
    # Time throttling
    self.time_buffer = time_buffer
    self.last_save = datetime.utcnow()
    # Batch resuming is not supported
    self.frequency_new_trial: int = 1
    self.frequency_end_epoch: int = 1
    # cleanup at the end of training
    self.frequency_end_train: int = 1
    self.epoch: int = 0
    # checkpoint is done last after all other metrics have finished computing their statistics
    self.priority: int = -11
    # trial uid and pending (in-flight) save, set later during training
    self.uid = None
    self.pending = None
def __init__(self, speed: Speed, max_epochs=None, max_steps=None):
    """Progress-reporting observer: prints throttled training progress.

    Parameters
    ----------
    speed: Speed
        speed metric used by the progress printer
    max_epochs: optional
        total number of epochs, when known, used to pick a progress printer
    max_steps: optional
        total number of steps, when known, used to pick a progress printer
    """
    # do not print more often than every `print_throttle` seconds
    self.print_throttle = option('progress.print.throttle', 30, type=int)
    self.print_fun = print
    self.throttled_print = TimeThrottler(self.print_fun, every=self.print_throttle)
    self.max_epochs = max_epochs
    self.max_steps = max_steps
    self.speed = speed
    # NOTE(review): this assignment is immediately overwritten by the call
    # below — dead store unless DefaultProgress() has side effects; confirm
    self.progress_printer = DefaultProgress(self.speed)
    self.progress_printer = self.select_progress_printer(
        max_epochs, max_steps)
    self.frequency_new_epoch: int = 1
    self.frequency_end_epoch: int = option('progress.frequency.epoch', 1, type=int)
    self.frequency_end_batch: int = option('progress.frequency.batch', 1, type=int)
    # which metric group to display alongside progress ('epoch' by default)
    self.show_metrics: str = option('progress.show.metrics', 'epoch')
    self.frequency_trial: int = 0
    # worker id for multi-worker runs; -1 means standalone
    self.worker_id: int = option('worker.id', -1, type=int)
    self.first_epoch = None
def fetch_device():
    """Return the torch device to use.

    Honors the ``device.type`` option when set; otherwise defaults to
    CUDA when available and falls back to CPU.
    """
    fallback = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(option('device.type', fallback))
def main(**kwargs):
    """Entry point: run HPO over the classification baseline, then a final fit.

    All CLI arguments arrive through ``kwargs`` and are wrapped in a
    Namespace for attribute access.
    """
    show_dict(kwargs)
    args = Namespace(**kwargs)
    set_verbose_level(args.verbose)
    device = fetch_device()
    # experiment name may contain {placeholders} filled from the arguments
    experiment_name = args.experiment_name.format(**kwargs)
    # save partial results here
    state_storage = StateStorage(
        folder=option('state.storage', '/tmp/olympus/classification'))

    def main_task():
        # factory: builds a fresh task (HPO re-creates it per trial)
        task = classification_baseline(device=device, storage=state_storage, **kwargs)
        if args.uri is not None:
            logger = metric_logger(args.uri, args.database, experiment_name)
            task.metrics.append(logger)
        return task

    space = main_task().get_space()
    # If space is not empty we search the best hyper parameters
    params = {}
    if space:
        show_dict(space)
        hpo = HPOptimizer('hyperband', space=space,
                          fidelity=Fidelity(args.min_epochs, args.epochs).to_dict())
        hpo_task = HPO(hpo, main_task)
        hpo_task.metrics.append(ElapsedRealTime())
        trial = hpo_task.fit(objective='validation_accuracy')
        print(f'HPO is done, objective: {trial.objective}')
        params = trial.params
    else:
        print('No hyper parameter missing, running the experiment...')
    # ------
    # Run the experiment with the best hyper parameters
    # -------------------------------------------------
    # NOTE(review): `params` starts as {} so this check is always true unless
    # `trial.params` itself was None — confirm whether `if params:` was meant
    if params is not None:
        # Train using train + valid for the final result
        final_task = classification_baseline(device=device, **kwargs, hpo_done=True)
        final_task.init(**params)
        final_task.fit(epochs=args.epochs)
        print('=' * 40)
        print('Final Trial Results')
        show_dict(flatten(params))
        final_task.report(pprint=True, print_fun=print)
        print('=' * 40)
class MSGQTracker(Observer):
    """Observer that forwards task metrics to a message-queue logger client.

    NOTE(review): the class-level ``field(default_factory=...)`` defaults
    imply this is a dataclass — the decorator is not visible in this chunk;
    confirm it is applied where the class is declared.
    """
    # logging client; must be assigned before the observer is used
    client: _Logger = None
    frequency_new_trial: int = 1
    frequency_start_train: int = 1
    frequency_end_train: int = 1
    frequency_new_epoch: int = field(
        default_factory=lambda: option('track.frequency_epoch', 1, type=int))
    frequency_end_batch: int = field(
        default_factory=lambda: option('track.frequency_batch', 0, type=int))
    last_save: datetime = None
    epoch: int = 0
    # tracking is done last after all other metrics have finished computing their statistics
    priority: int = -10

    def on_new_trial(self, task, step, parameters, uid):
        """Bind the trial uid to the client so logged metrics are attributed."""
        assert uid is not None
        self.client.uid = uid

    # We push data on new epoch so for the last epoch
    # end_train push the last metrics without duplicates
    def on_new_epoch(self, task, epoch, context):
        """Record the epoch and push the current metric values."""
        self.epoch = epoch
        self.client.log(task.metrics.value())

    def on_start_train(self, task, step=None):
        """No-op: nothing to log when training starts."""
        pass

    def on_end_train(self, task, step=None):
        """Push the final metric values at the end of training."""
        if task is not None:
            self.client.log(task.metrics.value())

    def log(self, **kwargs):
        """Forward arbitrary key/value pairs to the client."""
        return self.client.log(kwargs)

    def value(self):
        # this observer produces no metrics of its own
        return {}

    def state_dict(self):
        # stateless with respect to checkpointing
        return {}

    def load_state_dict(self, state_dict):
        pass
def __init__(self, data_path, task_name=None, **kwargs):
    """Load a GLUE task (train + test) and hand it to the parent dataset class.

    Parameters
    ----------
    data_path: str
        folder containing the GLUE data
    task_name: str
        GLUE task identifier (e.g. 'cola'); must be provided by a subclass
    """
    transformations = None
    if task_name is None:
        raise ValueError('do not use this class directly - instantiate a subclass')
    # GLUE folder names are upper-cased, except CoLA which is mixed case
    data_folder = os.path.join(data_path, task_name.upper() if task_name != 'cola' else 'CoLA')
    # hard-coding the model type for now..
    model_name_or_path = 'bert-base-uncased'
    model_type = 'bert'
    # and sequence size..
    max_seq_length = 128
    cache_dir = option('tokenizer.cache', '/tmp/olympus/cache_tok')
    logger.info('tokenizer cache folder: {}'.format(cache_dir))
    tokenizer = BertTokenizer.from_pretrained(
        'bert-base-uncased',
        do_lower_case=True,
        cache_dir=cache_dir,
    )
    try:
        train_dataset = load_and_cache_examples(
            task_name, tokenizer, data_folder, model_name_or_path,
            max_seq_length, model_type, evaluate=False)
        test_dataset = load_and_cache_examples(
            task_name, tokenizer, data_folder, model_name_or_path,
            max_seq_length, model_type, evaluate=True)
    except FileNotFoundError:
        # missing data files: surface an actionable configuration error
        raise ValueError('please point the environment variable OLYMPUS_DATA_PATH '
                         'to the folder containing the GLUE data. Currently, it is '
                         'set as "{}"'.format(data_path))
    # concatenate train+test; the parent splits them back using test_size
    super(GLUE, self).__init__(
        torch.utils.data.ConcatDataset([train_dataset, test_dataset]),
        test_size=len(test_dataset),
        transforms=transformations
    )
def new_seed(**kwargs):
    """Global seed management.

    Registers a single named seed in the global ``SEEDS`` registry and
    returns the value actually stored (which may be randomly regenerated
    when the ``seeding.random`` option is enabled).
    """
    global SEEDS
    import random
    assert len(kwargs) == 1, 'Only single seed can be registered'
    # Allow user to force seed to change seeds automatically each time the program is ran
    # Disabled by default
    automatic_seeding = option('seeding.random', default=False, type=bool)
    for name, value in kwargs.items():
        # do not change the seed if it was already set
        # NOTE(review): despite the comment above, the seed IS overwritten
        # below in all cases — only a warning is emitted; confirm intent
        if name in SEEDS:
            warning(f'Resetting a global seed for {name}')
        if not automatic_seeding:
            SEEDS[name] = value
        else:
            # draw a fresh 64-bit seed and reflect it back into kwargs
            val = random.getrandbits(64)
            SEEDS[name] = val
            kwargs[name] = val
    # single-entry dict: return the stored seed value
    return kwargs.popitem()[1]
def main(bootstrap_seed, random_state, hidden_layer_sizes=150, alpha=0.001,
         data_path='.', epoch=0, uid=None, experiment_name=None, client=None):
    """Train an MLP regressor on the pan-allele task and return its validation score.

    Parameters
    ----------
    bootstrap_seed: int
        seed for controling which data-points are selected for training/testing splits
    random_state: int
        seed for the generation of weights
    hidden_layer_sizes: int
        size of the single hidden layer (cast to int, then passed as a 1-tuple)
    alpha: float
        L2 penalty (regularization term) parameter
    data_path: str
        fallback dataset folder when the ``data.path`` option is unset
    epoch: int
        unused here; accepted for interface compatibility
    uid:
        trial uid forwarded to ``task.init``
    experiment_name: str, optional
        experiment identifier used when logging metrics to a database
    client:
        metric-logging client; when None results are not persisted
    """
    hidden_layer_sizes = int(hidden_layer_sizes)
    # Load Dataset
    train_data = get_train_dataset(folder=option('data.path', data_path),
                                   task='pan_allele', min_nb_examples=1000)
    valid_data = get_valid_dataset(option('data.path', data_path))
    test_data = get_test_dataset(option('data.path', data_path))
    # one bootstrap seed for all 3 datasets
    rng = numpy.random.RandomState(bootstrap_seed)
    train_data = bootstrap(train_data, rng)
    valid_data = bootstrap(valid_data, rng)
    test_data = bootstrap(test_data, rng)
    # Compute validation and test accuracy
    # (last column is the target, the rest are features)
    additional_metrics = [
        AUC(name='validation', loader=[([valid_data[:, :-1]], valid_data[:, -1])]),
        AUC(name='test', loader=[([test_data[:, :-1]], test_data[:, -1])])
    ]
    # Setup the task
    task = SklearnTask(MLPRegressor(solver='lbfgs', random_state=random_state),
                       metrics=additional_metrics)
    # Save the result of your experiment inside a db
    if client is not None:
        task.metrics.append(
            metric_logger(client=client, experiment=experiment_name))
    hyper_parameters = dict(
        model=dict(hidden_layer_sizes=(hidden_layer_sizes, ), alpha=alpha))
    show_dict(hyper_parameters)
    # initialize the task with you configuration
    task.init(uid=uid, **hyper_parameters)
    # Train
    task.fit(train_data[:, :-1], train_data[:, -1])
    stats = task.metrics.value()
    show_dict(stats)
    # NOTE(review): key is 'validation_aac' while the metric above is an AUC
    # named 'validation' — confirm this matches what metrics.value() emits
    return float(stats['validation_aac'])
def segmentation_baseline(model, initializer, optimizer, dataset, batch_size,
                          device, split_method='original', sampler_seed=0,
                          init_seed=0, global_seed=0, storage=None, half=False,
                          hpo_done=False, data_path='/tmp/olympus', validate=True,
                          hyper_parameters=None, uri_metric=None,
                          valid_batch_size=None, **config):
    """Build a Segmentation task from component names.

    Creates the dataset/loader, wraps the model, optimizer and (fixed 'none')
    LR schedule, computes inverse-frequency class weights from the training
    labels and returns the assembled Segmentation task.

    Fix: the progress print inside ``get_label_counts`` used
    ``i % (len(dataloader) // 10)`` which raises ZeroDivisionError for
    loaders with fewer than 10 batches; the divisor is now clamped to 1.
    """
    set_seeds(global_seed)
    # dataset size: 2913
    dataset = SplitDataset(
        Dataset(dataset, path=option('data.path', data_path),
                cache=torch.device('cpu')),
        split_method=split_method,
    )
    loader = DataLoader(
        dataset,
        sampler_seed=sampler_seed,
        batch_size=batch_size,
        valid_batch_size=valid_batch_size,
        pin_memory=True,
        num_workers=0,
    )
    input_size, target_size = loader.get_shapes()
    init = Initializer(initializer, seed=init_seed,
                       **get_parameters('initializer', hyper_parameters))
    model = Model(model, input_size=input_size, output_size=target_size[0],
                  weight_init=init, half=half)
    optimizer = Optimizer(optimizer, half=half,
                          **get_parameters('optimizer', hyper_parameters))
    lr_schedule = LRSchedule('none', **get_parameters('schedule', hyper_parameters))
    train, valid, test = loader.get_loaders(hpo_done=hpo_done)
    additional_metrics = []
    if validate and valid:
        additional_metrics.append(MeanIoU(name='validation', loader=valid))
    if validate and test:
        additional_metrics.append(MeanIoU(name='test', loader=test))

    def get_label_counts(dataloader):
        # Count how many pixels of each label appear across the whole loader
        cumulative_counts = {}
        print('get_label_counts(): ', end='')
        # progress tick roughly every 10% of the batches; clamp to 1 so
        # loaders shorter than 10 batches do not divide by zero
        tick = max(1, len(dataloader) // 10)
        for i, (_, labels) in enumerate(dataloader, 1):
            if labels.device.type == 'cuda':
                labels = labels.cpu()
            unique, counts = np.unique(labels.numpy(), return_counts=True)
            for u, c in zip(unique, counts):
                if u not in cumulative_counts:
                    cumulative_counts[u] = 0
                cumulative_counts[u] += c
            if i % tick == 0:
                print('{}%... '.format(100 * i // len(dataloader)), end='')
        print()
        return cumulative_counts

    def get_criterion_weight(counts, ignore_index=255):
        # Inverse-frequency class weights, excluding the ignored label
        counts = counts.copy()
        if ignore_index in counts:
            del counts[ignore_index]
        total_count = sum([counts[unique] for unique in sorted(counts)])
        weight = np.array(
            [total_count / counts[unique] for unique in sorted(counts)],
            dtype=np.float32)
        weight /= weight.size
        return weight

    nclasses = 21
    counts = get_label_counts(train)
    weight = get_criterion_weight(counts)
    weight = torch.tensor(weight)
    if half:
        weight = weight.half()
    criterion = nn.CrossEntropyLoss(weight=weight, ignore_index=255)
    main_task = Segmentation(model, optimizer, lr_schedule, train, criterion,
                             nclasses, device=device, storage=storage,
                             metrics=additional_metrics)
    return main_task
args, exc_info, func=None, sinfo=None, **kwargs): start = path.rfind('/olympus/') if start > -1: path = path[start + 1:] return old_factory(name, level, path, lno, msg, args, exc_info, func, sinfo, **kwargs) return log_record if globals().get('oly_log') is None: logging.basicConfig( level=option('logging.level', logging.WARN, type=int), format= '%(asctime)s [%(levelname)8s] %(name)s [%(process)d] %(pathname)s:%(lineno)d %(message)s', stream=sys.stdout) oly_log = logging.getLogger('OLYMPUS') logging.setLogRecordFactory(get_log_record_constructor()) warning = oly_log.warning info = oly_log.info debug = oly_log.debug error = oly_log.error critical = oly_log.critical exception = oly_log.exception
def classification_baseline(model, initializer, optimizer, schedule, dataset,
                            batch_size, device, split_method='original',
                            sampler_seed=0, init_seed=0, transform_seed=0,
                            global_seed=0, transform=True, storage=None,
                            half=False, hpo_done=False, data_path='/tmp/olympus',
                            validate=True, hyper_parameters=None, uri_metric=None,
                            valid_batch_size=None, cache=None, **config):
    """Assemble a Classification task from its component names.

    Seeds the global RNGs, builds the split dataset and its loader, wraps
    the model / optimizer / LR schedule with their hyper parameters,
    attaches validation and test accuracy metrics where those splits
    exist, and returns the resulting Classification task.
    """
    set_seeds(global_seed)

    split_data = SplitDataset(
        Dataset(dataset,
                path=option('data.path', data_path),
                transform=transform,
                transform_seed=transform_seed,
                cache=cache),
        split_method=split_method)

    batches = DataLoader(split_data,
                         sampler_seed=sampler_seed,
                         batch_size=batch_size,
                         valid_batch_size=valid_batch_size)

    in_shape, out_shape = batches.get_shapes()

    weight_initializer = Initializer(
        initializer, seed=init_seed,
        **get_parameters('initializer', hyper_parameters))

    wrapped_model = Model(model,
                          input_size=in_shape,
                          output_size=out_shape[0],
                          weight_init=weight_initializer,
                          half=half)

    wrapped_optimizer = Optimizer(
        optimizer, half=half,
        **get_parameters('optimizer', hyper_parameters))

    scheduler = LRSchedule(schedule, **get_parameters('schedule', hyper_parameters))

    train, valid, test = batches.get_loaders(hpo_done=hpo_done)

    # Only attach accuracy metrics for the splits that actually exist
    extra_metrics = []
    if validate:
        if valid:
            extra_metrics.append(Accuracy(name='validation', loader=valid))
        if test:
            extra_metrics.append(Accuracy(name='test', loader=test))

    return Classification(classifier=wrapped_model,
                          optimizer=wrapped_optimizer,
                          lr_scheduler=scheduler,
                          dataloader=train,
                          device=device,
                          storage=storage,
                          metrics=extra_metrics)
from olympus.optimizers import Optimizer, known_optimizers, LRSchedule, known_schedule from olympus.observers import ElapsedRealTime from olympus.tasks import ObjectDetection from olympus.hpo import HPOptimizer, Fidelity from olympus.tasks.hpo import HPO from olympus.utils import fetch_device, set_verbose_level, required, show_dict from olympus.utils.options import option from olympus.utils.storage import StateStorage from olympus.utils.functional import flatten from olympus.metrics import Loss DEFAULT_EXP_NAME = 'detection_{dataset}_{model}_{optimizer}_{lr_scheduler}_{weight_init}' base = option('base_path', '/tmp/olympus') def arguments(): parser = ArgumentParser(prog='detection', description='Detection Baseline') parser.add_argument( '--experiment-name', type=str, default=DEFAULT_EXP_NAME, metavar='EXP_NAME', help='Name of the experiment in Orion storage (default: {})'.format(DEFAULT_EXP_NAME)) parser.add_argument( '--model', type=str, metavar='MODEL_NAME', choices=known_models(), default=required, help='Name of the model') parser.add_argument( '--dataset', type=str, metavar='DATASET_NAME', choices=known_datasets(), default=required, help='Name of the dataset') parser.add_argument(