Beispiel #1
0
    def __init__(self, storage: BaseStorage, keep_best: str = None, time_buffer=option('state.storage.time', 5 * 60, type=int),
                 save_init: bool = False):
        """Checkpointing observer that persists task state through ``storage``.

        Parameters
        ----------
        storage:
            backend used to persist the checkpoints
        keep_best:
            metric name used to decide whether the current state is the best
            seen so far; ``None`` disables the keep-best mechanism
        time_buffer:
            minimum number of seconds between two saves (throttling).
            NOTE(review): the default is resolved once at import time because
            ``option(...)`` is evaluated when the ``def`` runs — confirm this
            is intended rather than a per-instance lookup.
        save_init:
            when True, also save the state before training starts
        """
        self.storage = storage
        # how many epochs between two checkpoints (option driven)
        self.frequency_epoch: int = option('checkpoint.frequency_epoch', 1, type=int)

        # Keep best state mechanic
        self.best_name: str = None
        self.keep_best: Callable = None
        if keep_best is not None:
            self.keep_best = IsBest(keep_best)

        self.save_init = save_init

        # Time throttling
        self.time_buffer = time_buffer
        self.last_save = datetime.utcnow()

        # Batch resuming is not supported
        self.frequency_new_trial: int = 1
        self.frequency_end_epoch: int = 1
        # cleanup at the end of training
        self.frequency_end_train: int = 1

        self.epoch: int = 0
        # checkpoint is done last after all other metrics have finished computing their statistics
        self.priority: int = -11
        self.uid = None

        self.pending = None
Beispiel #2
0
    def __init__(self, speed: Speed, max_epochs=None, max_steps=None):
        """Progress-reporting observer.

        Parameters
        ----------
        speed:
            speed tracker handed to the progress printer
        max_epochs:
            total number of epochs when known, else ``None``
        max_steps:
            total number of steps when known, else ``None``
        """
        # do not print more often than every `progress.print.throttle` seconds
        self.print_throttle = option('progress.print.throttle', 30, type=int)
        self.print_fun = print
        self.throttled_print = TimeThrottler(self.print_fun,
                                             every=self.print_throttle)

        self.max_epochs = max_epochs
        self.max_steps = max_steps
        self.speed = speed

        # NOTE(review): the DefaultProgress assigned below is immediately
        # overwritten unless select_progress_printer reads
        # self.progress_printer as a fallback — confirm the first assignment
        # is actually needed
        self.progress_printer = DefaultProgress(self.speed)
        self.progress_printer = self.select_progress_printer(
            max_epochs, max_steps)

        self.frequency_new_epoch: int = 1
        self.frequency_end_epoch: int = option('progress.frequency.epoch',
                                               1,
                                               type=int)
        self.frequency_end_batch: int = option('progress.frequency.batch',
                                               1,
                                               type=int)
        # which metrics to show ('epoch' by default)
        self.show_metrics: str = option('progress.show.metrics', 'epoch')
        self.frequency_trial: int = 0
        self.worker_id: int = option('worker.id', -1, type=int)
        self.first_epoch = None
Beispiel #3
0
def fetch_device():
    """Return the torch device to use, honouring the ``device.type`` option.

    Defaults to CUDA when available, otherwise falls back to CPU.
    """
    fallback = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(option('device.type', fallback))
Beispiel #4
0
def main(**kwargs):
    """Run an HPO search when hyper-parameters are missing, then train a
    final classification task with the resulting parameters.

    ``kwargs`` must provide at least ``verbose``, ``experiment_name``,
    ``uri``, ``database``, ``min_epochs`` and ``epochs`` (accessed through
    the Namespace below), plus whatever ``classification_baseline`` needs.
    """
    show_dict(kwargs)

    args = Namespace(**kwargs)
    set_verbose_level(args.verbose)

    device = fetch_device()
    experiment_name = args.experiment_name.format(**kwargs)

    # save partial results here
    state_storage = StateStorage(
        folder=option('state.storage', '/tmp/olympus/classification'))

    def main_task():
        # build a fresh task; invoked once per HPO trial
        task = classification_baseline(device=device,
                                       storage=state_storage,
                                       **kwargs)

        if args.uri is not None:
            logger = metric_logger(args.uri, args.database, experiment_name)
            task.metrics.append(logger)

        return task

    space = main_task().get_space()

    # If space is not empty we search the best hyper parameters
    params = {}
    if space:
        show_dict(space)
        hpo = HPOptimizer('hyperband',
                          space=space,
                          fidelity=Fidelity(args.min_epochs,
                                            args.epochs).to_dict())

        hpo_task = HPO(hpo, main_task)
        hpo_task.metrics.append(ElapsedRealTime())

        trial = hpo_task.fit(objective='validation_accuracy')
        print(f'HPO is done, objective: {trial.objective}')
        params = trial.params
    else:
        print('No hyper parameter missing, running the experiment...')
    # ------

    # Run the experiment with the best hyper parameters
    # -------------------------------------------------
    # NOTE(review): `params` is initialized to {} and is never set to None,
    # so this condition is always true — confirm whether the final run was
    # meant to be skippable
    if params is not None:
        # Train using train + valid for the final result
        final_task = classification_baseline(device=device,
                                             **kwargs,
                                             hpo_done=True)
        final_task.init(**params)
        final_task.fit(epochs=args.epochs)

        print('=' * 40)
        print('Final Trial Results')
        show_dict(flatten(params))
        final_task.report(pprint=True, print_fun=print)
        print('=' * 40)
Beispiel #5
0
class MSGQTracker(Observer):
    """Observer that forwards task metrics to a message-queue logger client.

    NOTE(review): the ``field(default_factory=...)`` class attributes imply
    this class is meant to be decorated with ``@dataclass``; the decorator is
    not visible in this excerpt — confirm it is applied.
    """
    # logger client; its uid is bound when a new trial starts
    client: _Logger = None

    frequency_new_trial: int = 1
    frequency_start_train: int = 1
    frequency_end_train: int = 1

    # option-driven callback frequencies (0 disables the callback)
    frequency_new_epoch: int = field(
        default_factory=lambda: option('track.frequency_epoch', 1, type=int))
    frequency_end_batch: int = field(
        default_factory=lambda: option('track.frequency_batch', 0, type=int))

    last_save: datetime = None
    epoch: int = 0
    # tracking is done last after all other metrics have finished computing their statistics
    priority: int = -10

    def on_new_trial(self, task, step, parameters, uid):
        """Bind the logger client to the new trial's uid."""
        assert uid is not None
        self.client.uid = uid

    # We push data on new epoch so for the last epoch
    # end_train push the last metrics without duplicates
    def on_new_epoch(self, task, epoch, context):
        self.epoch = epoch
        self.client.log(task.metrics.value())

    def on_start_train(self, task, step=None):
        pass

    def on_end_train(self, task, step=None):
        # push the metrics accumulated since the last epoch boundary
        if task is not None:
            self.client.log(task.metrics.value())

    def log(self, **kwargs):
        """Log arbitrary key/value pairs through the client."""
        return self.client.log(kwargs)

    def value(self):
        # this observer produces no metrics of its own
        return {}

    def state_dict(self):
        # nothing to checkpoint
        return {}

    def load_state_dict(self, state_dict):
        pass
Beispiel #6
0
    def __init__(self, data_path, task_name=None, **kwargs):
        """Load the cached train/test examples for one GLUE task.

        ``task_name`` is supplied by the concrete subclass; instantiating
        this class directly is an error.
        """
        if task_name is None:
            raise ValueError('do not use this class directly - instantiate a subclass')

        transformations = None
        # GLUE folders are upper-cased on disk, except CoLA which is camel-cased
        folder = 'CoLA' if task_name == 'cola' else task_name.upper()
        data_folder = os.path.join(data_path, folder)

        # hard-coding the model type for now..
        model_name_or_path = 'bert-base-uncased'
        model_type = 'bert'
        # and sequence size..
        max_seq_length = 128

        cache_dir = option('tokenizer.cache', '/tmp/olympus/cache_tok')
        logger.info('tokenizer cache folder: {}'.format(cache_dir))
        tokenizer = BertTokenizer.from_pretrained(
            'bert-base-uncased',
            do_lower_case=True,
            cache_dir=cache_dir,
        )

        try:
            # evaluate=False -> train split, evaluate=True -> test split
            train_dataset, test_dataset = (
                load_and_cache_examples(task_name, tokenizer, data_folder,
                                        model_name_or_path, max_seq_length,
                                        model_type, evaluate=evaluate)
                for evaluate in (False, True)
            )
        except FileNotFoundError:
            raise ValueError('please point the environment variable OLYMPUS_DATA_PATH '
                             'to the folder containing the GLUE data. Currently, it is '
                             'set as "{}"'.format(data_path))

        super().__init__(
            torch.utils.data.ConcatDataset([train_dataset, test_dataset]),
            test_size=len(test_dataset),
            transforms=transformations
        )
Beispiel #7
0
def new_seed(**kwargs):
    """Register a single named global seed and return its value.

    When the 'seeding.random' option is enabled (disabled by default), a
    fresh 64-bit random value replaces the caller-provided one on every run;
    otherwise the given value is stored, keeping runs reproducible.
    """
    global SEEDS
    import random

    assert len(kwargs) == 1, 'Only single seed can be registered'

    # Allow the user to opt into a new random seed each time the program runs
    automatic_seeding = option('seeding.random', default=False, type=bool)

    for name, seed in kwargs.items():
        if name in SEEDS:
            # a seed is already registered under this name; it gets replaced
            warning(f'Resetting a global seed for {name}')

        if automatic_seeding:
            fresh = random.getrandbits(64)
            SEEDS[name] = fresh
            kwargs[name] = fresh
        else:
            SEEDS[name] = seed

    return kwargs.popitem()[1]
Beispiel #8
0
def main(bootstrap_seed,
         random_state,
         hidden_layer_sizes=150,
         alpha=0.001,
         data_path='.',
         epoch=0,
         uid=None,
         experiment_name=None,
         client=None):
    """Train an MLP regressor on bootstrapped data and return the validation score.

    Parameters
    ----------
    bootstrap_seed: int
        seed for controlling which data-points are selected for training/testing splits
    random_state: int
        seed for the generation of weights
    hidden_layer_sizes: int
        number of neurons of the single hidden layer (cast to int below and
        wrapped into a one-element tuple)
    alpha: float
        L2 penalty (regularization term) parameter.
    data_path: str
        fallback dataset folder used when the 'data.path' option is unset
    epoch: int
        unused in this function — presumably kept for interface
        compatibility with other entry points; confirm
    uid:
        trial uid forwarded to ``task.init``
    experiment_name: str
        experiment name under which metrics are logged when a client is given
    client:
        metric-logger backend; when ``None`` no metrics are persisted
    """
    hidden_layer_sizes = int(hidden_layer_sizes)
    # Load Dataset
    train_data = get_train_dataset(folder=option('data.path', data_path),
                                   task='pan_allele',
                                   min_nb_examples=1000)
    valid_data = get_valid_dataset(option('data.path', data_path))
    test_data = get_test_dataset(option('data.path', data_path))

    # one bootstrap seed for all 3 datasets
    rng = numpy.random.RandomState(bootstrap_seed)
    train_data = bootstrap(train_data, rng)
    valid_data = bootstrap(valid_data, rng)
    test_data = bootstrap(test_data, rng)

    # Compute validation and test accuracy
    # (last column of each array is the target, the rest are features)
    additional_metrics = [
        AUC(name='validation',
            loader=[([valid_data[:, :-1]], valid_data[:, -1])]),
        AUC(name='test', loader=[([test_data[:, :-1]], test_data[:, -1])])
    ]

    # Setup the task
    task = SklearnTask(MLPRegressor(solver='lbfgs', random_state=random_state),
                       metrics=additional_metrics)

    # Save the result of your experiment inside a db
    if client is not None:
        task.metrics.append(
            metric_logger(client=client, experiment=experiment_name))
    hyper_parameters = dict(
        model=dict(hidden_layer_sizes=(hidden_layer_sizes, ), alpha=alpha))
    show_dict(hyper_parameters)

    # initialize the task with you configuration
    task.init(uid=uid, **hyper_parameters)

    # Train
    task.fit(train_data[:, :-1], train_data[:, -1])
    stats = task.metrics.value()
    show_dict(stats)

    # NOTE(review): 'validation_aac' looks like a typo for the AUC metric
    # registered above under name='validation' — confirm the exact key the
    # metrics dict produces before changing it
    return float(stats['validation_aac'])
Beispiel #9
0
def segmentation_baseline(model,
                          initializer,
                          optimizer,
                          dataset,
                          batch_size,
                          device,
                          split_method='original',
                          sampler_seed=0,
                          init_seed=0,
                          global_seed=0,
                          storage=None,
                          half=False,
                          hpo_done=False,
                          data_path='/tmp/olympus',
                          validate=True,
                          hyper_parameters=None,
                          uri_metric=None,
                          valid_batch_size=None,
                          **config):
    """Assemble a Segmentation task from its named components.

    Builds the split dataset and loaders, the model with its weight
    initializer, the optimizer, a class-frequency-weighted cross-entropy
    criterion, and wires optional validation/test MeanIoU metrics in.

    Returns
    -------
    Segmentation
        the fully-wired task, ready for ``fit``
    """
    set_seeds(global_seed)

    # dataset size: 2913
    dataset = SplitDataset(
        Dataset(dataset,
                path=option('data.path', data_path),
                cache=torch.device('cpu')),
        split_method=split_method,
    )

    loader = DataLoader(
        dataset,
        sampler_seed=sampler_seed,
        batch_size=batch_size,
        valid_batch_size=valid_batch_size,
        pin_memory=True,
        num_workers=0,
    )

    input_size, target_size = loader.get_shapes()

    init = Initializer(initializer,
                       seed=init_seed,
                       **get_parameters('initializer', hyper_parameters))

    model = Model(model,
                  input_size=input_size,
                  output_size=target_size[0],
                  weight_init=init,
                  half=half)

    optimizer = Optimizer(optimizer,
                          half=half,
                          **get_parameters('optimizer', hyper_parameters))

    lr_schedule = LRSchedule('none',
                             **get_parameters('schedule', hyper_parameters))

    train, valid, test = loader.get_loaders(hpo_done=hpo_done)

    additional_metrics = []

    if validate and valid:
        additional_metrics.append(MeanIoU(name='validation', loader=valid))

    if validate and test:
        additional_metrics.append(MeanIoU(name='test', loader=test))

    def get_label_counts(dataloader):
        """Count occurrences of every label id across the whole dataloader."""
        cumulative_counts = {}
        n_batches = len(dataloader)
        # report progress roughly every 10%; max(1, ...) avoids a
        # ZeroDivisionError when the loader has fewer than 10 batches
        report_every = max(1, n_batches // 10)
        print('get_label_counts(): ', end='')
        for i, (_, labels) in enumerate(dataloader, 1):
            if labels.device.type == 'cuda':
                labels = labels.cpu()
            unique, counts = np.unique(labels.numpy(), return_counts=True)
            for u, c in zip(unique, counts):
                cumulative_counts[u] = cumulative_counts.get(u, 0) + c
            if i % report_every == 0:
                print('{}%... '.format(100 * i // n_batches), end='')
        print()
        return cumulative_counts

    def get_criterion_weight(counts, ignore_index=255):
        """Inverse-frequency class weights, normalized by the class count.

        ``ignore_index`` labels are dropped before weighting so they never
        contribute to the loss.
        """
        counts = counts.copy()
        if ignore_index in counts:
            del counts[ignore_index]
        total_count = sum([counts[unique] for unique in sorted(counts)])
        weight = np.array(
            [total_count / counts[unique] for unique in sorted(counts)],
            dtype=np.float32)
        weight /= weight.size
        return weight

    # NOTE(review): 21 classes and the 2913-sample comment suggest Pascal VOC
    # — confirm before reusing with another dataset
    nclasses = 21
    counts = get_label_counts(train)
    weight = get_criterion_weight(counts)
    weight = torch.tensor(weight)
    if half:
        weight = weight.half()
    # 255 marks "ignore" pixels, matching get_criterion_weight above
    criterion = nn.CrossEntropyLoss(weight=weight, ignore_index=255)

    main_task = Segmentation(model,
                             optimizer,
                             lr_schedule,
                             train,
                             criterion,
                             nclasses,
                             device=device,
                             storage=storage,
                             metrics=additional_metrics)

    return main_task
Beispiel #10
0
                   args,
                   exc_info,
                   func=None,
                   sinfo=None,
                   **kwargs):
        start = path.rfind('/olympus/')
        if start > -1:
            path = path[start + 1:]
        return old_factory(name, level, path, lno, msg, args, exc_info, func,
                           sinfo, **kwargs)

    return log_record


# Configure logging exactly once per interpreter; the globals() guard makes
# the module safe to re-import/re-execute without reconfiguring handlers.
if globals().get('oly_log') is None:
    logging.basicConfig(
        level=option('logging.level', logging.WARN, type=int),
        format=
        '%(asctime)s [%(levelname)8s] %(name)s [%(process)d] %(pathname)s:%(lineno)d %(message)s',
        stream=sys.stdout)

    oly_log = logging.getLogger('OLYMPUS')
    # install the record factory defined above (trims pathnames to /olympus/...)
    logging.setLogRecordFactory(get_log_record_constructor())

    # convenience module-level aliases for the olympus logger
    warning = oly_log.warning
    info = oly_log.info
    debug = oly_log.debug
    error = oly_log.error
    critical = oly_log.critical
    exception = oly_log.exception
Beispiel #11
0
def classification_baseline(model,
                            initializer,
                            optimizer,
                            schedule,
                            dataset,
                            batch_size,
                            device,
                            split_method='original',
                            sampler_seed=0,
                            init_seed=0,
                            transform_seed=0,
                            global_seed=0,
                            transform=True,
                            storage=None,
                            half=False,
                            hpo_done=False,
                            data_path='/tmp/olympus',
                            validate=True,
                            hyper_parameters=None,
                            uri_metric=None,
                            valid_batch_size=None,
                            cache=None,
                            **config):
    """Assemble a Classification task from its named components.

    Builds the split dataset and loaders, the model with its weight
    initializer, the optimizer and LR schedule, then wires optional
    validation/test Accuracy metrics into the resulting task.
    """
    set_seeds(global_seed)

    raw_dataset = Dataset(dataset,
                          path=option('data.path', data_path),
                          transform=transform,
                          transform_seed=transform_seed,
                          cache=cache)
    dataset = SplitDataset(raw_dataset, split_method=split_method)

    loader = DataLoader(dataset,
                        sampler_seed=sampler_seed,
                        batch_size=batch_size,
                        valid_batch_size=valid_batch_size)

    input_size, target_size = loader.get_shapes()

    weight_init = Initializer(initializer,
                              seed=init_seed,
                              **get_parameters('initializer', hyper_parameters))

    model = Model(model,
                  input_size=input_size,
                  output_size=target_size[0],
                  weight_init=weight_init,
                  half=half)

    optimizer = Optimizer(optimizer,
                          half=half,
                          **get_parameters('optimizer', hyper_parameters))

    lr_schedule = LRSchedule(schedule,
                             **get_parameters('schedule', hyper_parameters))

    train, valid, test = loader.get_loaders(hpo_done=hpo_done)

    # optional held-out metrics, only when the corresponding split exists
    metrics = []
    if validate:
        if valid:
            metrics.append(Accuracy(name='validation', loader=valid))
        if test:
            metrics.append(Accuracy(name='test', loader=test))

    return Classification(classifier=model,
                          optimizer=optimizer,
                          lr_scheduler=lr_schedule,
                          dataloader=train,
                          device=device,
                          storage=storage,
                          metrics=metrics)
Beispiel #12
0
from olympus.optimizers import Optimizer, known_optimizers, LRSchedule, known_schedule
from olympus.observers import ElapsedRealTime

from olympus.tasks import ObjectDetection
from olympus.hpo import HPOptimizer, Fidelity
from olympus.tasks.hpo import HPO

from olympus.utils import fetch_device, set_verbose_level, required, show_dict
from olympus.utils.options import option
from olympus.utils.storage import StateStorage
from olympus.utils.functional import flatten
from olympus.metrics import Loss


# Experiment-name template; expanded with the run's arguments (see the
# --experiment-name default below)
DEFAULT_EXP_NAME = 'detection_{dataset}_{model}_{optimizer}_{lr_scheduler}_{weight_init}'
# Root output folder, overridable through the 'base_path' option
base = option('base_path', '/tmp/olympus')


def arguments():
    parser = ArgumentParser(prog='detection', description='Detection Baseline')

    parser.add_argument(
        '--experiment-name', type=str, default=DEFAULT_EXP_NAME,  metavar='EXP_NAME',
        help='Name of the experiment in Orion storage (default: {})'.format(DEFAULT_EXP_NAME))
    parser.add_argument(
        '--model', type=str, metavar='MODEL_NAME', choices=known_models(), default=required,
        help='Name of the model')
    parser.add_argument(
        '--dataset', type=str, metavar='DATASET_NAME', choices=known_datasets(), default=required,
        help='Name of the dataset')
    parser.add_argument(