Example #1
def _generate_tasks(config, steps=2):
    """Generate a set of user defined tasks. Depending on the experiment 
    conducted, a set of splitMNIST or permutedMNIST tasks is returned.
    
    Args:
        config: Command-line arguments.
        steps: Number of classes per tasks. Only applicable for splitMNIST.
    Returns:
        data_handlers: A list of data handlers.
    """
    print('utils num_tasks: ', config.num_tasks)

    if config.experiment == "splitMNIST":
        if 'reference' in config.note:
            from data.special.split_mnist import get_split_MNIST_handlers
            return get_split_MNIST_handlers(config.data_dir, steps=steps)
    elif 'zixuan' in config.note:
        from data.special.my_split_mnist import get_split_MNIST_handlers
        # `mixemnist` is assumed to be imported at module level.
        data, taskcla, inputsize = mixemnist.get(seed=config.seed,
                                                 args=config)

            print('=' * 100)
            print('Arguments =')
            for arg in vars(config):
                print('\t' + arg + ':', getattr(config, arg))
            print('=' * 100)

            print('Input size =', inputsize, '\nTask info =', taskcla)
            config.taskcla = taskcla
            return get_split_MNIST_handlers(config.data_dir,
                                            data=data,
                                            config=config)
        else:
            raise ValueError('Unknown splitMNIST variant in config.note: '
                             '%s' % config.note)

    elif config.experiment == "permutedMNIST":
        # `np` refers to a module-level `import numpy as np`.
        rand = np.random.RandomState(config.data_random_seed)
        pd = config.padding * 2
        # The first task keeps the identity permutation (`None`); each
        # subsequent task permutes the pixels of the padded
        # (28+pd)x(28+pd) images.
        permutations = [None] + [
            rand.permutation((28 + pd) * (28 + pd))
            for _ in range(config.num_tasks - 1)
        ]
        if config.upper_bound:
            # FIXME Due to the current implementation of
            # `PermutedMNISTList`, which resets the batch generator every
            # time we switch the task, we have to go for the memory-
            # inefficient variant here, as this upper bound requires
            # building batches from multiple datasets.
            # Will be fixed in the future.
            from data.special.permuted_mnist import PermutedMNIST
            return [
                PermutedMNIST(config.data_dir,
                              permutation=p,
                              padding=config.padding) for p in permutations
            ]
        else:
            from data.special.permuted_mnist import PermutedMNISTList
            return PermutedMNISTList(permutations,
                                     config.data_dir,
                                     padding=config.padding,
                                     show_perm_change_msg=False)
    else:
        raise ValueError('Experiment %s unknown!' % config.experiment)
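
For context, here is a minimal call-site sketch for Example #1. The `argparse.Namespace` stand-in and all attribute values are illustrative assumptions; only the attribute names actually read by `_generate_tasks` above are used.

import argparse

# Hypothetical config object; attribute names mirror those accessed in
# `_generate_tasks` (experiment, note, data_dir, num_tasks, ...).
config = argparse.Namespace(
    experiment='permutedMNIST',  # or 'splitMNIST'
    note='',                     # routing string; only checked for splitMNIST
    data_dir='../datasets',
    num_tasks=10,
    data_random_seed=42,
    padding=2,                   # images become (28 + 4) x (28 + 4)
    upper_bound=False,
    seed=42,
)

data_handlers = _generate_tasks(config)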
Example #2
def _generate_tasks(config):
    """Generate a set of user defined tasks. Depending on the experiment 
    conducted, a set of splitMNIST or permutedMNIST tasks is returned.
    
    Args:
        config: Command-line arguments.
        steps: Number of classes per tasks. Only applicable for splitMNIST.
    Returns:
        data_handlers: A list of data handlers.
    """
    print('utils num_tasks: ', config.num_tasks)

    # `mixemnist` and `mixceleba` are assumed to be imported at module level.
    if 'mixemnist' in config.note:
        data, taskcla, inputsize = mixemnist.get(seed=config.seed, args=config)
    elif 'mixceleba' in config.note:
        data, taskcla, inputsize = mixceleba.get(seed=config.seed, args=config)
    else:
        raise ValueError('Unknown dataset in config.note: %s' % config.note)

    dims = [x[1] for x in taskcla]
    config.dims = dims

    print('=' * 100)
    print('Arguments =')
    for arg in vars(config):
        print('\t' + arg + ':', getattr(config, arg))
    print('=' * 100)

    print('Input size =', inputsize, '\nTask info =', taskcla)
    config.taskcla = taskcla
    # `get_split_handlers` is assumed to be imported at module level.
    return get_split_handlers(config.data_dir,
                              use_one_hot=True,
                              data=data,
                              config=config,
                              dims=dims)
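
To make the `taskcla`-to-`dims` step concrete, here is a short sketch using a made-up `taskcla` in the (task_id, num_classes) format the loaders above are assumed to return; the concrete values are invented for illustration.

# Hypothetical task description list: one (task_id, num_classes) tuple
# per task, as assumed for the return value of mixemnist.get/mixceleba.get.
taskcla = [(0, 10), (1, 47), (2, 10)]

dims = [x[1] for x in taskcla]  # per-task output dimensionality
print(dims)  # prints: [10, 47, 10]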
Example #3
def load_datasets(config, shared, logger, data_dir='../datasets'):
    """Create a data handler per task.

    Note:
        Datasets are generated with targets being 1-hot encoded.

    Args:
        config (argparse.Namespace): Command-line arguments.
        shared (argparse.Namespace): Object for sharing data between functions.
            Contains the type of experiment.
        logger: Logger object.
        data_dir (str): From where to load (or to where to download) the
            datasets?

    Returns:
        (list) A list of data handlers (i.e., objects of class
        :class:`data.dataset.Dataset`).
    """

    # `mixemnist` and `mixceleba` are assumed to be imported at module level.
    if 'mixemnist' in config.note:
        data, taskcla, inputsize = mixemnist.get(seed=config.seed, args=config)
    elif 'mixceleba' in config.note:
        data, taskcla, inputsize = mixceleba.get(seed=config.seed, args=config)
    else:
        raise ValueError('Unknown dataset in config.note: %s' % config.note)

    dims = [x[1] for x in taskcla]
    config.dims = dims

    print('=' * 100)
    print('Arguments =')
    for arg in vars(config):
        print('\t' + arg + ':', getattr(config, arg))
    print('=' * 100)

    print('Input size =', inputsize, '\nTask info =', taskcla)

    # `get_split_handlers` is assumed to be imported at module level
    # (cf. `data.special.split_mlp` in Example #4 below).
    dhandlers = get_split_handlers(data_dir,
                                   use_one_hot=True,
                                   data=data,
                                   config=config,
                                   dims=dims)
    assert len(dhandlers) == config.num_tasks

    logger.info('Loaded %d task(s) into memory.' % config.num_tasks)

    return dhandlers
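
A minimal sketch of calling `load_datasets` from Example #3 follows; the logger setup and the namespace contents are assumptions for illustration, with only the attributes read above filled in.

import argparse
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('demo')

# Hypothetical namespaces; `config.note` selects the mixemnist branch.
config = argparse.Namespace(note='mixemnist', seed=42, num_tasks=5)
shared = argparse.Namespace(experiment='mixemnist')

dhandlers = load_datasets(config, shared, logger, data_dir='../datasets')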
Example #4
def load_datasets(config, shared, logger, data_dir='../datasets'):
    """Create a data handler per task.

    Note:
        Datasets are generated with targets being 1-hot encoded.

    Args:
        config (argparse.Namespace): Command-line arguments.
        shared (argparse.Namespace): Object for sharing data between functions.
            Contains the type of experiment.
        logger: Logger object.
        data_dir (str): From where to load (or to where to download) the
            datasets?

    Returns:
        (list) A list of data handlers (i.e., objects of class
        :class:`data.dataset.Dataset`).
    """

    if 'reference' in config.note:
        from data.special.split_cifar import get_split_handlers
        augment_data = not config.disable_data_augmentation
        #if shared.experiment == 'zenke':
        #    augment_data = False
        #    # To be comparable to previous results. Note, Zenke et al. didn't
        #    # utilize any data augmentation as far as I know.
        #    logger.warning('Data augmentation disabled for Zenkenet.')
        print('augment_data: ', augment_data)
        # NOTE Data augmentation is force-disabled here, so the branch below
        # is currently dead code.
        augment_data = False
        if augment_data:
            logger.info('Data augmentation will be used.')

        # assert(config.num_tasks <= 11)
        logger.info('Loading CIFAR datasets ...')
        dhandlers = get_split_handlers(data_dir,
                                       use_one_hot=False,
                                       validation_size=500,
                                       use_data_augmentation=augment_data,
                                       num_tasks=config.num_tasks)
        assert len(dhandlers) == config.num_tasks

        logger.info('Loaded %d CIFAR task(s) into memory.' % config.num_tasks)

        return dhandlers

    elif 'zixuan' in config.note:
        from data.special.split_mlp import get_split_handlers

        # `mixemnist` and `mixceleba` are assumed to be imported at module
        # level.
        if 'mixemnist' in config.note:
            data, taskcla, inputsize = mixemnist.get(seed=config.seed,
                                                     args=config)
        elif 'mixceleba' in config.note:
            data, taskcla, inputsize = mixceleba.get(seed=config.seed,
                                                     args=config)
        else:
            raise ValueError('Unknown dataset in config.note: %s' %
                             config.note)

        print('=' * 100)
        print('Arguments =')
        for arg in vars(config):
            print('\t' + arg + ':', getattr(config, arg))
        print('=' * 100)

        print('Input size =', inputsize, '\nTask info =', taskcla)
        augment_data = not config.disable_data_augmentation
        #if shared.experiment == 'zenke':
        #    augment_data = False
        #    # To be comparable to previous results. Note, Zenke et al. didn't
        #    # utilize any data augmentation as far as I know.
        #    logger.warning('Data augmentation disabled for Zenkenet.')
        print('augment_data: ', augment_data)
        # NOTE As above, data augmentation is force-disabled; the branch
        # below is currently dead code.
        augment_data = False
        if augment_data:
            logger.info('Data augmentation will be used.')

        # This branch loads mixed EMNIST/CelebA data, not CIFAR.
        logger.info('Loading datasets ...')
        dhandlers = get_split_handlers(data_dir,
                                       use_one_hot=False,
                                       data=data,
                                       config=config)
        assert len(dhandlers) == config.num_tasks

        logger.info('Loaded %d task(s) into memory.' % config.num_tasks)

        return dhandlers

    else:
        raise ValueError('Unknown experiment note "%s"!' % config.note)
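
The `config.note` string acts as a simple dispatcher in Example #4. The following condensed sketch summarizes that routing; `_route_by_note` is a hypothetical helper written only to illustrate the control flow above, not part of the original code.

def _route_by_note(note):
    # Mirrors the branch structure of `load_datasets` in Example #4.
    if 'reference' in note:
        return 'split CIFAR handlers (data.special.split_cifar)'
    elif 'zixuan' in note and 'mixemnist' in note:
        return 'mixed EMNIST via mixemnist.get + split_mlp handlers'
    elif 'zixuan' in note and 'mixceleba' in note:
        return 'mixed CelebA via mixceleba.get + split_mlp handlers'
    raise ValueError('Unknown note: %s' % note)

print(_route_by_note('zixuan,mixemnist'))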