Example #1
    def test_initialize(self):

        c = ABC(10, objective_function)
        c.add_param(0, 10)
        c.add_param(0, 10)
        c.initialize()
        # initialize() creates one onlooker bee per employer bee, so 20 bees total
        self.assertEqual(len(c._bees), 20)
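
The objective_function referenced by these tests is not included in the excerpts; a minimal sketch, assuming parameter values arrive as a sequence and the goal is simply to minimize their sum (so two parameters bounded by [0, 10] are optimal at (0, 0)):

def objective_function(params):
    # Hypothetical objective: minimize the sum of the parameter values
    return sum(params)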
Example #2
    def test_kwargs(self):

        c = ABC(10, objective_function_kwargs, {'my_kwarg': 2})
        c.add_param(0, 0)
        c.add_param(0, 0)
        c.initialize()
        self.assertEqual(c.best_ret_val, 2)
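
objective_function_kwargs is also not shown; a hypothetical version, assuming the args dict passed to the ABC constructor is unpacked into keyword arguments, returns the sum of the parameters plus my_kwarg (both parameters are pinned to 0 by add_param(0, 0), so the best return value is 2):

def objective_function_kwargs(params, my_kwarg=0):
    # Hypothetical: sum of parameter values plus the keyword argument supplied
    # through the ABC constructor's args dict
    return sum(params) + my_kwarg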
Example #3
    def test_multiprocessing(self):

        c = ABC(20, objective_function, num_processes=4)
        self.assertEqual(c._num_processes, 4)
        c.add_param(0, 10)
        c.add_param(0, 10)
        c.initialize()
        c.search()
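
When num_processes > 1 the colony evaluates bees in parallel. On platforms where Python's multiprocessing uses the 'spawn' start method (Windows, and macOS by default), parallel searches should be launched from inside a main guard, e.g.:

if __name__ == '__main__':
    c = ABC(20, objective_function, num_processes=4)
    c.add_param(0, 10)
    c.add_param(0, 10)
    c.initialize()
    c.search()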
Example #4
    def test_custom_param_name(self):

        c = ABC(20, objective_function)
        c.add_param(0, 10, name='int1')
        c.add_param(0, 10, name='int2')
        c.initialize()
        for _ in range(50):
            c.search()
        self.assertEqual(c.best_params, {'int1': 0, 'int2': 0})
Example #5
    def test_search(self):

        c = ABC(20, objective_function)
        c.add_param(0, 10)
        c.add_param(0, 10)
        c.initialize()
        for _ in range(50):
            c.search()
        self.assertEqual(c.best_fitness, 1)
        self.assertEqual(c.best_ret_val, 0)
        self.assertEqual(c.best_params, {'P0': 0, 'P1': 0})
Example #6
    def test_get_stats(self):

        c = ABC(10, objective_function)
        c.add_param(0, 0)
        c.add_param(0, 0)
        c.initialize()
        self.assertEqual(c.best_fitness, 1)
        self.assertEqual(c.best_ret_val, 0)
        self.assertEqual(c.best_params, {'P0': 0, 'P1': 0})
        self.assertEqual(c.average_fitness, 1)
        self.assertEqual(c.average_ret_val, 0)
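
The assertions in Examples #5 and #6 imply that an objective return value of 0 corresponds to a fitness of 1. A fitness transform commonly used in ABC implementations, and consistent with these assertions (an assumption, not taken from the library source):

def fitness(ret_val):
    # Typical ABC fitness mapping: non-negative returns map to 1 / (1 + value),
    # negative returns map to 1 + |value|, so lower objective values score higher
    return 1 / (1 + ret_val) if ret_val >= 0 else 1 + abs(ret_val)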
Example #7
def tune_model_architecture(n_bees: int, n_iter: int, dataset_train: QSPRDataset,
                            dataset_eval: QSPRDataset, n_processes: int = 1,
                            **kwargs) -> dict:
    """
    Tunes model architecture parameters (number of hidden layers, neurons per hidden layer, neuron
    dropout); additional **kwargs can include any in:
        [
            # ECNet parameters
            'epochs' (default 100),
            'batch_size' (default 32),
            'valid_size' (default 0.2),
            'patience' (default 32),
            'lr_decay' (default 0.0),
            # Adam optim. alg. arguments
            'lr' (default 0.001),
            'beta_1' (default 0.9),
            'beta_2' (default 0.999),
            'eps' (default 1e-8),
            'weight_decay' (default 0.0),
            'amsgrad' (default False)
        ]

    Args:
        n_bees (int): number of employer bees to use in ABC algorithm
        n_iter (int): number of iterations, or "search cycles", for ABC algorithm
        dataset_train (QSPRDataset): dataset used to train evaluation models
        dataset_eval (QSPRDataset): dataset used for evaluation
        n_processes (int, optional): if > 1, uses multiprocessing when evaluating at an iteration
        **kwargs: additional keyword arguments

    Returns:
        dict: {'hidden_dim': int, 'n_hidden': int, 'dropout': float}
    """

    kwargs['train_ds'] = dataset_train
    kwargs['eval_ds'] = dataset_eval
    abc = ABC(n_bees, _cost_arch, num_processes=n_processes, obj_fn_args=kwargs)
    abc.add_param(CONFIG['architecture_params_range']['hidden_dim'][0],
                  CONFIG['architecture_params_range']['hidden_dim'][1], name='hidden_dim')
    abc.add_param(CONFIG['architecture_params_range']['n_hidden'][0],
                  CONFIG['architecture_params_range']['n_hidden'][1], name='n_hidden')
    abc.add_param(CONFIG['architecture_params_range']['dropout'][0],
                  CONFIG['architecture_params_range']['dropout'][1], name='dropout')
    abc.initialize()
    for _ in range(n_iter):
        abc.search()
    return {
        'hidden_dim': abc.best_params['hidden_dim'],
        'n_hidden': abc.best_params['n_hidden'],
        'dropout': abc.best_params['dropout']
    }
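
The cost function _cost_arch is defined elsewhere in the module; a hypothetical sketch, assuming named parameters arrive as a dict and that build_model, train_model, and eval_error are placeholder helpers (not ECNet API) that train on train_ds and score on eval_ds:

def _cost_arch(params: dict, **kwargs) -> float:
    # Hypothetical cost function: lower return values indicate better architectures
    model = build_model(hidden_dim=params['hidden_dim'],
                        n_hidden=params['n_hidden'],
                        dropout=params['dropout'])
    train_model(model, kwargs['train_ds'])
    return eval_error(model, kwargs['eval_ds'])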
Example #8
def tune_batch_size(n_bees: int, n_iter: int, dataset_train: QSPRDataset,
                    dataset_eval: QSPRDataset, n_processes: int = 1,
                    **kwargs) -> dict:
    """
    Tunes the batch size during training; additional **kwargs can include any in:
        [
            # ECNet parameters
            'epochs' (default 100),
            'valid_size' (default 0.2),
            'patience' (default 32),
            'lr_decay' (default 0.0),
            'hidden_dim' (default 128),
            'n_hidden' (default 2),
            'dropout' (default 0.0),
            # Adam optim. alg. arguments
            'lr' (default 0.001),
            'beta_1' (default 0.9),
            'beta_2' (default 0.999),
            'eps' (default 1e-8),
            'weight_decay' (default 0.0),
            'amsgrad' (default False)
        ]

    Args:
        n_bees (int): number of employer bees to use in ABC algorithm
        n_iter (int): number of iterations, or "search cycles", for ABC algorithm
        dataset_train (QSPRDataset): dataset used to train evaluation models
        dataset_eval (QSPRDataset): dataset used for evaluation
        n_processes (int, optional): if > 1, uses multiprocessing when evaluating at an iteration
        **kwargs: additional keyword arguments

    Returns:
        dict: {'batch_size': int}
    """

    kwargs['train_ds'] = dataset_train
    kwargs['eval_ds'] = dataset_eval
    abc = ABC(n_bees, _cost_batch_size, num_processes=n_processes, obj_fn_args=kwargs)
    abc.add_param(1, len(kwargs.get('train_ds').desc_vals), name='batch_size')
    abc.initialize()
    for _ in range(n_iter):
        abc.search()
    return {'batch_size': abc.best_params['batch_size']}
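
A hypothetical end-to-end use of the two tuning helpers (the dataset variables, bee count, and iteration count below are assumptions):

arch_params = tune_model_architecture(n_bees=10, n_iter=25,
                                      dataset_train=ds_train,
                                      dataset_eval=ds_eval,
                                      n_processes=4)
batch_size = tune_batch_size(n_bees=10, n_iter=25,
                             dataset_train=ds_train,
                             dataset_eval=ds_eval,
                             n_processes=4, **arch_params)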
Example #9
def main():

    # Create the colony with 10 employer bees and the above objective function
    abc = ABC(10, minimize_integers)

    # Add three integers, randomly initialized between 0 and 10 for each bee
    abc.add_param(0, 10, name='Int_1')
    abc.add_param(0, 10, name='Int_2')
    abc.add_param(0, 10, name='Int_3')

    # Initialize 10 employer bees, 10 onlooker bees
    abc.initialize()

    # Run the search cycle 10 times
    for _ in range(10):
        abc.search()
        print('Average fitness: {}'.format(abc.average_fitness))
        print('Average obj. fn. return value: {}'.format(abc.average_ret_val))
        print('Best fitness score: {}'.format(abc.best_fitness))
        print('Best obj. fn. return value: {}'.format(abc.best_ret_val))
        print('Best parameters: {}\n'.format(abc.best_params))
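
The minimize_integers objective used above is assumed to simply sum the three integers, so the colony drives each toward its lower bound of 0; a minimal sketch:

def minimize_integers(integers):
    # Minimal objective: the colony minimizes the sum of the supplied integers
    return sum(integers)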
Example #10
def tune_hyperparameters(df: DataFrame,
                         vars: dict,
                         num_employers: int,
                         num_iterations: int,
                         num_processes: int = 1,
                         shuffle: str = None,
                         split: list = None,
                         validate: bool = True,
                         eval_set: str = None,
                         eval_fn: str = 'rmse',
                         epochs: int = 500) -> dict:
    '''Tunes neural network learning/architecture hyperparameters

    Args:
        df (ecnet.utils.data_utils.DataFrame): currently loaded data
        vars (dict): ecnet.Server._vars variables
        num_employers (int): number of employer bees
        num_iterations (int): number of search cycles for the colony
        num_processes (int): number of parallel processes to utilize
        shuffle (str): shuffles `train` or `all` sets if not None
        split (list): if shuffle is not None, [learn%, valid%, test%]
        validate (bool): if True, uses periodic validation during training;
            otherwise, validation is not used
        eval_set (str): set used to evaluate bee performance; `learn`, `valid`,
            `train`, `test`, None (all sets)
        eval_fn (str): error function used to evaluate bee performance; `rmse`,
            `mean_abs_error`, `med_abs_error`
        epochs (int): number of training epochs per bee ANN (def: 500)

    Returns:
        dict: tuned hyperparameters
    '''

    # force the 'spawn' start method on non-Windows platforms (here `name` is
    # expected to be os.name)
    if name != 'nt':
        set_start_method('spawn', force=True)

    logger.log('info',
               'Tuning architecture/learning hyperparameters',
               call_loc='TUNE')
    logger.log('debug',
               'Arguments:\n\t| num_employers:\t{}\n\t| '
               'num_iterations:\t{}\n\t| shuffle:\t\t{}\n\t| split:'
               '\t\t{}\n\t| validate:\t\t{}\n\t| eval_set:\t\t{}\n\t'
               '| eval_fn:\t\t{}'.format(num_employers, num_iterations,
                                         shuffle, split, validate, eval_set,
                                         eval_fn),
               call_loc='TUNE')

    fit_fn_args = {
        'df': df,
        'shuffle': shuffle,
        'num_processes': num_processes,
        'split': split,
        'validate': validate,
        'eval_set': eval_set,
        'eval_fn': eval_fn,
        'hidden_layers': vars['hidden_layers'],
        'epochs': epochs
    }

    to_tune = [(1e-9, 1e-4, 'decay'), (1e-5, 0.1, 'learning_rate'),
               (1, len(df.learn_set), 'batch_size'), (64, 1024, 'patience')]
    for hl in range(len(vars['hidden_layers'])):
        to_tune.append((1, 2 * len(df._input_names), 'hl{}'.format(hl)))

    abc = ABC(num_employers, tune_fitness_function, fit_fn_args, num_processes)
    for param in to_tune:
        abc.add_param(param[0], param[1], name=param[2])
    abc.initialize()

    best_ret_val = abc.best_ret_val
    best_params = abc.best_params
    for i in range(num_iterations):
        logger.log('info', 'Iteration {}'.format(i + 1), call_loc='TUNE')
        abc.search()
        new_best_ret = abc.best_ret_val
        new_best_params = abc.best_params
        logger.log('info',
                   'Best Performer: {}, {}'.format(new_best_ret, new_best_params),
                   call_loc='TUNE')
        if new_best_ret < best_ret_val:
            best_ret_val = new_best_ret
            best_params = new_best_params

    vars['decay'] = best_params['decay']
    vars['learning_rate'] = best_params['learning_rate']
    vars['batch_size'] = best_params['batch_size']
    vars['patience'] = best_params['patience']
    for l_idx in range(len(vars['hidden_layers'])):
        vars['hidden_layers'][l_idx][0] = best_params['hl{}'.format(l_idx)]
    return vars