Example 1
    def test_initialize(self):

        c = ABC(10, objective_function)
        c.add_param(0, 10)
        c.add_param(0, 10)
        c.initialize()
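        # Initialization creates 10 employer bees plus 10 onlooker bees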
        self.assertEqual(len(c._bees), 20)
Example 2
    def test_kwargs(self):

        c = ABC(10, objective_function_kwargs, {'my_kwarg': 2})
        c.add_param(0, 0)
        c.add_param(0, 0)
        c.initialize()
        self.assertEqual(c.best_ret_val, 2)
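These tests reference objective_function and objective_function_kwargs, which are defined elsewhere in the test module. A minimal sketch consistent with the assertions above (the exact bodies are an assumption) could be:

def objective_function(params):
    # Lower is better: the colony minimizes the sum of the parameter values
    return sum(params)

def objective_function_kwargs(params, my_kwarg=1):
    # Same objective, offset by a keyword argument supplied via obj_fn_args
    return sum(params) + my_kwarg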
Example 3
    def test_multiprocessing(self):

        c = ABC(20, objective_function, num_processes=4)
        self.assertEqual(c._num_processes, 4)
        c.add_param(0, 10)
        c.add_param(0, 10)
        c.initialize()
        c.search()
Example 4
    def test_custom_param_name(self):

        c = ABC(20, objective_function)
        c.add_param(0, 10, name='int1')
        c.add_param(0, 10, name='int2')
        c.initialize()
        for _ in range(50):
            c.search()
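        # Parameters added with a custom name appear under that name in best_params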
        self.assertEqual(c.best_params, {'int1': 0, 'int2': 0})
Example 5
    def test_search(self):

        c = ABC(20, objective_function)
        c.add_param(0, 10)
        c.add_param(0, 10)
        c.initialize()
        for _ in range(50):
            c.search()
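        # Unnamed parameters default to P0, P1, ...; the objective's minimum is at (0, 0)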
        self.assertEqual(c.best_fitness, 1)
        self.assertEqual(c.best_ret_val, 0)
        self.assertEqual(c.best_params, {'P0': 0, 'P1': 0})
Example 6
    def test_get_stats(self):

        c = ABC(10, objective_function)
        c.add_param(0, 0)
        c.add_param(0, 0)
        c.initialize()
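        # Both parameters are fixed at 0, so best and average statistics coincide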
        self.assertEqual(c.best_fitness, 1)
        self.assertEqual(c.best_ret_val, 0)
        self.assertEqual(c.best_params, {'P0': 0, 'P1': 0})
        self.assertEqual(c.average_fitness, 1)
        self.assertEqual(c.average_ret_val, 0)
Example 7
def tune_training_parameters(n_bees: int, n_iter: int, dataset_train: QSPRDataset,
                             dataset_eval: QSPRDataset, n_processes: int = 1,
                             **kwargs) -> dict:
    """
    Tunes learning rate and learning rate decay; additional **kwargs can include any of:
        [
            # ECNet parameters
            'epochs' (default 100),
            'batch_size' (default 32),
            'valid_size' (default 0.2),
            'patience' (default 32),
            'hidden_dim' (default 128),
            'n_hidden' (default 2),
            'dropout' (default 0.0),
            # Adam optim. alg. arguments
            'beta_1' (default 0.9),
            'beta_2' (default 0.999),
            'eps' (default 1e-8),
            'weight_decay' (default 0.0),
            'amsgrad' (default False)
        ]

    Args:
        n_bees (int): number of employer bees to use in ABC algorithm
        n_iter (int): number of iterations, or "search cycles", for ABC algorithm
        dataset_train (QSPRDataset): dataset used to train evaluation models
        dataset_eval (QSPRDataset): dataset used for evaluation
        n_processes (int, optional): if > 1, uses multiprocessing when evaluating at an iteration
        **kwargs: additional training arguments (any of those listed above)

    Returns:
        dict: {'lr': float, 'lr_decay': float}
    """

    kwargs['train_ds'] = dataset_train
    kwargs['eval_ds'] = dataset_eval
    abc = ABC(n_bees, _cost_train_hp, num_processes=n_processes, obj_fn_args=kwargs)
    abc.add_param(CONFIG['training_params_range']['lr'][0],
                  CONFIG['training_params_range']['lr'][1], name='lr')
    abc.add_param(CONFIG['training_params_range']['lr_decay'][0],
                  CONFIG['training_params_range']['lr_decay'][1], name='lr_decay')
    abc.initialize()
    for _ in range(n_iter):
        abc.search()
    return {
        'lr': abc.best_params['lr'],
        'lr_decay': abc.best_params['lr_decay']
    }
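A call to this function might look like the following sketch; ds_train and ds_eval are assumed to be pre-built QSPRDataset objects, and the keyword arguments shown are optional overrides from the list above:

# ds_train and ds_eval are assumed to exist; only lr and lr_decay are returned
best = tune_training_parameters(n_bees=10, n_iter=5, dataset_train=ds_train,
                                dataset_eval=ds_eval, n_processes=2, epochs=50)
print(best['lr'], best['lr_decay'])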
Example 8
def main():

    # Create the colony with 10 employer bees and the above objective function
    abc = ABC(10, minimize_integers)

    # Add three integers, randomly initialized between 0 and 10 for each bee
    abc.add_param(0, 10, name='Int_1')
    abc.add_param(0, 10, name='Int_2')
    abc.add_param(0, 10, name='Int_3')

    # Initialize 10 employer bees, 10 onlooker bees
    abc.initialize()

    # Run the search cycle 10 times
    for _ in range(10):
        abc.search()
        print('Average fitness: {}'.format(abc.average_fitness))
        print('Average obj. fn. return value: {}'.format(abc.average_ret_val))
        print('Best fitness score: {}'.format(abc.best_fitness))
        print('Best obj. fn. return value: {}'.format(abc.best_ret_val))
        print('Best parameters: {}\n'.format(abc.best_params))
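This demo assumes an objective function named minimize_integers defined earlier in the script, together with the usual entry-point guard; a minimal sketch (the function body is an assumption consistent with the colony minimizing its return value) is:

def minimize_integers(integers):
    # The colony minimizes this value, so the optimum is all integers at 0
    return sum(integers)

if __name__ == '__main__':
    main()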
Example 9
    def test_add_parameter(self):

        c = ABC(10, objective_function)
        c.add_param(0, 1)
        c.add_param(2, 3)
        c.add_param(4, 5)
        self.assertEqual(len(c._params), 3)
        self.assertEqual(c._params[0]._min_val, 0)
        self.assertEqual(c._params[0]._max_val, 1)
        self.assertEqual(c._params[1]._min_val, 2)
        self.assertEqual(c._params[1]._max_val, 3)
        self.assertEqual(c._params[2]._min_val, 4)
        self.assertEqual(c._params[2]._max_val, 5)
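        # Parameter dtype is inferred from the types of the supplied bounds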
        self.assertEqual(c._params[0]._dtype, int)
        c.add_param(0.0, 1.0)
        self.assertEqual(c._params[3]._dtype, float)
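        # Restriction of values to [min, max] defaults to True; the third positional argument disables it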
        self.assertTrue(c._params[0]._restrict)
        c.add_param(0, 1, False)
        self.assertFalse(c._params[4]._restrict)
Example 10
def tune_hyperparameters(df: DataFrame,
                         vars: dict,
                         num_employers: int,
                         num_iterations: int,
                         num_processes: int = 1,
                         shuffle: str = None,
                         split: list = None,
                         validate: bool = True,
                         eval_set: str = None,
                         eval_fn: str = 'rmse',
                         epochs: int = 500) -> dict:
    '''Tunes neural network learning/architecture hyperparameters

    Args:
        df (ecnet.utils.data_utils.DataFrame): currently loaded data
        vars (dict): ecnet.Server._vars variables
        num_employers (int): number of employer bees
        num_iterations (int): number of search cycles for the colony
        num_processes (int): number of parallel processes to utilize
        shuffle (str): shuffles `train` or `all` sets if not None
        split (list): if `shuffle` is not None, [learn%, valid%, test%] proportions
        validate (bool): if True, uses periodic validation during training
        eval_set (str): set used to evaluate bee performance; `learn`, `valid`,
            `train`, `test`, None (all sets)
        eval_fn (str): error function used to evaluate bee performance; `rmse`,
            `mean_abs_error`, `med_abs_error`
        epochs (int): number of training epochs per bee ANN (def: 500)

    Returns:
        dict: tuned hyperparameters
    '''

    # name is os.name and set_start_method comes from multiprocessing; force the
    # 'spawn' start method on non-Windows platforms ('nt' is Windows)
    if name != 'nt':
        set_start_method('spawn', force=True)

    logger.log('info',
               'Tuning architecture/learning hyperparameters',
               call_loc='TUNE')
    logger.log('debug',
               'Arguments:\n\t| num_employers:\t{}\n\t| '
               'num_iterations:\t{}\n\t| shuffle:\t\t{}\n\t| split:'
               '\t\t{}\n\t| validate:\t\t{}\n\t| eval_set:\t\t{}\n\t'
               '| eval_fn:\t\t{}'.format(num_employers, num_iterations,
                                         shuffle, split, validate, eval_set,
                                         eval_fn),
               call_loc='TUNE')

    fit_fn_args = {
        'df': df,
        'shuffle': shuffle,
        'num_processes': num_processes,
        'split': split,
        'validate': validate,
        'eval_set': eval_set,
        'eval_fn': eval_fn,
        'hidden_layers': vars['hidden_layers'],
        'epochs': epochs
    }

    to_tune = [(1e-9, 1e-4, 'decay'), (1e-5, 0.1, 'learning_rate'),
               (1, len(df.learn_set), 'batch_size'), (64, 1024, 'patience')]
    for hl in range(len(vars['hidden_layers'])):
        to_tune.append((1, 2 * len(df._input_names), 'hl{}'.format(hl)))

    abc = ABC(num_employers, tune_fitness_function, fit_fn_args, num_processes)
    for param in to_tune:
        abc.add_param(param[0], param[1], name=param[2])
    abc.initialize()

    best_ret_val = abc.best_ret_val
    best_params = abc.best_params
    for i in range(num_iterations):
        logger.log('info', 'Iteration {}'.format(i + 1), call_loc='TUNE')
        abc.search()
        new_best_ret = abc.best_ret_val
        new_best_params = abc.best_params
        logger.log('info',
                   'Best Performer: {}, {}'.format(new_best_ret, new_best_params),
                   call_loc='TUNE')
        if new_best_ret < best_ret_val:
            best_ret_val = new_best_ret
            best_params = new_best_params

    vars['decay'] = best_params['decay']
    vars['learning_rate'] = best_params['learning_rate']
    vars['batch_size'] = best_params['batch_size']
    vars['patience'] = best_params['patience']
    for l_idx in range(len(vars['hidden_layers'])):
        vars['hidden_layers'][l_idx][0] = best_params['hl{}'.format(l_idx)]
    return vars