Example #1
    def get_config_space(num_layers=((1, 15), False),
                         num_units=((10, 1024), True),
                         activation=('sigmoid', 'tanh', 'relu'),
                         dropout=(0.0, 0.8),
                         use_dropout=(True, False),
                         **kwargs):
        cs = CS.ConfigurationSpace()

        num_layers_hp = get_hyperparameter(CSH.UniformIntegerHyperparameter,
                                           'num_layers', num_layers)
        cs.add_hyperparameter(num_layers_hp)
        use_dropout_hp = add_hyperparameter(cs, CS.CategoricalHyperparameter,
                                            "use_dropout", use_dropout)

        for i in range(1, num_layers[0][1] + 1):
            n_units_hp = get_hyperparameter(
                CSH.UniformIntegerHyperparameter, "num_units_%d" % i,
                kwargs.pop("num_units_%d" % i, num_units))
            cs.add_hyperparameter(n_units_hp)

            if i > num_layers[0][0]:
                cs.add_condition(
                    CS.GreaterThanCondition(n_units_hp, num_layers_hp, i - 1))

            if True in use_dropout:
                dropout_hp = get_hyperparameter(
                    CSH.UniformFloatHyperparameter, "dropout_%d" % i,
                    kwargs.pop("dropout_%d" % i, dropout))
                cs.add_hyperparameter(dropout_hp)
                dropout_condition_1 = CS.EqualsCondition(
                    dropout_hp, use_dropout_hp, True)

                if i > num_layers[0][0]:
                    dropout_condition_2 = CS.GreaterThanCondition(
                        dropout_hp, num_layers_hp, i - 1)
                    cs.add_condition(
                        CS.AndConjunction(dropout_condition_1,
                                          dropout_condition_2))
                else:
                    cs.add_condition(dropout_condition_1)

        add_hyperparameter(cs, CSH.CategoricalHyperparameter, 'activation',
                           activation)
        assert len(
            kwargs
        ) == 0, "Invalid hyperparameter updates for mlpnet: %s" % str(kwargs)
        return cs
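Several of the autoPyTorch snippets on this page (Examples #1, #4, and #5) call get_hyperparameter and add_hyperparameter from autoPyTorch.utils.config_space_hyperparameter without showing them. The sketch below illustrates what they plausibly do given the call sites above, where a range is either a plain (lower, upper) tuple or ((lower, upper), log) with an explicit log-scale flag; this is an assumption for illustration, not the upstream implementation.

import ConfigSpace.hyperparameters as CSH


def get_hyperparameter(hyperparameter_type, name, value_range):
    # ((lower, upper), log) encodes an optional log scale; plain tuples
    # are used as-is (inferred from the call sites above).
    log = False
    if isinstance(value_range[0], (tuple, list)):
        value_range, log = value_range
    if hyperparameter_type is CSH.CategoricalHyperparameter:
        return CSH.CategoricalHyperparameter(name, list(value_range))
    if hyperparameter_type is CSH.UniformIntegerHyperparameter:
        return CSH.UniformIntegerHyperparameter(
            name, lower=value_range[0], upper=value_range[1], log=log)
    if hyperparameter_type is CSH.UniformFloatHyperparameter:
        return CSH.UniformFloatHyperparameter(
            name, lower=value_range[0], upper=value_range[1], log=log)
    raise ValueError("unsupported hyperparameter type: %s" % hyperparameter_type)


def add_hyperparameter(cs, hyperparameter_type, name, value_range):
    # Convenience wrapper: build the hyperparameter, add it to the
    # configuration space, and return it.
    hp = get_hyperparameter(hyperparameter_type, name, value_range)
    cs.add_hyperparameter(hp)
    return hp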
Example #2
    def get_config_space(user_updates=None):
        cs = CS.ConfigurationSpace()
        range_num_layers = (1, 15)
        range_num_units = (10, 1024)
        possible_activations = ('sigmoid', 'tanh', 'relu')
        range_dropout = (0.0, 0.8)

        if user_updates is not None and 'num_layers' in user_updates:
            range_num_layers = user_updates['num_layers']

        num_layers = CSH.UniformIntegerHyperparameter(
            'num_layers', lower=range_num_layers[0], upper=range_num_layers[1])
        cs.add_hyperparameter(num_layers)
        use_dropout = cs.add_hyperparameter(
            CS.CategoricalHyperparameter("use_dropout", [True, False],
                                         default_value=True))

        for i in range(1, range_num_layers[1] + 1):
            n_units = CSH.UniformIntegerHyperparameter(
                "num_units_%d" % i,
                lower=range_num_units[0],
                upper=range_num_units[1],
                log=True)
            cs.add_hyperparameter(n_units)
            dropout = CSH.UniformFloatHyperparameter("dropout_%d" % i,
                                                     lower=range_dropout[0],
                                                     upper=range_dropout[1])
            cs.add_hyperparameter(dropout)
            dropout_condition_1 = CS.EqualsCondition(dropout, use_dropout,
                                                     True)

            if i > range_num_layers[0]:
                cs.add_condition(
                    CS.GreaterThanCondition(n_units, num_layers, i - 1))

                dropout_condition_2 = CS.GreaterThanCondition(
                    dropout, num_layers, i - 1)
                cs.add_condition(
                    CS.AndConjunction(dropout_condition_1,
                                      dropout_condition_2))
            else:
                cs.add_condition(dropout_condition_1)

        cs.add_hyperparameter(
            CSH.CategoricalHyperparameter('activation', possible_activations))
        return cs
Example #3
    def get_configspace():
        """
        Builds the configuration space with the needed hyperparameters.
        It is easily possible to implement different types of hyperparameters.
        Besides float hyperparameters on a log scale, it can also handle categorical input parameters.
        :return: ConfigurationSpace object
        """
        import ConfigSpace as CS
        import ConfigSpace.hyperparameters as CSH
        cs = CS.ConfigurationSpace()

        scaler = CSH.CategoricalHyperparameter('scaler', [
            'None', 'StandardScaler', 'RobustScaler', 'MinMaxScaler',
            'MaxAbsScaler'
        ])
        init = CSH.CategoricalHyperparameter('init', [
            'uniform', 'normal', 'glorot_uniform', 'glorot_normal',
            'he_uniform', 'he_normal'
        ])
        batch_size = CSH.CategoricalHyperparameter('batch_size',
                                                   [16, 32, 64, 128, 256])
        shuffle = CSH.CategoricalHyperparameter('shuffle', [True, False])
        loss = CSH.CategoricalHyperparameter(
            'loss', ['mean_absolute_error', 'mean_squared_error'])
        optimizer = CSH.CategoricalHyperparameter(
            'optimizer', ['rmsprop', 'adagrad', 'adadelta', 'adam', 'adamax'])

        cs.add_hyperparameters(
            [scaler, init, batch_size, shuffle, loss, optimizer])

        n_layers = CSH.UniformIntegerHyperparameter('n_layers',
                                                    lower=1,
                                                    upper=5,
                                                    default_value=2)

        layer_sizes = [
            CSH.UniformIntegerHyperparameter('layer_{}_size'.format(l),
                                             lower=2,
                                             upper=100,
                                             default_value=16,
                                             log=True) for l in range(1, 6)
        ]

        layer_activations = [
            CSH.CategoricalHyperparameter('layer_{}_activation'.format(l),
                                          ['relu', 'sigmoid', 'tanh'])
            for l in range(1, 6)
        ]

        layer_extras = [
            CSH.CategoricalHyperparameter('layer_{}_extras'.format(l),
                                          ['None', 'dropout', 'batchnorm'])
            for l in range(1, 6)
        ]

        dropout_rates = [
            CSH.UniformFloatHyperparameter('dropout_rate_{}'.format(l),
                                           lower=0.1,
                                           upper=0.5,
                                           default_value=0.2,
                                           log=False) for l in range(1, 6)
        ]

        cs.add_hyperparameters([n_layers] + layer_sizes + layer_activations +
                               layer_extras + dropout_rates)

        conditions = [
            CS.GreaterThanCondition(layer_sizes[n], n_layers, n)
            for n in range(1, 5)
        ]

        conditions = conditions + \
            [CS.GreaterThanCondition(layer_activations[n], n_layers, n)
             for n in range(1, 5)]

        conditions = conditions + \
            [CS.GreaterThanCondition(layer_extras[n], n_layers, n)
             for n in range(1, 5)]

        equal_conditions = [
            CS.EqualsCondition(dropout_rates[n], layer_extras[n], 'dropout')
            for n in range(0, 5)
        ]

        greater_size_conditions = [
            CS.GreaterThanCondition(dropout_rates[n], n_layers, n)
            for n in range(1, 5)
        ]

        for c in conditions:
            cs.add_condition(c)

        cs.add_condition(equal_conditions[0])

        for j in range(0, 4):
            cond = CS.AndConjunction(greater_size_conditions[j],
                                     equal_conditions[j + 1])
            cs.add_condition(cond)

        return cs
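Because of the GreaterThanCondition chains above, the per-layer hyperparameters only appear in a sampled configuration when n_layers makes them active. A quick way to see this in action (hypothetical usage of the function above):

cs = get_configspace()
config = cs.sample_configuration()
# Inactive hyperparameters (e.g. layer_5_size when n_layers < 5) are
# simply absent from the sampled configuration.
print(config.get_dictionary())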
Example #4
    def get_config_space(num_groups=((1, 9), False),
                         blocks_per_group=((1, 4), False),
                         num_units=((10, 1024), True),
                         activation=('sigmoid', 'tanh', 'relu'),
                         max_shake_drop_probability=(0, 1),
                         dropout=(0, 1.0),
                         use_shake_drop=(True, False),
                         use_shake_shake=(True, False),
                         use_dropout=(True, False),
                         **kwargs):
        cs = ConfigSpace.ConfigurationSpace()

        num_groups_hp = get_hyperparameter(
            ConfigSpace.UniformIntegerHyperparameter, "num_groups", num_groups)
        cs.add_hyperparameter(num_groups_hp)
        blocks_per_group_hp = get_hyperparameter(
            ConfigSpace.UniformIntegerHyperparameter, "blocks_per_group",
            blocks_per_group)
        cs.add_hyperparameter(blocks_per_group_hp)
        add_hyperparameter(cs, ConfigSpace.CategoricalHyperparameter,
                           "activation", activation)

        use_dropout_hp = get_hyperparameter(
            ConfigSpace.CategoricalHyperparameter, "use_dropout", use_dropout)
        cs.add_hyperparameter(use_dropout_hp)
        add_hyperparameter(cs, ConfigSpace.CategoricalHyperparameter,
                           "use_shake_shake", use_shake_shake)

        use_shake_drop_hp = add_hyperparameter(
            cs, ConfigSpace.CategoricalHyperparameter, "use_shake_drop",
            use_shake_drop)
        if True in use_shake_drop:
            shake_drop_prob_hp = add_hyperparameter(
                cs, ConfigSpace.UniformFloatHyperparameter,
                "max_shake_drop_probability", max_shake_drop_probability)
            cs.add_condition(
                ConfigSpace.EqualsCondition(shake_drop_prob_hp,
                                            use_shake_drop_hp, True))

        # Iterate up to the upper bound of the number of groups; the actual
        # number of groups is sampled from the configuration.
        for i in range(0, num_groups[0][1] + 1):

            n_units_hp = add_hyperparameter(
                cs, ConfigSpace.UniformIntegerHyperparameter,
                "num_units_%d" % i, kwargs.pop("num_units_%d" % i, num_units))

            if i > 1:
                cs.add_condition(
                    ConfigSpace.GreaterThanCondition(n_units_hp, num_groups_hp,
                                                     i - 1))

            if True in use_dropout:
                dropout_hp = add_hyperparameter(
                    cs, ConfigSpace.UniformFloatHyperparameter,
                    "dropout_%d" % i, kwargs.pop("dropout_%d" % i, dropout))
                dropout_condition_1 = ConfigSpace.EqualsCondition(
                    dropout_hp, use_dropout_hp, True)

                if i > 1:

                    dropout_condition_2 = ConfigSpace.GreaterThanCondition(
                        dropout_hp, num_groups_hp, i - 1)

                    cs.add_condition(
                        ConfigSpace.AndConjunction(dropout_condition_1,
                                                   dropout_condition_2))
                else:
                    cs.add_condition(dropout_condition_1)
        assert len(
            kwargs
        ) == 0, "Invalid hyperparameter updates for resnet: %s" % str(kwargs)
        return cs
Example #5
    def get_config_space(growth_rate_range=(5, 128), nr_blocks=(1, 5), kernel_range=(2, 7),
                         layer_range=(5, 50), activations=all_activations.keys(),
                         conv_init=('random', 'kaiming_normal', 'constant_0', 'constant_1', 'constant_05'),
                         batchnorm_weight_init=('random', 'constant_0', 'constant_1', 'constant_05'),
                         batchnorm_bias_init=('random', 'constant_0', 'constant_1', 'constant_05'),
                         linear_bias_init=('random', 'constant_0', 'constant_1', 'constant_05'), **kwargs):

        import ConfigSpace as CS
        import ConfigSpace.hyperparameters as CSH
        from autoPyTorch.utils.config_space_hyperparameter import add_hyperparameter, get_hyperparameter

        cs = CS.ConfigurationSpace()
        growth_rate_hp = get_hyperparameter(CSH.UniformIntegerHyperparameter, 'growth_rate', growth_rate_range)
        first_conv_kernel_hp = get_hyperparameter(CSH.UniformIntegerHyperparameter, 'first_conv_kernel', kernel_range)
        first_pool_kernel_hp = get_hyperparameter(CSH.UniformIntegerHyperparameter, 'first_pool_kernel', kernel_range)
        conv_init_hp = get_hyperparameter(CSH.CategoricalHyperparameter, 'conv_init', conv_init)
        batchnorm_weight_init_hp = get_hyperparameter(CSH.CategoricalHyperparameter, 'batchnorm_weight_init', batchnorm_weight_init)
        batchnorm_bias_init_hp = get_hyperparameter(CSH.CategoricalHyperparameter, 'batchnorm_bias_init', batchnorm_bias_init)
        linear_bias_init_hp = get_hyperparameter(CSH.CategoricalHyperparameter, 'linear_bias_init', linear_bias_init)
        first_activation_hp = get_hyperparameter(CSH.CategoricalHyperparameter, 'first_activation', sorted(set(activations).intersection(all_activations)))
        blocks_hp = get_hyperparameter(CSH.UniformIntegerHyperparameter, 'blocks', nr_blocks)

        cs.add_hyperparameter(growth_rate_hp)
        cs.add_hyperparameter(first_conv_kernel_hp)
        cs.add_hyperparameter(first_pool_kernel_hp)
        cs.add_hyperparameter(conv_init_hp)
        cs.add_hyperparameter(batchnorm_weight_init_hp)
        cs.add_hyperparameter(batchnorm_bias_init_hp)
        cs.add_hyperparameter(linear_bias_init_hp)
        cs.add_hyperparameter(first_activation_hp)
        cs.add_hyperparameter(blocks_hp)
        add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'channel_reduction', [0.1, 0.9])
        add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'last_image_size', [0, 1])
        add_hyperparameter(cs, CSH.CategoricalHyperparameter, 'bottleneck', [True, False])
        use_dropout = add_hyperparameter(cs, CSH.CategoricalHyperparameter, 'use_dropout', [True, False])

        if type(nr_blocks[0]) == int:
            min_blocks = nr_blocks[0]
            max_blocks = nr_blocks[1]
        else:
            min_blocks = nr_blocks[0][0]
            max_blocks = nr_blocks[0][1]

        for i in range(1, max_blocks+1):
            layer_hp = get_hyperparameter(CSH.UniformIntegerHyperparameter, 'layer_in_block_%d' % i, layer_range)
            pool_kernel_hp = get_hyperparameter(CSH.UniformIntegerHyperparameter, 'pool_kernel_%d' % i, kernel_range)
            activation_hp = get_hyperparameter(CSH.CategoricalHyperparameter, 'activation_%d' % i, sorted(set(activations).intersection(all_activations)))
            cs.add_hyperparameter(layer_hp)
            cs.add_hyperparameter(pool_kernel_hp)
            cs.add_hyperparameter(activation_hp)
            dropout = add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'dropout_%d' % i, [0.0, 1.0])
            conv_kernel = add_hyperparameter(cs, CSH.CategoricalHyperparameter, 'conv_kernel_%d' % i, [3, 5, 7])

            if i > min_blocks:
                cs.add_condition(CS.GreaterThanCondition(layer_hp, blocks_hp, i-1))
                cs.add_condition(CS.GreaterThanCondition(conv_kernel, blocks_hp, i-1))
                cs.add_condition(CS.GreaterThanCondition(pool_kernel_hp, blocks_hp, i-1))
                cs.add_condition(CS.GreaterThanCondition(activation_hp, blocks_hp, i-1))
                cs.add_condition(CS.AndConjunction(CS.EqualsCondition(dropout, use_dropout, True), CS.GreaterThanCondition(dropout, blocks_hp, i-1)))
            else:
                cs.add_condition(CS.EqualsCondition(dropout, use_dropout, True))

        return cs
Example #6
def get_hyperparameter_search_space(seed):
    """
    A best-effort neural network search space based on the scikit-learn
    implementation. Note that for state-of-the-art performance, other
    packages may be preferable.

    Parameters
    ----------
    seed: int
        Random seed that will be used to sample random configurations

    Returns
    -------
    cs: ConfigSpace.ConfigurationSpace
        The configuration space object
    """
    cs = ConfigSpace.ConfigurationSpace('sklearn.neural_network.MLPClassifier', seed)
    strategy = ConfigSpace.CategoricalHyperparameter(
        name='columntransformer__numeric__imputer__strategy', choices=['mean', 'median', 'most_frequent'])
    hidden_layer_sizes = ConfigSpace.UniformIntegerHyperparameter(
        name='mlpclassifier__hidden_layer_sizes', lower=32, upper=2048, default_value=2048)
    activation = ConfigSpace.CategoricalHyperparameter(
        name='mlpclassifier__activation', choices=['identity', 'logistic', 'tanh', 'relu'], default_value='relu')
    solver = ConfigSpace.CategoricalHyperparameter(
        name='mlpclassifier__solver', choices=['lbfgs', 'sgd', 'adam'], default_value='adam')
    alpha = ConfigSpace.UniformFloatHyperparameter(
        name='mlpclassifier__alpha', lower=1e-5, upper=1e-1, log=True, default_value=1e-4)
    batch_size = ConfigSpace.UniformIntegerHyperparameter(
        name='mlpclassifier__batch_size', lower=32, upper=4096, default_value=200)
    learning_rate = ConfigSpace.CategoricalHyperparameter(
        name='mlpclassifier__learning_rate', choices=['constant', 'invscaling', 'adaptive'], default_value='constant')
    learning_rate_init = ConfigSpace.UniformFloatHyperparameter(
        name='mlpclassifier__learning_rate_init', lower=1e-5, upper=1e-1, log=True, default_value=1e-04)
    # TODO: Sensible range??
    power_t = ConfigSpace.UniformFloatHyperparameter(
        name='mlpclassifier__power_t', lower=1e-5, upper=1, log=True, default_value=0.5)
    max_iter = ConfigSpace.UniformIntegerHyperparameter(
        name='mlpclassifier__max_iter', lower=64, upper=1024, default_value=200)
    shuffle = ConfigSpace.CategoricalHyperparameter(
        name='mlpclassifier__shuffle', choices=[True, False], default_value=True)
    tol = ConfigSpace.UniformFloatHyperparameter(
        name='mlpclassifier__tol', lower=1e-5, upper=1e-1, default_value=1e-4, log=True)
    # TODO: log-scale?
    momentum = ConfigSpace.UniformFloatHyperparameter(
        name='mlpclassifier__momentum', lower=0, upper=1, default_value=0.9)
    nesterovs_momentum = ConfigSpace.CategoricalHyperparameter(
        name='mlpclassifier__nesterovs_momentum', choices=[True, False], default_value=True)
    early_stopping = ConfigSpace.CategoricalHyperparameter(
        name='mlpclassifier__early_stopping', choices=[True, False], default_value=True)
    validation_fraction = ConfigSpace.UniformFloatHyperparameter(
        name='mlpclassifier__validation_fraction', lower=0, upper=1, default_value=0.1)
    beta_1 = ConfigSpace.UniformFloatHyperparameter(
        name='mlpclassifier__beta_1', lower=0, upper=1, default_value=0.9)
    beta_2 = ConfigSpace.UniformFloatHyperparameter(
        name='mlpclassifier__beta_2', lower=0, upper=1, default_value=0.999)
    n_iter_no_change = ConfigSpace.UniformIntegerHyperparameter(
        name='mlpclassifier__n_iter_no_change', lower=1, upper=1024, default_value=200)

    cs.add_hyperparameters([
        strategy,
        hidden_layer_sizes,
        activation,
        solver,
        alpha,
        batch_size,
        learning_rate,
        learning_rate_init,
        power_t,
        max_iter,
        shuffle,
        tol,
        momentum,
        nesterovs_momentum,
        early_stopping,
        validation_fraction,
        beta_1,
        beta_2,
        n_iter_no_change,
    ])

    batch_size_condition = ConfigSpace.InCondition(batch_size, solver, ['sgd', 'adam'])
    learning_rate_init_condition = ConfigSpace.InCondition(learning_rate_init, solver, ['sgd', 'adam'])
    power_t_condition = ConfigSpace.EqualsCondition(power_t, solver, 'sgd')
    shuffle_condition = ConfigSpace.InCondition(shuffle, solver, ['sgd', 'adam'])
    tol_condition = ConfigSpace.InCondition(tol, learning_rate, ['constant', 'invscaling'])
    momentum_condition = ConfigSpace.EqualsCondition(momentum, solver, 'sgd')
    nesterovs_momentum_condition_solver = ConfigSpace.EqualsCondition(nesterovs_momentum, solver, 'sgd')
    nesterovs_momentum_condition_momentum = ConfigSpace.GreaterThanCondition(nesterovs_momentum, momentum, 0)
    nesterovs_momentum_conjunction = ConfigSpace.AndConjunction(nesterovs_momentum_condition_solver,
                                                                nesterovs_momentum_condition_momentum)
    early_stopping_condition = ConfigSpace.InCondition(early_stopping, solver, ['sgd', 'adam'])
    validation_fraction_condition = ConfigSpace.EqualsCondition(validation_fraction, early_stopping, True)
    beta_1_condition = ConfigSpace.EqualsCondition(beta_1, solver, 'adam')
    beta_2_condition = ConfigSpace.EqualsCondition(beta_2, solver, 'adam')
    n_iter_no_change_condition_solver = ConfigSpace.InCondition(n_iter_no_change, solver, ['sgd', 'adam'])

    cs.add_condition(batch_size_condition)
    cs.add_condition(learning_rate_init_condition)
    cs.add_condition(power_t_condition)
    cs.add_condition(shuffle_condition)
    cs.add_condition(tol_condition)
    cs.add_condition(momentum_condition)
    cs.add_condition(nesterovs_momentum_conjunction)
    cs.add_condition(early_stopping_condition)
    cs.add_condition(validation_fraction_condition)
    cs.add_condition(beta_1_condition)
    cs.add_condition(beta_2_condition)
    cs.add_condition(n_iter_no_change_condition_solver)

    return cs
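A short usage sketch for the search space above (hypothetical call; assumes ConfigSpace is imported as in the function):

cs = get_hyperparameter_search_space(seed=42)
for config in cs.sample_configuration(3):
    # Conditional hyperparameters such as mlpclassifier__momentum only
    # appear when their parent condition (solver == 'sgd') holds.
    print(config.get_dictionary())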
Example #7
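This snippet is an excerpt from the ConfigSpace conditions tutorial; the space cs and the hyperparameters a through g are defined earlier in that tutorial. A plausible setup so the excerpt runs on its own (names kept, ranges assumed for illustration):

import ConfigSpace as CS

cs = CS.ConfigurationSpace(seed=1)
a = CS.UniformIntegerHyperparameter('a', lower=1, upper=10)
b = CS.UniformIntegerHyperparameter('b', lower=1, upper=10)
c = CS.UniformIntegerHyperparameter('c', lower=20, upper=30)
d = CS.UniformIntegerHyperparameter('d', lower=1, upper=10)
e = CS.UniformIntegerHyperparameter('e', lower=1, upper=10)
f = CS.UniformIntegerHyperparameter('f', lower=1, upper=10)
g = CS.UniformIntegerHyperparameter('g', lower=1, upper=10)
cs.add_hyperparameters([a, b, c, d, e, f, g])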
# 3) LessThanCondition:
#    'd' is only active if 'b' is less than 5
# We do not add this condition here directly, because we will use it later in the 'and-conjunction'.
less_cond = CS.LessThanCondition(d, b, 5)

# 4) GreaterThanCondition:
#    'd' is only active if 'b' is greater than 2
greater_cond = CS.GreaterThanCondition(d, b, 2)

# 5) InCondition:
#    'e' is only active if 'c' is in the set [25, 26, 27]
in_cond = CS.InCondition(e, c, [25, 26, 27])

# 6) AndConjunction:
#    The 'and-conjunction' combines the conditions less_cond and greater_cond
cs.add_condition(CS.AndConjunction(less_cond, greater_cond))

# 7) OrConjunction:
#    The 'or-conjunction' works similarly to the 'and-conjunction'
equals_cond = CS.EqualsCondition(e, a, 2)
cs.add_condition(CS.OrConjunction(in_cond, equals_cond))

# 8) ForbiddenEqualsClause:
#    This clause forbids the value 2 for the hyperparameter f
forbidden_clause_f = CS.ForbiddenEqualsClause(f, 2)

# 9) ForbiddenInClause
#    This clause forbids the value of the hyperparameter g to be in the set [2]
forbidden_clause_g = CS.ForbiddenInClause(g, [2])

# 10) ForbiddenAndConjunction
Example #8
def configuration_space_from_raw(hpRaw,
                                 hpRawConditions,
                                 resolve_multiple='AND'):
    cs = CS.ConfigurationSpace()
    #
    # add hyperparameters
    #
    for hp in hpRaw:
        if hp[4] == "float":
            cs.add_hyperparameter(
                CS.UniformFloatHyperparameter(hp[0],
                                              lower=hp[1][0],
                                              upper=hp[1][1],
                                              default_value=hp[2],
                                              log=hp[3]))
        elif hp[4] == "int":
            cs.add_hyperparameter(
                CS.UniformIntegerHyperparameter(hp[0],
                                                lower=hp[1][0],
                                                upper=hp[1][1],
                                                default_value=hp[2],
                                                log=hp[3]))
        elif hp[4] == "cat":
            cs.add_hyperparameter(CS.CategoricalHyperparameter(hp[0], hp[1]))
        else:
            raise Exception("unknown hp type in hpRawList")

    #
    # add conditions
    #
    covered_conditions = dict()
    for cond in hpRawConditions:
        # check if conditions for that hyperparameter were already processed
        if cond[0] in covered_conditions:
            continue
        covered_conditions[cond[0]] = True

        # get all conditions for that hyperparameter
        all_conds_for_hyperparameter = []
        for other_cond in hpRawConditions:
            if other_cond[0] == cond[0]:
                all_conds_for_hyperparameter.append(other_cond)

        # create the condition objects
        condition_objects = []
        for cond in all_conds_for_hyperparameter:
            if cond[1] == "eq":
                condition_objects.append(
                    CS.EqualsCondition(cs.get_hyperparameter(cond[0]),
                                       cs.get_hyperparameter(cond[2]),
                                       cond[3]))
            elif cond[1] == "gtr":
                condition_objects.append(
                    CS.GreaterThanCondition(cs.get_hyperparameter(cond[0]),
                                            cs.get_hyperparameter(cond[2]),
                                            cond[3]))
            else:
                raise Exception("unknown condition type in hpRawConditions")

        # add the conditions to the configuration space
        if len(condition_objects) == 1:
            # simply add the condition
            cs.add_condition(condition_objects[0])
        else:
            # resolve multiple conditions
            if resolve_multiple == 'AND':
                cs.add_condition(CS.AndConjunction(*condition_objects))
            elif resolve_multiple == 'OR':
                cs.add_condition(CS.OrConjunction(*condition_objects))
            else:
                raise Exception("resolve_multiple=", resolve_multiple,
                                ". should be 'AND' or 'OR'")

    return cs
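The raw formats are implicit in the function above: each hyperparameter row is (name, range_or_choices, default, log, type) and each condition row is (child, kind, parent, value). A hypothetical call with invented values:

hpRaw = [
    ('learning_rate', (1e-4, 1e-1), 1e-2, True, 'float'),
    ('n_layers', (1, 5), 2, False, 'int'),
    ('optimizer', ('sgd', 'adam'), None, False, 'cat'),
    ('momentum', (0.0, 0.99), 0.9, False, 'float'),
]
hpRawConditions = [
    # momentum is only active when optimizer == 'sgd'
    ('momentum', 'eq', 'optimizer', 'sgd'),
]
cs = configuration_space_from_raw(hpRaw, hpRawConditions)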
Example #9
    def get_hyperparameter_search_space(
        dataset_properties: Optional[Dict[str,
                                          BaseDatasetPropertiesType]] = None,
        num_groups: HyperparameterSearchSpace = HyperparameterSearchSpace(
            hyperparameter="num_groups",
            value_range=(1, 15),
            default_value=5,
        ),
        activation: HyperparameterSearchSpace = HyperparameterSearchSpace(
            hyperparameter="activation",
            value_range=tuple(_activations.keys()),
            default_value=list(_activations.keys())[0],
        ),
        use_dropout: HyperparameterSearchSpace = HyperparameterSearchSpace(
            hyperparameter="use_dropout",
            value_range=(True, False),
            default_value=False,
        ),
        num_units: HyperparameterSearchSpace = HyperparameterSearchSpace(
            hyperparameter="num_units",
            value_range=(10, 1024),
            default_value=200,
        ),
        dropout: HyperparameterSearchSpace = HyperparameterSearchSpace(
            hyperparameter="dropout",
            value_range=(0, 0.8),
            default_value=0.5,
        ),
    ) -> ConfigurationSpace:

        cs = ConfigurationSpace()

        # The number of hidden layers the network will have.
        # Layer blocks are meant to have the same architecture, differing only
        # by the number of units
        min_mlp_layers, max_mlp_layers = num_groups.value_range
        num_groups = get_hyperparameter(num_groups,
                                        UniformIntegerHyperparameter)
        add_hyperparameter(cs, activation, CategoricalHyperparameter)

        # We can have dropout in the network for
        # better generalization
        use_dropout = get_hyperparameter(use_dropout,
                                         CategoricalHyperparameter)
        cs.add_hyperparameters([num_groups, use_dropout])

        for i in range(1, int(max_mlp_layers) + 1):
            n_units_search_space = HyperparameterSearchSpace(
                hyperparameter='num_units_%d' % i,
                value_range=num_units.value_range,
                default_value=num_units.default_value,
                log=num_units.log)
            n_units_hp = get_hyperparameter(n_units_search_space,
                                            UniformIntegerHyperparameter)
            cs.add_hyperparameter(n_units_hp)

            if i > int(min_mlp_layers):
                # The units of layer i should only exist
                # if there are at least i layers
                cs.add_condition(
                    CS.GreaterThanCondition(n_units_hp, num_groups, i - 1))
            dropout_search_space = HyperparameterSearchSpace(
                hyperparameter='dropout_%d' % i,
                value_range=dropout.value_range,
                default_value=dropout.default_value,
                log=dropout.log)
            dropout_hp = get_hyperparameter(dropout_search_space,
                                            UniformFloatHyperparameter)
            cs.add_hyperparameter(dropout_hp)

            dropout_condition_1 = CS.EqualsCondition(dropout_hp, use_dropout,
                                                     True)

            if i > int(min_mlp_layers):
                dropout_condition_2 = CS.GreaterThanCondition(
                    dropout_hp, num_groups, i - 1)
                cs.add_condition(
                    CS.AndConjunction(dropout_condition_1,
                                      dropout_condition_2))
            else:
                cs.add_condition(dropout_condition_1)

        return cs
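This example (and Example #11 below) relies on autoPyTorch's HyperparameterSearchSpace container, plus get_hyperparameter/add_hyperparameter overloads that take such a container together with a ConfigSpace hyperparameter class. Its shape is roughly the following; a sketch inferred from the call sites, not the exact upstream definition:

from typing import NamedTuple, Optional, Sequence, Union

HPValue = Union[int, float, str, bool]


class HyperparameterSearchSpace(NamedTuple):
    # Fields inferred from how the container is built and read above.
    hyperparameter: str
    value_range: Sequence[HPValue]
    default_value: Optional[HPValue] = None
    log: bool = False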
Example #10
    def fit_search_space(self, df):
        """Apply prior-guided transfer learning based on a DataFrame of results.

        :meta private:

        Args:
            df (str|DataFrame): a checkpoint from a previous search.
        """

        if isinstance(df, str) and df.endswith(".csv"):
            df = pd.read_csv(df)
        assert isinstance(df, pd.DataFrame)

        cst = self._problem.space
        if type(cst) != CS.ConfigurationSpace:
            logging.error(f"{type(cst)}: not supported for transfer learning")

        res_df = df
        res_df_names = res_df.columns.values
        best_index = np.argmax(res_df["objective"].values)
        best_param = res_df.iloc[best_index]

        fac_numeric = 8.0
        fac_categorical = 10.0

        cst_new = CS.ConfigurationSpace(seed=1234)
        hp_names = cst.get_hyperparameter_names()
        for hp_name in hp_names:
            hp = cst.get_hyperparameter(hp_name)
            if hp_name in res_df_names:
                if (type(hp) is csh.UniformIntegerHyperparameter
                        or type(hp) is csh.UniformFloatHyperparameter):
                    mu = best_param[hp.name]
                    lower = hp.lower
                    upper = hp.upper
                    sigma = max(1.0, (upper - lower) / fac_numeric)
                    if type(hp) is csh.UniformIntegerHyperparameter:
                        param_new = csh.NormalIntegerHyperparameter(
                            name=hp.name,
                            default_value=mu,
                            mu=mu,
                            sigma=sigma,
                            lower=lower,
                            upper=upper,
                        )
                    else:  # type is csh.UniformFloatHyperparameter:
                        param_new = csh.NormalFloatHyperparameter(
                            name=hp.name,
                            default_value=mu,
                            mu=mu,
                            sigma=sigma,
                            lower=lower,
                            upper=upper,
                        )
                    cst_new.add_hyperparameter(param_new)
                elif type(hp) is csh.CategoricalHyperparameter:
                    choices = hp.choices
                    weights = len(hp.choices) * [1.0]
                    index = choices.index(best_param[hp.name])
                    weights[index] = fac_categorical
                    norm_weights = [float(i) / sum(weights) for i in weights]
                    param_new = csh.CategoricalHyperparameter(
                        name=hp.name, choices=choices, weights=norm_weights)
                    cst_new.add_hyperparameter(param_new)
                else:
                    logging.warning(
                        "Not fitting {hp} because it is not supported!")
                    cst_new.add_hyperparameter(hp)
            else:
                logging.warning(
                    "Not fitting {hp} because it was not found in the dataframe!"
                )
                cst_new.add_hyperparameter(hp)

        # For conditions
        for cond in cst.get_conditions():
            if type(cond) == CS.AndConjunction or type(
                    cond) == CS.OrConjunction:
                cond_list = []
                for comp in cond.components:
                    cond_list.append(self.return_cond(comp, cst_new))
                if type(cond) is CS.AndConjunction:
                    cond_new = CS.AndConjunction(*cond_list)
                elif type(cond) is CS.OrConjunction:
                    cond_new = CS.OrConjunction(*cond_list)
                else:
                    logging.warning(
                        f"Condition {type(cond)} is not implemented!")
            else:
                cond_new = self.return_cond(cond, cst_new)
            cst_new.add_condition(cond_new)

        # For forbiddens
        for cond in cst.get_forbiddens():
            if type(cond) is CS.ForbiddenAndConjunction:
                cond_list = []
                for comp in cond.components:
                    cond_list.append(self.return_forbid(comp, cst_new))
                cond_new = CS.ForbiddenAndConjunction(*cond_list)
            elif (type(cond) is CS.ForbiddenEqualsClause
                  or type(cond) is CS.ForbiddenInClause):
                cond_new = self.return_forbid(cond, cst_new)
            else:
                logging.warning(f"Forbidden {type(cond)} is not implemented!")
            cst_new.add_forbidden_clause(cond_new)

        self._opt_kwargs["dimensions"] = cst_new
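To make the categorical re-weighting above concrete: with fac_categorical = 10.0 and three choices of which 'adam' was the best observed value, the normalized weights work out as follows (a worked sketch with invented choices):

choices = ('sgd', 'adam', 'rmsprop')
weights = [1.0, 1.0, 1.0]
weights[choices.index('adam')] = 10.0  # boost the incumbent
norm_weights = [w / sum(weights) for w in weights]
# norm_weights == [1/12, 10/12, 1/12]: the incumbent keeps roughly 83%
# of the sampling mass while the other choices remain reachable.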
Example #11
    def get_hyperparameter_search_space(
        dataset_properties: Optional[Dict[str,
                                          BaseDatasetPropertiesType]] = None,
        num_groups: HyperparameterSearchSpace = HyperparameterSearchSpace(
            hyperparameter="num_groups",
            value_range=(1, 15),
            default_value=5,
        ),
        use_dropout: HyperparameterSearchSpace = HyperparameterSearchSpace(
            hyperparameter="use_dropout",
            value_range=(True, False),
            default_value=False,
        ),
        num_units: HyperparameterSearchSpace = HyperparameterSearchSpace(
            hyperparameter="num_units",
            value_range=(10, 1024),
            default_value=200,
        ),
        activation: HyperparameterSearchSpace = HyperparameterSearchSpace(
            hyperparameter="activation",
            value_range=tuple(_activations.keys()),
            default_value=list(_activations.keys())[0],
        ),
        blocks_per_group: HyperparameterSearchSpace = HyperparameterSearchSpace(
            hyperparameter="blocks_per_group",
            value_range=(1, 4),
            default_value=2,
        ),
        dropout: HyperparameterSearchSpace = HyperparameterSearchSpace(
            hyperparameter="dropout",
            value_range=(0, 0.8),
            default_value=0.5,
        ),
        use_shake_shake: HyperparameterSearchSpace = HyperparameterSearchSpace(
            hyperparameter="use_shake_shake",
            value_range=(True, False),
            default_value=True,
        ),
        use_shake_drop: HyperparameterSearchSpace = HyperparameterSearchSpace(
            hyperparameter="use_shake_drop",
            value_range=(True, False),
            default_value=True,
        ),
        max_shake_drop_probability:
        HyperparameterSearchSpace = HyperparameterSearchSpace(
            hyperparameter="max_shake_drop_probability",
            value_range=(0, 1),
            default_value=0.5),
    ) -> ConfigurationSpace:
        cs = ConfigurationSpace()

        # The number of groups that compose the resnet. Each group
        # contains blocks_per_group ResBlocks; num_groups determines
        # how many such groups are stacked.
        min_num_groups, max_num_groups = num_groups.value_range
        num_groups = get_hyperparameter(num_groups,
                                        UniformIntegerHyperparameter)

        add_hyperparameter(cs, activation, CategoricalHyperparameter)
        cs.add_hyperparameters([num_groups])

        # We can have dropout in the network for
        # better generalization
        use_dropout = get_hyperparameter(use_dropout,
                                         CategoricalHyperparameter)
        cs.add_hyperparameters([use_dropout])

        use_shake_shake = get_hyperparameter(use_shake_shake,
                                             CategoricalHyperparameter)
        use_shake_drop = get_hyperparameter(use_shake_drop,
                                            CategoricalHyperparameter)
        shake_drop_prob = get_hyperparameter(max_shake_drop_probability,
                                             UniformFloatHyperparameter)
        cs.add_hyperparameters(
            [use_shake_shake, use_shake_drop, shake_drop_prob])
        cs.add_condition(
            CS.EqualsCondition(shake_drop_prob, use_shake_drop, True))

        # Iterate up to the upper bound of the number of groups;
        # the actual number of groups is sampled from the configuration.
        for i in range(0, int(max_num_groups) + 1):

            n_units_search_space = HyperparameterSearchSpace(
                hyperparameter='num_units_%d' % i,
                value_range=num_units.value_range,
                default_value=num_units.default_value,
                log=num_units.log)
            n_units_hp = get_hyperparameter(n_units_search_space,
                                            UniformIntegerHyperparameter)

            blocks_per_group_search_space = HyperparameterSearchSpace(
                hyperparameter='blocks_per_group_%d' % i,
                value_range=blocks_per_group.value_range,
                default_value=blocks_per_group.default_value,
                log=blocks_per_group.log)
            blocks_per_group_hp = get_hyperparameter(
                blocks_per_group_search_space, UniformIntegerHyperparameter)
            cs.add_hyperparameters([n_units_hp, blocks_per_group_hp])

            if i > 1:
                cs.add_condition(
                    CS.GreaterThanCondition(n_units_hp, num_groups, i - 1))
                cs.add_condition(
                    CS.GreaterThanCondition(blocks_per_group_hp, num_groups,
                                            i - 1))

            dropout_search_space = HyperparameterSearchSpace(
                hyperparameter='dropout_%d' % i,
                value_range=dropout.value_range,
                default_value=dropout.default_value,
                log=dropout.log)
            dropout_hp = get_hyperparameter(dropout_search_space,
                                            UniformFloatHyperparameter)
            cs.add_hyperparameter(dropout_hp)

            dropout_condition_1 = CS.EqualsCondition(dropout_hp, use_dropout,
                                                     True)

            if i > 1:

                dropout_condition_2 = CS.GreaterThanCondition(
                    dropout_hp, num_groups, i - 1)

                cs.add_condition(
                    CS.AndConjunction(dropout_condition_1,
                                      dropout_condition_2))
            else:
                cs.add_condition(dropout_condition_1)
        return cs
Example #12
    def get_config_space(user_updates=None):
        cs = ConfigSpace.ConfigurationSpace()
        range_num_groups = (1, 9)
        range_blocks_per_group = (1, 4)
        range_num_units = (10, 1024)
        possible_activations = ('sigmoid', 'tanh', 'relu')
        range_max_shake_drop_probability = (0, 1)
        range_dropout = (0, 0.8)

        if user_updates is not None and 'num_groups' in user_updates:
            range_num_groups = user_updates['num_groups']
        if user_updates is not None and 'blocks_per_group' in user_updates:
            range_blocks_per_group = user_updates['blocks_per_group']

        num_groups = ConfigSpace.UniformIntegerHyperparameter(
            "num_groups", lower=range_num_groups[0], upper=range_num_groups[1])
        cs.add_hyperparameter(num_groups)
        num_res_blocks = ConfigSpace.UniformIntegerHyperparameter(
            "blocks_per_group",
            lower=range_blocks_per_group[0],
            upper=range_blocks_per_group[1])
        cs.add_hyperparameter(num_res_blocks)
        cs.add_hyperparameter(
            ConfigSpace.CategoricalHyperparameter("activation",
                                                  possible_activations))

        use_dropout = ConfigSpace.CategoricalHyperparameter("use_dropout",
                                                            [True, False],
                                                            default_value=True)
        cs.add_hyperparameter(use_dropout)
        cs.add_hyperparameter(
            ConfigSpace.CategoricalHyperparameter("use_shake_shake",
                                                  [True, False],
                                                  default_value=True))

        shake_drop = cs.add_hyperparameter(
            ConfigSpace.CategoricalHyperparameter("use_shake_drop",
                                                  [True, False],
                                                  default_value=True))
        shake_drop_prob = cs.add_hyperparameter(
            ConfigSpace.UniformFloatHyperparameter(
                "max_shake_drop_probability",
                lower=range_max_shake_drop_probability[0],
                upper=range_max_shake_drop_probability[1]))
        cs.add_condition(
            ConfigSpace.EqualsCondition(shake_drop_prob, shake_drop, True))

        # Iterate up to the upper bound of the number of groups; the actual
        # number of groups is sampled from the configuration.
        for i in range(0, range_num_groups[1] + 1):

            n_units = ConfigSpace.UniformIntegerHyperparameter(
                "num_units_%d" % i,
                lower=range_num_units[0],
                upper=range_num_units[1],
                log=True)
            cs.add_hyperparameter(n_units)
            dropout = ConfigSpace.UniformFloatHyperparameter(
                "dropout_%d" % i,
                lower=range_dropout[0],
                upper=range_dropout[1])
            cs.add_hyperparameter(dropout)
            dropout_condition_1 = ConfigSpace.EqualsCondition(
                dropout, use_dropout, True)

            if i > 1:
                cs.add_condition(
                    ConfigSpace.GreaterThanCondition(n_units, num_groups,
                                                     i - 1))

                dropout_condition_2 = ConfigSpace.GreaterThanCondition(
                    dropout, num_groups, i - 1)
                cs.add_condition(
                    ConfigSpace.AndConjunction(dropout_condition_1,
                                               dropout_condition_2))
            else:
                cs.add_condition(dropout_condition_1)

        return cs
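The user_updates mechanism above lets a caller narrow a range before the space is built. A hypothetical call:

cs = get_config_space(user_updates={'num_groups': (1, 4),
                                    'blocks_per_group': (1, 2)})
print(cs.get_hyperparameter('num_groups'))  # num_groups now spans 1..4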
Example #13
    def get_configspace():
        '''
        Defines the configuration space for the Target Algorithm - the CNN module in this case
        :return: a ConfigSpace object containing the hyperparameters, conditionals and forbidden clauses on them
        '''
        config_space = CS.ConfigurationSpace()
        #########################
        # OPTIMIZER HYPERPARAMS #
        #########################
        alpha = CSH.UniformFloatHyperparameter('learning_rate',
                                               lower=0.00001,
                                               upper=0.1,
                                               default_value=0.001,
                                               log=True)
        opti = CSH.CategoricalHyperparameter('model_optimizer',
                                             choices=['adam', 'adad', 'sgd'],
                                             default_value='sgd')
        amsgrad = CSH.CategoricalHyperparameter('amsgrad',
                                                choices=['True', 'False'],
                                                default_value='False')
        # ^ https://openreview.net/forum?id=ryQu7f-RZ
        sgdmom = CSH.UniformFloatHyperparameter('momentum',
                                                lower=0,
                                                upper=0.99,
                                                default_value=0.90)
        # ^ https://distill.pub/2017/momentum/
        config_space.add_hyperparameters([alpha, opti, amsgrad, sgdmom])
        ###########################
        # OPTIMIZER CONDITIONALS  #
        ###########################
        amsgrad_cond = CS.EqualsCondition(amsgrad, opti, 'adam')
        sgdmom_cond = CS.EqualsCondition(sgdmom, opti, 'sgd')
        config_space.add_conditions([amsgrad_cond, sgdmom_cond])

        ########################
        # TRAINING HYPERPARAMS #
        ########################
        # loss = CSH.CategoricalHyperparameter('training_criterion', choices=['cross_entropy'],
        #                                       default_value='cross_entropy')
        # aug_prob = CSH.UniformFloatHyperparameter('aug_prob', lower=0, upper=0.5, default_value=0)
        batch = CSH.CategoricalHyperparameter(
            'batch_size',
            choices=['50', '100', '200', '500', '1000'],
            default_value='100')
        # ^ https://stats.stackexchange.com/questions/164876/tradeoff-batch-size-vs-number-of-iterations-to-train-a-neural-network
        # ^ https://stats.stackexchange.com/questions/49528/batch-gradient-descent-versus-stochastic-gradient-descent
        config_space.add_hyperparameters([batch])

        ############################
        # ARCHITECTURE HYPERPARAMS #
        ############################
        n_conv_layer = CSH.UniformIntegerHyperparameter('n_conv_layer',
                                                        lower=1,
                                                        upper=3,
                                                        default_value=1,
                                                        log=False)
        n_fc_layer = CSH.UniformIntegerHyperparameter('n_fc_layer',
                                                      lower=1,
                                                      upper=3,
                                                      default_value=1,
                                                      log=False)
        dropout = CSH.CategoricalHyperparameter('dropout',
                                                choices=['True', 'False'],
                                                default_value='False')
        activation = CSH.CategoricalHyperparameter(
            'activation',
            choices=['relu', 'tanh', 'sigmoid'],
            default_value='tanh')
        batchnorm = CSH.CategoricalHyperparameter('batchnorm',
                                                  choices=['True', 'False'],
                                                  default_value='False')
        config_space.add_hyperparameters(
            [n_conv_layer, n_fc_layer, dropout, activation, batchnorm])
        #
        # LAYER 1 PARAMS
        #
        kernel_1 = CSH.CategoricalHyperparameter('kernel_1',
                                                 choices=['3', '5', '7'],
                                                 default_value='5')
        channel_1 = CSH.UniformIntegerHyperparameter('channel_1',
                                                     lower=3,
                                                     upper=12,
                                                     default_value=3)
        padding_1 = CSH.UniformIntegerHyperparameter('padding_1',
                                                     lower=0,
                                                     upper=3,
                                                     default_value=2)
        stride_1 = CSH.UniformIntegerHyperparameter('stride_1',
                                                    lower=1,
                                                    upper=2,
                                                    default_value=1)
        maxpool_1 = CSH.CategoricalHyperparameter('maxpool_1',
                                                  choices=['True', 'False'],
                                                  default_value='True')
        maxpool_kernel_1 = CSH.UniformIntegerHyperparameter('maxpool_kernel_1',
                                                            lower=2,
                                                            upper=6,
                                                            default_value=6)
        config_space.add_hyperparameters([
            kernel_1, padding_1, stride_1, maxpool_1, maxpool_kernel_1,
            channel_1
        ])
        # LAYER 1 CONDITIONALS
        maxpool_cond_1 = CS.NotEqualsCondition(maxpool_1, stride_1, 2)
        # ^ Convolution with stride 2 is equivalent to Maxpool - https://arxiv.org/abs/1412.6806
        maxpool_kernel_cond_1 = CS.EqualsCondition(maxpool_kernel_1, maxpool_1,
                                                   'True')
        config_space.add_conditions([maxpool_cond_1, maxpool_kernel_cond_1])
        # LAYER 1 - RESTRICTING PADDING RANGE
        # Ensuring a padding domain of {0, 1, ..., floor(n/2)} for kernel_size n
        padding_1_cond_0 = CS.ForbiddenAndConjunction(
            CS.ForbiddenEqualsClause(kernel_1, '3'),
            CS.ForbiddenInClause(padding_1, [2, 3]))
        padding_1_cond_1 = CS.ForbiddenAndConjunction(
            CS.ForbiddenEqualsClause(kernel_1, '5'),
            CS.ForbiddenEqualsClause(padding_1, 3))
        config_space.add_forbidden_clauses(
            [padding_1_cond_0, padding_1_cond_1])

        #
        # LAYER 2 PARAMS
        #
        kernel_2 = CSH.CategoricalHyperparameter('kernel_2',
                                                 choices=['3', '5', '7'],
                                                 default_value='5')
        # Channels for Layer 2 onwards is a multiplicative factor of previous layer's channel size
        channel_2 = CSH.CategoricalHyperparameter('channel_2',
                                                  choices=['1', '2', '3', '4'],
                                                  default_value='2')
        # ^ Categorical instead of Integer owing to the design choice of channel_3 - for parity's sake I suppose
        padding_2 = CSH.UniformIntegerHyperparameter('padding_2',
                                                     lower=0,
                                                     upper=3,
                                                     default_value=2)
        stride_2 = CSH.UniformIntegerHyperparameter('stride_2',
                                                    lower=1,
                                                    upper=2,
                                                    default_value=1)
        maxpool_2 = CSH.CategoricalHyperparameter('maxpool_2',
                                                  choices=['True', 'False'],
                                                  default_value='True')
        maxpool_kernel_2 = CSH.UniformIntegerHyperparameter('maxpool_kernel_2',
                                                            lower=2,
                                                            upper=6,
                                                            default_value=6)
        config_space.add_hyperparameters([
            kernel_2, padding_2, stride_2, maxpool_2, maxpool_kernel_2,
            channel_2
        ])
        # LAYER 2 CONDITIONALS
        maxpool_cond_2 = CS.NotEqualsCondition(maxpool_2, stride_2, 2)
        # ^ Convolution with stride 2 is equivalent to Maxpool - https://arxiv.org/abs/1412.6806
        maxpool_kernel_cond_2 = CS.EqualsCondition(maxpool_kernel_2, maxpool_2,
                                                   'True')
        # LAYER 2 - RESTRICTING PADDING RANGE
        # Ensuring a padding domain of {0, 1, ..., floor(n/2)} for kernel_size n
        padding_2_cond_0 = CS.ForbiddenAndConjunction(
            CS.ForbiddenEqualsClause(kernel_2, '3'),
            CS.ForbiddenInClause(padding_2, [2, 3]))
        padding_2_cond_1 = CS.ForbiddenAndConjunction(
            CS.ForbiddenEqualsClause(kernel_2, '5'),
            CS.ForbiddenEqualsClause(padding_2, 3))
        config_space.add_forbidden_clauses(
            [padding_2_cond_0, padding_2_cond_1])
        # LAYER 2 ACTIVATE CONDITION
        # Layer 2 params will activate optionally only if n_conv_layer >= 2
        kernel_2_cond = CS.InCondition(kernel_2, n_conv_layer, [2, 3])
        channel_2_cond = CS.InCondition(channel_2, n_conv_layer, [2, 3])
        padding_2_cond = CS.InCondition(padding_2, n_conv_layer, [2, 3])
        stride_2_cond = CS.InCondition(stride_2, n_conv_layer, [2, 3])
        maxpool_2_cond = CS.AndConjunction(
            CS.InCondition(maxpool_2, n_conv_layer, [2, 3]), maxpool_cond_2)
        maxpool_kernel_2_cond = CS.AndConjunction(
            CS.InCondition(maxpool_kernel_2, n_conv_layer, [2, 3]),
            maxpool_kernel_cond_2)
        config_space.add_conditions([
            kernel_2_cond, channel_2_cond, padding_2_cond, stride_2_cond,
            maxpool_2_cond, maxpool_kernel_2_cond
        ])

        #
        # LAYER 3 PARAMS
        #
        kernel_3 = CSH.CategoricalHyperparameter('kernel_3',
                                                 choices=['1', '3', '5', '7'],
                                                 default_value='5')
        # Channels for Layer 2 onwards is a multiplicative factor of previous layer's channel size
        # Also being the max convolution layer allowed, this allows for 1x1 convolution
        # Therefore, a downsampling of channel depth (factor of 0.5) - reduce dimensions along depth
        channel_3 = CSH.CategoricalHyperparameter(
            'channel_3', choices=['0.5', '1', '2', '3'], default_value='2')
        padding_3 = CSH.UniformIntegerHyperparameter('padding_3',
                                                     lower=0,
                                                     upper=3,
                                                     default_value=2)
        stride_3 = CSH.UniformIntegerHyperparameter('stride_3',
                                                    lower=1,
                                                    upper=2,
                                                    default_value=1)
        maxpool_3 = CSH.CategoricalHyperparameter('maxpool_3',
                                                  choices=['True', 'False'],
                                                  default_value='True')
        maxpool_kernel_3 = CSH.UniformIntegerHyperparameter('maxpool_kernel_3',
                                                            lower=2,
                                                            upper=6,
                                                            default_value=6)
        config_space.add_hyperparameters([
            kernel_3, padding_3, stride_3, maxpool_3, maxpool_kernel_3,
            channel_3
        ])
        # LAYER 3 CONDITIONALS
        maxpool_cond_3 = CS.NotEqualsCondition(maxpool_3, stride_3, 2)
        maxpool_kernel_cond_3 = CS.EqualsCondition(maxpool_kernel_3, maxpool_3,
                                                   'True')
        # LAYER 3 - RESTRICTING PADDING RANGE
        # Ensuring a padding domain of {0, 1, ..., floor(n/2)} for kernel_size n
        padding_3_cond_0 = CS.ForbiddenAndConjunction(
            CS.ForbiddenEqualsClause(kernel_3, '3'),
            CS.ForbiddenInClause(padding_3, [2, 3]))
        padding_3_cond_1 = CS.ForbiddenAndConjunction(
            CS.ForbiddenEqualsClause(kernel_3, '5'),
            CS.ForbiddenEqualsClause(padding_3, 3))
        config_space.add_forbidden_clauses(
            [padding_3_cond_0, padding_3_cond_1])
        # LAYER 3 ACTIVATE CONDITION
        # Layer 2 params will activate optionally only if n_conv_layer >= 3 (max 3 conv layers allowed currently)
        kernel_3_cond = CS.EqualsCondition(kernel_3, n_conv_layer, 3)
        channel_3_cond = CS.EqualsCondition(channel_3, n_conv_layer, 3)
        padding_3_cond = CS.EqualsCondition(padding_3, n_conv_layer, 3)
        stride_3_cond = CS.EqualsCondition(stride_3, n_conv_layer, 3)
        maxpool_3_cond = CS.AndConjunction(
            CS.EqualsCondition(maxpool_3, n_conv_layer, 3), maxpool_cond_3)
        maxpool_kernel_3_cond = CS.AndConjunction(
            CS.EqualsCondition(maxpool_kernel_3, n_conv_layer, 3),
            maxpool_kernel_cond_3)
        config_space.add_conditions([
            kernel_3_cond, channel_3_cond, padding_3_cond, stride_3_cond,
            maxpool_3_cond, maxpool_kernel_3_cond
        ])

        # COMPLICATED ASSUMPTIONS MADE EMPIRICALLY TO IMPOSE CONSTRAINTS ON VARIOUS PARAMETERS SUCH THAT THE
        # CONFIGURATIONS SAMPLED BY THE CONFIGURATOR DON'T YIELD ARCHITECTURES WITH SHAPE/DIMENSION MISMATCHES
        # FOLLOWING BASIC ASSUMPTIONS WERE MADE:
        #   1) AT MAX 3 CONVOLUTION LAYERS CAN BE FORMED
        #   2) CONVOLUTION KERNEL SIZE DOMAIN : {3, 5, 7}
        #   3) EACH CONVOLUTION LAYER MAY OR MAY NOT HAVE A MAXPOOL LAYER
        #   4) MAXPOOL KERNEL SIZE DOMAIN : {2, 3, 4, 5, 6}
        #   5) A CONVOLUTION WITH STRIDE 2 IS EQUIVALENT TO MAXPOOL - cannot occur together in same layer
        # MANY OTHER CONDITIONS WERE ADDED BASED ON OBSERVATION (a couple of them mentioned below):
        #   1) If n_conv_layer=3 then cannot have maxpool on all 3 layers
        #   2) Cannot use a convolution kernel of size 5 or 7 in the third layer
        #   ...
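        # For reference, a sketch of the output-size arithmetic these rules
        # guard against (assuming a 28x28 input, consistent with the FC bounds
        # of 28..784 below, and maxpool stride defaulting to its kernel size):
        #   conv:    out = floor((in + 2*padding - kernel) / stride) + 1
        #   maxpool: out = floor(in / maxpool_kernel)
        # e.g. in=28, kernel=7, stride=1, padding=0 -> 22; maxpool kernel 6 -> 3;
        # a further maxpool or any kernel > 3 would then underflow the feature map.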
        for_two_layers_1 = CS.ForbiddenAndConjunction(
            # Disallowing large maxpool kernels in the first layers of a 2-layer convolution
            CS.ForbiddenEqualsClause(n_conv_layer, 2),
            CS.ForbiddenInClause(maxpool_kernel_1, [3, 4, 5, 6]),
            CS.ForbiddenInClause(maxpool_kernel_2, [4, 5, 6]))
        for_two_layers_2 = CS.ForbiddenAndConjunction(
            # Disallowing large convolution filter following a large max pool
            CS.ForbiddenInClause(maxpool_kernel_1, [5, 6]),
            CS.ForbiddenInClause(kernel_2, ['5', '7']))
        for_two_layers_3 = CS.ForbiddenAndConjunction(
            # Disallowing large convolution filter following a large max pool
            CS.ForbiddenInClause(kernel_1, ['5', '7']),
            CS.ForbiddenEqualsClause(maxpool_1, 'True'),
            CS.ForbiddenInClause(kernel_2, ['5', '7']))
        for_three_layers_1_0 = CS.ForbiddenAndConjunction(
            # Constraining maxpool kernel sizes for a 3 layer convolution
            # Small maxpool kernel if subsequent layer contains another maxpool
            CS.ForbiddenEqualsClause(n_conv_layer, 3),
            CS.ForbiddenInClause(maxpool_kernel_1, [5, 6]))
        for_three_layers_1_1 = CS.ForbiddenAndConjunction(
            # Constraining maxpool kernel sizes for a 3 layer convolution
            # Small maxpool kernel if subsequent layer contains another maxpool
            CS.ForbiddenEqualsClause(n_conv_layer, 3),
            CS.ForbiddenInClause(maxpool_kernel_2, [4, 5, 6]))
        for_three_layers_1_2 = CS.ForbiddenAndConjunction(
            # Constraining maxpool kernel sizes for a 3 layer convolution
            # Small maxpool kernel if subsequent layer contains another maxpool
            CS.ForbiddenEqualsClause(n_conv_layer, 3),
            CS.ForbiddenInClause(maxpool_kernel_3, [3, 4, 5, 6]))
        for_three_layers_2 = CS.ForbiddenAndConjunction(
            # Constraining maxpool kernel sizes for a 3 layer convolution
            # Small maxpool kernel if subsequent layer contains another maxpool
            CS.ForbiddenEqualsClause(n_conv_layer, 3),
            CS.ForbiddenInClause(maxpool_kernel_1, [3, 4, 5, 6]),
            CS.ForbiddenInClause(maxpool_kernel_3, [5, 6]))
        for_three_layers_3 = CS.ForbiddenAndConjunction(
            # Constraining maxpool kernel sizes for a 3 layer convolution
            CS.ForbiddenEqualsClause(n_conv_layer, 3),
            CS.ForbiddenInClause(maxpool_kernel_2, [3, 4, 5, 6]),
            CS.ForbiddenInClause(maxpool_kernel_3, [5, 6]))
        for_three_layers_4 = CS.ForbiddenAndConjunction(
            # Constraining maxpool kernel sizes for a 3 layer convolution
            CS.ForbiddenEqualsClause(n_conv_layer, 3),
            CS.ForbiddenEqualsClause(stride_2, 2),
            CS.ForbiddenInClause(maxpool_kernel_3, [5, 6]))
        for_three_layers_5 = CS.ForbiddenAndConjunction(
            # Disallowing large convolution filter following a large max pool
            CS.ForbiddenEqualsClause(n_conv_layer, 3),
            CS.ForbiddenEqualsClause(stride_1, 2),
            CS.ForbiddenEqualsClause(stride_2, 2),
            CS.ForbiddenInClause(maxpool_kernel_3, [3, 4, 5, 6]))
        for_three_layers_6 = CS.ForbiddenAndConjunction(
            # Disallowing large convolution filter following a large max pool
            CS.ForbiddenInClause(maxpool_kernel_2, [4, 5, 6]),
            CS.ForbiddenInClause(kernel_3, ['5', '7']))
        for_three_layers_7 = CS.ForbiddenAndConjunction(
            # Doesn't allow 3 consecutive maxpools with a large convolution mask in 3rd layer
            CS.ForbiddenEqualsClause(maxpool_1, 'True'),
            CS.ForbiddenEqualsClause(maxpool_2, 'True'),
            CS.ForbiddenInClause(kernel_3, ['3', '5', '7']),
            CS.ForbiddenEqualsClause(maxpool_3, 'True'))
        for_three_layers_8 = CS.ForbiddenAndConjunction(
            # Same as above, but stride=2 in place of maxpooling
            CS.ForbiddenEqualsClause(stride_1, 2),
            CS.ForbiddenEqualsClause(stride_2, 2),
            CS.ForbiddenInClause(kernel_3, ['3', '5', '7']),
            CS.ForbiddenEqualsClause(stride_3, 2))
        for_three_layers_9 = CS.ForbiddenAndConjunction(
            # Allow a multiplication factor of only 0.5 for a 1x1 convolution in third layer
            # And no padding
            CS.ForbiddenInClause(kernel_3, ['3', '5', '7']),
            CS.ForbiddenInClause(channel_3, ['0.5']),
            CS.ForbiddenInClause(padding_3, [1, 2, 3]))
        for_three_layers_10 = CS.ForbiddenAndConjunction(
            # Allow a multiplication factor of only 0.5 for a 1x1 convolution in third layer
            # And no padding
            CS.ForbiddenEqualsClause(kernel_3, '1'),
            CS.ForbiddenInClause(channel_3, ['1', '2', '3']),
            CS.ForbiddenInClause(padding_3, [1, 2, 3]))
        for_three_layers_11 = CS.ForbiddenAndConjunction(
            # Disallowing large convolution filter following a large max pool
            CS.ForbiddenInClause(kernel_2, ['5', '7']),
            CS.ForbiddenEqualsClause(maxpool_2, 'True'),
            CS.ForbiddenInClause(kernel_3, ['5', '7']))
        for_three_layers_12 = CS.ForbiddenAndConjunction(
            # Disallowing large convolution filter following a large max pool
            CS.ForbiddenInClause(kernel_2, ['5', '7']),
            CS.ForbiddenEqualsClause(maxpool_1, 'True'),
            CS.ForbiddenInClause(kernel_3, ['5', '7']))
        config_space.add_forbidden_clauses([
            for_two_layers_1, for_two_layers_2, for_two_layers_3,
            for_three_layers_1_0, for_three_layers_1_1, for_three_layers_1_2,
            for_three_layers_2, for_three_layers_3, for_three_layers_4,
            for_three_layers_5, for_three_layers_6, for_three_layers_7,
            for_three_layers_8, for_three_layers_9, for_three_layers_10,
            for_three_layers_11, for_three_layers_12
        ])
        # Forbidding a large convolution mask in the last layers
        last_layer_mask_1 = CS.ForbiddenAndConjunction(
            CS.ForbiddenEqualsClause(n_conv_layer, 3),
            CS.ForbiddenEqualsClause(kernel_3, '7'))
        last_layer_mask_2 = CS.ForbiddenAndConjunction(
            CS.ForbiddenEqualsClause(n_conv_layer, 2),
            CS.ForbiddenEqualsClause(kernel_2, '7'))
        config_space.add_forbidden_clauses(
            [last_layer_mask_1, last_layer_mask_2])

        # INTERMEDIATE FULLY CONNECTED LAYER PARAMS AND CONDITIONS (NOT OUTPUT LAYER)
        # Choosing the min size as the image height/width (28) and the max as
        # height x width (784); 784 far exceeds the flattened output of the
        # convolution and pooling stack, so the range is adequately expressive.
        fc1 = CSH.UniformIntegerHyperparameter('fc_1',
                                               lower=28,
                                               upper=784,
                                               default_value=500,
                                               log=True)
        fc2 = CSH.UniformIntegerHyperparameter('fc_2',
                                               lower=28,
                                               upper=784,
                                               default_value=500,
                                               log=True)
        config_space.add_hyperparameters([fc1, fc2])
        # Intermediate FC layers exist only if n_fc_layer > 1
        fc1_cond = CS.InCondition(fc1, n_fc_layer, [2, 3])
        fc2_cond = CS.EqualsCondition(fc2, n_fc_layer, 3)
        config_space.add_conditions([fc1_cond, fc2_cond])

        return (config_space)
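A minimal usage sketch for the space built above. The accessor name below is assumed for illustration; the point is that sampling honors the conditions, so parameters of inactive layers are simply absent from a sampled configuration:

config_space = get_config_space()  # hypothetical accessor returning the space above
for cfg in config_space.sample_configuration(5):
    values = cfg.get_dictionary()
    # Layer-3 parameters appear only when n_conv_layer == 3, and fc_1/fc_2
    # only when n_fc_layer is large enough.
    print(values.get("n_conv_layer"), sorted(values))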
Example #14
    def fit_search_space(self, df, fac_numerical=0.125, fac_categorical=10):
        """Apply prior-guided transfer learning based on a DataFrame of results.

        Example Usage:

        >>> search = CBO(problem, evaluator)
        >>> search.fit_search_space("results.csv")

        Args:
            df (str|DataFrame): a checkpoint from a previous search.
            fac_numerical (float): the factor used to compute the sigma of a truncated normal distribution, ``sigma = max(1.0, (upper - lower) * fac_numerical)``. A large factor increases exploration while a small factor increases exploitation around the best configuration from the ``df`` parameter.
            fac_categorical (float): the weight given to the categorical features of the best configuration. A large weight ``> 1`` increases exploitation while a weight close to ``1`` increases exploration.
        """

        if isinstance(df, str) and df.endswith(".csv"):
            df = pd.read_csv(df)
        assert isinstance(df, pd.DataFrame)

        # check single or multiple objectives
        if "objective" in df.columns:
            # filter failures
            if pd.api.types.is_string_dtype(df.objective):
                df = df[~df.objective.str.startswith("F")]
                df.objective = df.objective.astype(float)
        else:
            # filter failures
            objcol = df.filter(regex=r"^objective_\d+$").columns
            for col in objcol:
                if pd.api.types.is_string_dtype(df[col]):
                    df = df[~df[col].str.startswith("F")]
                    df[col] = df[col].astype(float)

        cst = self._problem.space
        if not isinstance(cst, CS.ConfigurationSpace):
            logging.error(f"{type(cst)}: not supported for transfer learning")

        res_df = df
        res_df_names = res_df.columns.values
        if "objective" in df.columns:
            best_index = np.argmax(res_df["objective"].values)
            best_param = res_df.iloc[best_index]
        else:
            best_index = non_dominated_set(-np.asarray(res_df[objcol]),
                                           return_mask=False)[0]
            best_param = res_df.iloc[best_index]

        cst_new = CS.ConfigurationSpace(
            seed=self._random_state.randint(0, 2**32))
        hp_names = cst.get_hyperparameter_names()
        for hp_name in hp_names:
            hp = cst.get_hyperparameter(hp_name)
            if hp_name in res_df_names:
                if (type(hp) is csh.UniformIntegerHyperparameter
                        or type(hp) is csh.UniformFloatHyperparameter):
                    mu = best_param[hp.name]
                    lower = hp.lower
                    upper = hp.upper
                    sigma = max(1.0, (upper - lower) * fac_numerical)
                    if type(hp) is csh.UniformIntegerHyperparameter:
                        param_new = csh.NormalIntegerHyperparameter(
                            name=hp.name,
                            default_value=mu,
                            mu=mu,
                            sigma=sigma,
                            lower=lower,
                            upper=upper,
                        )
                    else:  # type is csh.UniformFloatHyperparameter:
                        param_new = csh.NormalFloatHyperparameter(
                            name=hp.name,
                            default_value=mu,
                            mu=mu,
                            sigma=sigma,
                            lower=lower,
                            upper=upper,
                        )
                    cst_new.add_hyperparameter(param_new)
                elif (type(hp) is csh.CategoricalHyperparameter
                      or type(hp) is csh.OrdinalHyperparameter):
                    if type(hp) is csh.OrdinalHyperparameter:
                        choices = hp.sequence
                    else:
                        choices = hp.choices
                    weights = len(choices) * [1.0]
                    index = choices.index(best_param[hp.name])
                    weights[index] = fac_categorical
                    norm_weights = [float(i) / sum(weights) for i in weights]
                    param_new = csh.CategoricalHyperparameter(
                        name=hp.name, choices=choices, weights=norm_weights)
                    cst_new.add_hyperparameter(param_new)
                else:
                    logging.warning(
                        f"Not fitting {hp} because it is not supported!")
                    cst_new.add_hyperparameter(hp)
            else:
                logging.warning(
                    f"Not fitting {hp} because it was not found in the dataframe!"
                )
                cst_new.add_hyperparameter(hp)

        # For conditions
        for cond in cst.get_conditions():
            if type(cond) == CS.AndConjunction or type(
                    cond) == CS.OrConjunction:
                cond_list = []
                for comp in cond.components:
                    cond_list.append(self._return_cond(comp, cst_new))
                if type(cond) is CS.AndConjunction:
                    cond_new = CS.AndConjunction(*cond_list)
                elif type(cond) is CS.OrConjunction:
                    cond_new = CS.OrConjunction(*cond_list)
                else:
                    logging.warning(
                        f"Condition {type(cond)} is not implemented!")
            else:
                cond_new = self._return_cond(cond, cst_new)
            cst_new.add_condition(cond_new)

        # For forbiddens
        for cond in cst.get_forbiddens():
            if type(cond) is CS.ForbiddenAndConjunction:
                cond_list = []
                for comp in cond.components:
                    cond_list.append(self._return_forbid(comp, cst_new))
                cond_new = CS.ForbiddenAndConjunction(*cond_list)
            elif (type(cond) is CS.ForbiddenEqualsClause
                  or type(cond) is CS.ForbiddenInClause):
                cond_new = self._return_forbid(cond, cst_new)
            else:
                logging.warning(f"Forbidden {type(cond)} is not implemented!")
            cst_new.add_forbidden_clause(cond_new)

        self._opt_kwargs["dimensions"] = cst_new
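To make the prior construction concrete, here is a small hedged sketch (values are illustrative, not taken from any real checkpoint) of the two kinds of hyperparameters the loop above builds:

import ConfigSpace.hyperparameters as csh

# Numerical: a truncated normal centered on the best value found in df.
lower, upper, mu = 10, 1024, 512
sigma = max(1.0, (upper - lower) * 0.125)  # fac_numerical = 0.125
num_units_prior = csh.NormalIntegerHyperparameter(
    name="num_units_1", default_value=mu, mu=mu, sigma=sigma,
    lower=lower, upper=upper)

# Categorical: upweight the best choice, then normalize to a distribution.
choices = ("sigmoid", "tanh", "relu")
weights = [1.0, 1.0, 10.0]  # fac_categorical = 10 at the best choice ("relu")
norm_weights = [w / sum(weights) for w in weights]  # [1/12, 1/12, 10/12]
activation_prior = csh.CategoricalHyperparameter(
    name="activation", choices=list(choices), weights=norm_weights)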
Example #15
    def get_hyperparameter_search_space(dataset_properties: Optional[Dict] = None,
                                        min_mlp_layers: int = 1,
                                        max_mlp_layers: int = 15,
                                        dropout: bool = True,
                                        min_num_units: int = 10,
                                        max_num_units: int = 1024,
                                        ) -> ConfigurationSpace:

        cs = ConfigurationSpace()

        # The number of hidden layers the network will have.
        # Layer blocks are meant to have the same architecture, differing only
        # by the number of units
        num_groups = UniformIntegerHyperparameter(
            "num_groups", min_mlp_layers, max_mlp_layers, default_value=5)

        activation = CategoricalHyperparameter(
            "activation", choices=list(_activations.keys())
        )
        cs.add_hyperparameters([num_groups, activation])

        # We can have dropout in the network for
        # better generalization
        if dropout:
            use_dropout = CategoricalHyperparameter(
                "use_dropout", choices=[True, False])
            cs.add_hyperparameters([use_dropout])

        for i in range(1, max_mlp_layers + 1):
            n_units_hp = UniformIntegerHyperparameter("num_units_%d" % i,
                                                      lower=min_num_units,
                                                      upper=max_num_units,
                                                      default_value=20)
            cs.add_hyperparameter(n_units_hp)

            if i > min_mlp_layers:
                # The units of layer i should only exist
                # if there are at least i layers
                cs.add_condition(
                    CS.GreaterThanCondition(
                        n_units_hp, num_groups, i - 1
                    )
                )

            if dropout:
                dropout_hp = UniformFloatHyperparameter(
                    "dropout_%d" % i,
                    lower=0.0,
                    upper=0.8,
                    default_value=0.5
                )
                cs.add_hyperparameter(dropout_hp)
                dropout_condition_1 = CS.EqualsCondition(dropout_hp, use_dropout, True)

                if i > min_mlp_layers:
                    dropout_condition_2 = CS.GreaterThanCondition(dropout_hp, num_groups, i - 1)
                    cs.add_condition(CS.AndConjunction(dropout_condition_1, dropout_condition_2))
                else:
                    cs.add_condition(dropout_condition_1)

        return cs
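A quick hedged check of the conditional structure above (the call site is illustrative): with the default ``num_groups = 5``, only the first five layer blocks are active in the default configuration:

cs = get_hyperparameter_search_space()  # illustrative call site
defaults = cs.get_default_configuration().get_dictionary()
# num_groups defaults to 5, so num_units_1..num_units_5 are present
# (plus dropout_1..dropout_5 when use_dropout is True), while
# num_units_6..num_units_15 are deactivated by the conditions.
assert "num_units_5" in defaults and "num_units_6" not in defaults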
Example #16
    def get_hyperparameter_search_space(dataset_properties: Optional[Dict] = None,
                                        min_num_groups: int = 1,
                                        max_num_groups: int = 9,
                                        min_blocks_per_groups: int = 1,
                                        max_blocks_per_groups: int = 4,
                                        min_num_units: int = 10,
                                        max_num_units: int = 1024,
                                        ) -> ConfigurationSpace:
        cs = ConfigurationSpace()

        # The number of groups that compose the resnet. Each group stacks a
        # number of ResBlocks (blocks_per_group below); num_groups is how
        # many such groups the network contains.
        num_groups = UniformIntegerHyperparameter(
            "num_groups", lower=min_num_gropus, upper=max_num_groups, default_value=5)

        activation = CategoricalHyperparameter(
            "activation", choices=list(_activations.keys())
        )
        cs.add_hyperparameters([num_groups, activation])

        # We can have dropout in the network for
        # better generalization
        use_dropout = CategoricalHyperparameter(
            "use_dropout", choices=[True, False])
        cs.add_hyperparameters([use_dropout])

        use_shake_shake = CategoricalHyperparameter("use_shake_shake", choices=[True, False])
        use_shake_drop = CategoricalHyperparameter("use_shake_drop", choices=[True, False])
        shake_drop_prob = UniformFloatHyperparameter(
            "max_shake_drop_probability", lower=0.0, upper=1.0)
        cs.add_hyperparameters([use_shake_shake, use_shake_drop, shake_drop_prob])
        cs.add_condition(CS.EqualsCondition(shake_drop_prob, use_shake_drop, True))

        # Iterate up to the upper bound on the number of groups; the
        # conditions below deactivate the parameters of groups beyond
        # the sampled num_groups.
        for i in range(0, max_num_groups + 1):

            n_units = UniformIntegerHyperparameter(
                "num_units_%d" % i,
                lower=min_num_units,
                upper=max_num_units,
            )
            blocks_per_group = UniformIntegerHyperparameter(
                "blocks_per_group_%d" % i, lower=min_blocks_per_groups,
                upper=max_blocks_per_groups)

            cs.add_hyperparameters([n_units, blocks_per_group])

            if i > 1:
                cs.add_condition(CS.GreaterThanCondition(n_units, num_groups, i - 1))
                cs.add_condition(CS.GreaterThanCondition(blocks_per_group, num_groups, i - 1))

            this_dropout = UniformFloatHyperparameter(
                "dropout_%d" % i, lower=0.0, upper=1.0
            )
            cs.add_hyperparameters([this_dropout])

            dropout_condition_1 = CS.EqualsCondition(this_dropout, use_dropout, True)

            if i > 1:

                dropout_condition_2 = CS.GreaterThanCondition(this_dropout, num_groups, i - 1)

                cs.add_condition(CS.AndConjunction(dropout_condition_1, dropout_condition_2))
            else:
                cs.add_condition(dropout_condition_1)
        return cs
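As with the MLP space, a short hedged sanity check (illustrative call site): ``max_shake_drop_probability`` is present in a sampled configuration exactly when ``use_shake_drop`` is True:

cs = get_hyperparameter_search_space()  # illustrative call site
for cfg in cs.sample_configuration(10):
    values = cfg.get_dictionary()
    assert ("max_shake_drop_probability" in values) == values["use_shake_drop"]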