Example #1
def optimize():
    # We load the iris-dataset (a widely used benchmark)
    iris = datasets.load_iris()

    #logger = logging.getLogger("SVMExample")
    logging.basicConfig(level=logging.INFO)  # logging.DEBUG for debug output

    # Build Configuration Space which defines all parameters and their ranges
    cs = ConfigurationSpace()

    # We define a few possible types of SVM-kernels and add them as "kernel" to our cs
    kernel = CategoricalHyperparameter("kernel", ["linear", "rbf", "poly", "sigmoid"], default_value="poly")
    cs.add_hyperparameter(kernel)

    # There are some hyperparameters shared by all kernels
    C = UniformFloatHyperparameter("C", 0.001, 1000.0, default_value=1.0)
    shrinking = CategoricalHyperparameter("shrinking", ["true", "false"], default_value="true")
    cs.add_hyperparameters([C, shrinking])

    # Others are kernel-specific, so we can add conditions to limit the searchspace
    degree = UniformIntegerHyperparameter("degree", 1, 5, default_value=3)   # Only used by kernel poly
    coef0 = UniformFloatHyperparameter("coef0", 0.0, 10.0, default_value=0.0)  # poly, sigmoid
    cs.add_hyperparameters([degree, coef0])
    use_degree = InCondition(child=degree, parent=kernel, values=["poly"])
    use_coef0 = InCondition(child=coef0, parent=kernel, values=["poly", "sigmoid"])
    cs.add_conditions([use_degree, use_coef0])

    # This also works for parameters that are a mix of categorical and values from a range of numbers
    # For example, gamma can be either "auto" or a fixed float
    gamma = CategoricalHyperparameter("gamma", ["auto", "value"], default_value="auto")  # only rbf, poly, sigmoid
    gamma_value = UniformFloatHyperparameter("gamma_value", 0.0001, 8, default_value=1)
    cs.add_hyperparameters([gamma, gamma_value])
    # We only activate gamma_value if gamma is set to "value"
    cs.add_condition(InCondition(child=gamma_value, parent=gamma, values=["value"]))
    # And again we can restrict the use of gamma in general to the choice of the kernel
    cs.add_condition(InCondition(child=gamma, parent=kernel, values=["rbf", "poly", "sigmoid"]))


    # Scenario object
    scenario = Scenario("test/test_files/svm_scenario.txt")

    # Example call of the function
    # It returns: Status, Cost, Runtime, Additional Infos
    def_value = svm_from_cfg(cs.get_default_configuration())
    print("Default Value: %.2f" % (def_value))

    # Optimize, using a SMAC-object
    print("Optimizing! Depending on your machine, this might take a few minutes.")
    smac = SMAC(scenario=scenario, rng=np.random.RandomState(42),
                tae_runner=svm_from_cfg)

    incumbent = smac.optimize()
    inc_value = svm_from_cfg(incumbent)

    print("Optimized Value: %.2f" % (inc_value))
Example #2
 def _get_acm_cs(self):
     """
     Returns a configuration space designed for querying
     ~smac.optimizer.smbo._component_builder.

     Returns
     -------
     ConfigurationSpace
     """
     
     cs = ConfigurationSpace()
     cs.seed(self.rng.randint(0,2**20))
     
     model = CategoricalHyperparameter("model", choices=("RF", "GP"))
     
     num_trees = Constant("num_trees", value=10)
     bootstrap = CategoricalHyperparameter("do_bootstrapping", choices=(True, False), default_value=True)
     ratio_features = CategoricalHyperparameter("ratio_features", choices=(3 / 6, 4 / 6, 5 / 6, 1), default_value=1)
     min_split = UniformIntegerHyperparameter("min_samples_to_split", lower=1, upper=10, default_value=2)
     min_leaves = UniformIntegerHyperparameter("min_samples_in_leaf", lower=1, upper=10, default_value=1)
     
     cs.add_hyperparameters([model, num_trees, bootstrap, ratio_features, min_split, min_leaves])
     
     inc_num_trees = InCondition(num_trees, model, ["RF"])
     inc_bootstrap = InCondition(bootstrap, model, ["RF"])
     inc_ratio_features = InCondition(ratio_features, model, ["RF"])
     inc_min_split = InCondition(min_split, model, ["RF"])
     inc_min_leaves = InCondition(min_leaves, model, ["RF"])
     
     cs.add_conditions([inc_num_trees, inc_bootstrap, inc_ratio_features, inc_min_split, inc_min_leaves])
     
     acq = CategoricalHyperparameter("acq_func", choices=("EI", "LCB", "PI", "LogEI"))
     par_ei = UniformFloatHyperparameter("par_ei", lower=-10, upper=10)
     par_pi = UniformFloatHyperparameter("par_pi", lower=-10, upper=10)
     par_logei = UniformFloatHyperparameter("par_logei", lower=0.001, upper=100, log=True)
     par_lcb = UniformFloatHyperparameter("par_lcb", lower=0.0001, upper=0.9999)
     
     cs.add_hyperparameters([acq, par_ei, par_pi, par_logei, par_lcb])
     
     inc_par_ei = InCondition(par_ei, acq, ["EI"])
     inc_par_pi = InCondition(par_pi, acq, ["PI"])
     inc_par_logei = InCondition(par_logei, acq, ["LogEI"])
     inc_par_lcb = InCondition(par_lcb, acq, ["LCB"])
     
     cs.add_conditions([inc_par_ei, inc_par_pi, inc_par_logei, inc_par_lcb])
     
     return cs
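A hedged usage sketch for the space above ("builder" is a hypothetical stand-in for the object that owns _get_acm_cs; the seeding call implies self.rng is a numpy RandomState):

# Hypothetical caller: inspect a few sampled component configurations
acm_cs = builder._get_acm_cs()
for cfg in acm_cs.sample_configuration(3):
    d = cfg.get_dictionary()
    # RF-specific and acquisition-specific parameters appear only when active
    print(d["model"], d["acq_func"], sorted(d))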
Example #3
    def config_space(self):
        """SVC hyperparameter space."""

        C_param = UniformFloatHyperparameter(
            'C', lower=1e-8, upper=100.0, default_value=1.0
        )
        shrinking = CategoricalHyperparameter(
            'shrinking', [True, False], default_value=True
        )
        kernel = CategoricalHyperparameter(
            'kernel', ['linear', 'rbf', 'poly', 'sigmoid'],
        )
        degree = UniformIntegerHyperparameter(
            'degree', lower=1, upper=5, default_value=2
        )
        coef0 = UniformFloatHyperparameter(
            'coef0', lower=0.0, upper=10.0, default_value=0.0
        )
        # Add hyperparameters to config space.
        config = ConfigurationSpace()
        config.seed(self.random_state)
        config.add_hyperparameters(
            (
                C_param,
                shrinking,
                kernel,
                degree,
                coef0,
            )
        )
        # Conditionals on hyperparameters specific to kernels.
        config.add_conditions(
            (
                InCondition(child=degree, parent=kernel, values=['poly']),
                InCondition(
                    child=coef0, parent=kernel, values=['poly', 'sigmoid']
                )
            )
        )
        return config
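A hedged usage sketch: sample a configuration from the space above and instantiate the estimator ("model" is a hypothetical instance exposing config_space; inactive conditionals come back as None and must be dropped):

from sklearn import svm

space = model.config_space()  # hypothetical instance exposing the method
cfg = space.sample_configuration()
params = {k: v for k, v in cfg.get_dictionary().items() if v is not None}
clf = svm.SVC(**params)  # degree/coef0 are present only for matching kernels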
Example #4
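This snippet begins mid-example and conditions on hyperparameters defined earlier (it resembles SMAC3's MLP example). Plausible, hypothetical definitions consistent with the conditions below:

import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH

cs = CS.ConfigurationSpace()
# Hypothetical parents/children matching the conditions that follow
solver = CSH.CategoricalHyperparameter('solver', ['lbfgs', 'sgd', 'adam'])
batch_size = CSH.UniformIntegerHyperparameter('batch_size', 30, 300)
learning_rate = CSH.CategoricalHyperparameter(
    'learning_rate', ['constant', 'invscaling', 'adaptive'])
learning_rate_init = CSH.UniformFloatHyperparameter(
    'learning_rate_init', 1e-4, 1.0, log=True)
cs.add_hyperparameters([solver, batch_size, learning_rate, learning_rate_init])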
    # Adding conditions to restrict the hyperparameter space
    # Since learning rate is used when solver is 'sgd'
    use_lr = CS.conditions.EqualsCondition(child=learning_rate,
                                           parent=solver,
                                           value='sgd')
    # Since learning rate initialization will only be accounted for when using 'sgd' or 'adam'
    use_lr_init = CS.conditions.InCondition(child=learning_rate_init,
                                            parent=solver,
                                            values=['sgd', 'adam'])
    # Since batch size will not be considered when optimizer is 'lbfgs'
    use_batch_size = CS.conditions.InCondition(child=batch_size,
                                               parent=solver,
                                               values=['sgd', 'adam'])

    # We can also add multiple conditions on hyperparameters at once:
    cs.add_conditions([use_lr, use_batch_size, use_lr_init])

    # SMAC scenario object
    scenario = Scenario({
        'run_obj': 'quality',  # we optimize quality (alternative to runtime)
        'wallclock-limit': 100,  # max duration to run the optimization (in seconds)
        'cs': cs,  # configuration space
        'deterministic': 'true',
        'limit_resources': True,  # Uses pynisher to limit memory and runtime
        # Alternatively, you can also disable this.
        # Then you should handle runtime and memory yourself in the TA
        'cutoff': 30,  # runtime limit for target algorithm
        'memory_limit': 3072,  # adapt this to a reasonable value for your hardware
    })
Example #5
File: svm.py  Project: TQCAI/SMAC3
# There are some hyperparameters shared by all kernels
C = UniformFloatHyperparameter("C", 0.001, 1000.0, default_value=1.0)
shrinking = CategoricalHyperparameter("shrinking", ["true", "false"],
                                      default_value="true")
cs.add_hyperparameters([C, shrinking])

# Others are kernel-specific, so we can add conditions to limit the searchspace
degree = UniformIntegerHyperparameter(
    "degree", 1, 5, default_value=3)  # Only used by kernel poly
coef0 = UniformFloatHyperparameter("coef0", 0.0, 10.0,
                                   default_value=0.0)  # poly, sigmoid
cs.add_hyperparameters([degree, coef0])
use_degree = InCondition(child=degree, parent=kernel, values=["poly"])
use_coef0 = InCondition(child=coef0, parent=kernel, values=["poly", "sigmoid"])
cs.add_conditions([use_degree, use_coef0])

# This also works for parameters that are a mix of categorical and values from a range of numbers
# For example, gamma can be either "auto" or a fixed float
gamma = CategoricalHyperparameter(
    "gamma", ["auto", "value"],
    default_value="auto")  # only rbf, poly, sigmoid
gamma_value = UniformFloatHyperparameter("gamma_value",
                                         0.0001,
                                         8,
                                         default_value=1)
cs.add_hyperparameters([gamma, gamma_value])
# We only activate gamma_value if gamma is set to "value"
cs.add_condition(InCondition(child=gamma_value, parent=gamma,
                             values=["value"]))
# And again we can restrict the use of gamma in general to the choice of the kernel
cs.add_condition(InCondition(child=gamma, parent=kernel,
                             values=["rbf", "poly", "sigmoid"]))
Example #6
def main_loop(problem):
    logging.basicConfig(level=logging.INFO)  # logging.DEBUG for debug output
    cs = ConfigurationSpace()

    # Load the data up front: X is needed below to bound "max_features_int"
    # (in the original it was loaded after use, which would raise a NameError)
    with open("/home/naamah/Documents/CatES/result_All/X1.p", "rb") as fp:
        X = pickle.load(fp)

    n_estimators = UniformIntegerHyperparameter("n_estimators",
                                                5,
                                                50,
                                                default_value=10)
    #criterion = CategoricalHyperparameter("criterion", ["mse", "mae"], default_value="mse")
    min_samples_split = UniformIntegerHyperparameter("min_samples_split",
                                                     2,
                                                     20,
                                                     default_value=2)
    min_samples_leaf = UniformIntegerHyperparameter("min_samples_leaf",
                                                    1,
                                                    20,
                                                    default_value=1)
    min_weight_fraction_leaf = UniformFloatHyperparameter(
        "min_weight_fraction_leaf", 0.0, 0.5, default_value=0.0)
    max_leaf_nodes = UniformIntegerHyperparameter("max_leaf_nodes",
                                                  10,
                                                  1000,
                                                  default_value=100)
    min_impurity_decrease = UniformFloatHyperparameter("min_impurity_decrease",
                                                       0.0,
                                                       0.5,
                                                       default_value=0.0)
    warm_start = CategoricalHyperparameter("warm_start", ["true", "false"],
                                           default_value="false")

    cs.add_hyperparameters([
        n_estimators, min_weight_fraction_leaf, min_samples_split,
        min_samples_leaf, max_leaf_nodes, warm_start, min_impurity_decrease
    ])

    max_features = CategoricalHyperparameter(
        "max_features", ["auto", "log2", "sqrt", "int", "None", "float"],
        default_value="auto")
    max_features_int = UniformIntegerHyperparameter("max_features_int",
                                                    2,
                                                    len(X[0]),
                                                    default_value=5)
    max_features_float = UniformFloatHyperparameter("max_features_float",
                                                    0.0,
                                                    0.9,
                                                    default_value=0.0)
    cs.add_hyperparameters(
        [max_features, max_features_int, max_features_float])
    use_max_features_int = InCondition(child=max_features_int,
                                       parent=max_features,
                                       values=["int"])
    use_max_features_float = InCondition(child=max_features_float,
                                         parent=max_features,
                                         values=["float"])
    cs.add_conditions([use_max_features_int, use_max_features_float])

    max_depth = CategoricalHyperparameter("max_depth", ["None", "value"],
                                          default_value="None")
    max_depth_value = UniformIntegerHyperparameter("max_depth_value",
                                                   2,
                                                   20,
                                                   default_value=5)
    cs.add_hyperparameters([max_depth, max_depth_value])
    cs.add_condition(
        InCondition(child=max_depth_value, parent=max_depth, values=["value"]))

    random_state = CategoricalHyperparameter("random_state", ["None", "value"],
                                             default_value="None")
    random_state_value = UniformIntegerHyperparameter("random_state_value",
                                                      1,
                                                      20,
                                                      default_value=1)
    cs.add_hyperparameters([random_state, random_state_value])
    cs.add_condition(
        InCondition(child=random_state_value,
                    parent=random_state,
                    values=["value"]))

    with open("/home/naamah/Documents/CatES/result_All/X1.p", "rb") as fp:
        X = pickle.load(fp)

    # Scenario object
    max_eval = 100000
    scenario = Scenario({
        "run_obj": "quality",  # we optimize quality (alternatively runtime)
        "runcount-limit": max_eval,  # maximum function evaluations
        "cs": cs,  # configuration space
        "shared_model": True,
        "output_dir": "/home/naamah/Documents/CatES/result_All/smac/RF/run_{}_{}_{}".format(
            max_eval,
            datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d_%H:%M:%S'),
            problem),
        "input_psmac_dirs": "/home/naamah/Documents/CatES/result_All/smac/psmac",
        "deterministic": "False",
    })

    # Note: the target function is named svm_from_cfg but must accept this RF
    # configuration space; it is defined elsewhere in the original project.
    def_value = svm_from_cfg(cs.get_default_configuration())
    print("Default Value: %.2f" % (def_value))

    # Optimize, using a SMAC-object
    print("Optimizing! Depending on your machine, this might take a few minutes.")
    smac = SMAC(scenario=scenario, tae_runner=svm_from_cfg)

    incumbent = smac.optimize()
    inc_value = svm_from_cfg(incumbent)
    print("Optimized Value: %.2f" % (inc_value))

    return incumbent


# main_loop()
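The TAE for Example #6 must translate the "None"/"value" placeholder categoricals back into sklearn keyword arguments; the snippet reuses a function named svm_from_cfg defined elsewhere in its project. A hedged sketch of that translation (rf_from_cfg and the target vector y are hypothetical):

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score


def rf_from_cfg(cfg):
    """Hypothetical TAE: map placeholder categoricals to sklearn kwargs."""
    cfg = {k: cfg[k] for k in cfg if cfg[k] is not None}  # drop inactive params
    cfg["warm_start"] = cfg["warm_start"] == "true"
    # "None"/"value" categoricals: substitute the conditioned value parameter
    for name in ("max_depth", "random_state"):
        if cfg.pop(name) == "value":
            cfg[name] = cfg.pop(name + "_value")
        else:
            cfg[name] = None
    # max_features: map the "int"/"float"/"None" choices to typed values
    if cfg["max_features"] == "int":
        cfg["max_features"] = cfg.pop("max_features_int")
    elif cfg["max_features"] == "float":
        cfg["max_features"] = cfg.pop("max_features_float")
    elif cfg["max_features"] == "None":
        cfg["max_features"] = None
    clf = RandomForestRegressor(**cfg)
    scores = cross_val_score(clf, X, y, cv=5)  # X as loaded above; y hypothetical
    return 1 - np.mean(scores)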
Example #7
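This snippet also begins mid-example; the hyperparameters it conditions on are defined earlier. Plausible, hypothetical definitions for an sklearn SGDClassifier space, consistent with the condition values used below:

learning_rate = CategoricalHyperparameter(
    "learning_rate", ["constant", "optimal", "invscaling", "adaptive"],
    default_value="optimal")
eta0 = UniformFloatHyperparameter("eta0", 1e-5, 1.0, default_value=0.01, log=True)
power_t = UniformFloatHyperparameter("power_t", 0.1, 1.0, default_value=0.5)
penalty = CategoricalHyperparameter("penalty", ["l2", "l1", "elasticnet"],
                                    default_value="l2")
l1_ratio = UniformFloatHyperparameter("l1_ratio", 0.0, 1.0, default_value=0.15)

cs = ConfigurationSpace()
cs.add_hyperparameters([learning_rate, eta0, power_t, penalty, l1_ratio])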
# Others are kernel-specific, so we can add conditions to limit the searchspace

use_eta0 = InCondition(child=eta0,
                       parent=learning_rate,
                       values=["constant", "invscaling", "adaptive"])

use_power_t = InCondition(child=power_t,
                          parent=learning_rate,
                          values=["invscaling"])

use_l1_ratio = InCondition(child=l1_ratio,
                           parent=penalty,
                           values=["elasticnet"])

cs.add_conditions([use_eta0, use_power_t, use_l1_ratio])

# Scenario object
scenario = Scenario({
    "run_obj": "quality",  # we optimize quality (alternatively runtime)
    "runcount-limit": 500,  # max. number of function evaluations; kept low for this example
    "cs": cs,  # configuration space
    "deterministic": "true"
})

# Example call of the function
# It returns: Status, Cost, Runtime, Additional Infos
def_value = SGD_from_cfg(cs.get_default_configuration())
print("Default Value: %.2f" % (def_value))
Example #8
File: hpo.py  Project: felisat/fl_distill
def run_hpo(args, tae_runner):
    # create empty config space
    cs = ConfigurationSpace()
    #
    local_epochs = UniformIntegerHyperparameter("local_epochs",
                                                1,
                                                20,
                                                default_value=10)
    distill_epochs = UniformIntegerHyperparameter("distill_epochs",
                                                  1,
                                                  20,
                                                  default_value=1)

    #
    fallback = CategoricalHyperparameter("fallback", [True, False],
                                         default_value=True)
    lambda_outlier = UniformFloatHyperparameter("lambda_outlier",
                                                0.0,
                                                10.0,
                                                default_value=1.0)
    lambda_fedprox = UniformFloatHyperparameter("lambda_fedprox",
                                                0.000001,
                                                10.0,
                                                default_value=0.01,
                                                log=True)

    #
    mixture_coefficients_base = UniformFloatHyperparameter(
        "mixture_coefficients_base", 0.0, 1.0, default_value=0.5)

    #
    local_optimizer = CategoricalHyperparameter("local_optimizer",
                                                ["Adam", "SGD"],
                                                default_value="Adam")
    adam_lr = UniformFloatHyperparameter("adam_lr",
                                         0.00001,
                                         1.0,
                                         default_value=0.001,
                                         log=True)
    sgd_lr = UniformFloatHyperparameter("sgd_lr",
                                        0.00001,
                                        1.0,
                                        default_value=0.1,
                                        log=True)

    cs.add_hyperparameters([
        local_epochs, distill_epochs, fallback, lambda_outlier, lambda_fedprox,
        mixture_coefficients_base, local_optimizer, adam_lr, sgd_lr
    ])

    use_adam_lr = EqualsCondition(child=adam_lr,
                                  parent=local_optimizer,
                                  value='Adam')
    use_sgd_lr = EqualsCondition(child=sgd_lr,
                                 parent=local_optimizer,
                                 value='SGD')
    cs.add_conditions([use_adam_lr, use_sgd_lr])

    #

    # Scenario object
    scenario = Scenario({
        "run_obj": "quality",  # we optimize quality (alternatively runtime)
        "runcount-limit": 100,  # max. number of function evaluations; kept low for this example
        "cs": cs,  # configuration space
        "deterministic": True,
        "shared_model": True,
        "input_psmac_dirs": args.SHARE_PATH + '*',
        "output_dir": args.SHARE_PATH,
        "limit_resources": False
    })

    max_iters = 50
    # intensifier parameters
    intensifier_kwargs = {
        'initial_budget': 5,
        'max_budget': max_iters,
        'eta': 3
    }

    # Optimize, using a SMAC-object
    print("Optimizing!")
    smac = BOHB4HPO(
        scenario=scenario,
        tae_runner=tae_runner,
        intensifier_kwargs=intensifier_kwargs,
        n_jobs=args.WORKERS,
    )

    # Start optimization; even if it is interrupted, fall back to the
    # solver's current incumbent
    try:
        incumbent = smac.optimize()
    finally:
        incumbent = smac.solver.incumbent

    inc_value = smac.get_tae_runner().run(config=incumbent,
                                          instance='2',
                                          budget=1,
                                          seed=0)[1]
    print("Optimized Value: %.4f" % inc_value)
Example #9
def generate_data(smac_class,
                  n_runs=1,
                  output_dir: Union[str, Path] = ".",
                  dataset=None,
                  runcount_limit=50):
    output_dir = Path(output_dir)

    if dataset is None:
        dataset = datasets.load_iris()

    def svm_from_cfg(cfg):
        """ Creates a SVM based on a configuration and evaluates it on the
        iris dataset using cross-validation.

        Parameters
        ----------
        cfg: Configuration (ConfigSpace.ConfigurationSpace.Configuration)
            Configuration containing the parameters.
            Configurations are indexable!

        Returns
        -------
        The cross-validated mean error (1 - accuracy) of the SVM on the
        loaded dataset.
        """
        # For deactivated parameters, the configuration stores None-values.
        # This is not accepted by the SVM, so we remove them.
        cfg = {k: cfg[k] for k in cfg if cfg[k]}
        # We translate boolean values:
        cfg["shrinking"] = True if cfg["shrinking"] == "true" else False
        # And for gamma, we set it to a fixed value or to "auto" (if used)
        if "gamma" in cfg:
            cfg["gamma"] = cfg["gamma_value"] if cfg[
                "gamma"] == "value" else "auto"
            cfg.pop("gamma_value", None)  # Remove "gamma_value"

        clf = svm.SVC(**cfg, random_state=None)

        scores = cross_val_score(clf, dataset.data, dataset.target, cv=5)
        return 1 - np.mean(scores)  # Minimize!

    # logger = logging.getLogger("SVMExample")
    logging.basicConfig(level=logging.INFO)  # logging.DEBUG for debug output

    # Build Configuration Space which defines all parameters and their ranges
    cs = ConfigurationSpace()

    # We define a few possible types of SVM-kernels and add them as "kernel" to our cs
    kernel = CategoricalHyperparameter("kernel",
                                       ["linear", "rbf", "poly", "sigmoid"],
                                       default_value="poly")
    cs.add_hyperparameter(kernel)

    # There are some hyperparameters shared by all kernels
    C = UniformFloatHyperparameter("C", 0.001, 1000.0, default_value=1.0)
    shrinking = CategoricalHyperparameter("shrinking", ["true", "false"],
                                          default_value="true")
    cs.add_hyperparameters([C, shrinking])

    # Others are kernel-specific, so we can add conditions to limit the searchspace
    degree = UniformIntegerHyperparameter(
        "degree", 1, 5, default_value=3)  # Only used by kernel poly
    coef0 = UniformFloatHyperparameter("coef0", 0.0, 10.0,
                                       default_value=0.0)  # poly, sigmoid
    cs.add_hyperparameters([degree, coef0])
    use_degree = InCondition(child=degree, parent=kernel, values=["poly"])
    use_coef0 = InCondition(child=coef0,
                            parent=kernel,
                            values=["poly", "sigmoid"])
    cs.add_conditions([use_degree, use_coef0])

    # This also works for parameters that are a mix of categorical and values from a range of numbers
    # For example, gamma can be either "auto" or a fixed float
    gamma = CategoricalHyperparameter(
        "gamma", ["auto", "value"],
        default_value="auto")  # only rbf, poly, sigmoid
    gamma_value = UniformFloatHyperparameter("gamma_value",
                                             0.0001,
                                             8,
                                             default_value=1)
    cs.add_hyperparameters([gamma, gamma_value])
    # We only activate gamma_value if gamma is set to "value"
    cs.add_condition(
        InCondition(child=gamma_value, parent=gamma, values=["value"]))
    # And again we can restrict the use of gamma in general to the choice of the kernel
    cs.add_condition(
        InCondition(child=gamma,
                    parent=kernel,
                    values=["rbf", "poly", "sigmoid"]))

    # Scenario object
    for i in range(n_runs):
        scenario = Scenario({
            "run_obj": "quality",  # we optimize quality (alternatively runtime)
            "runcount-limit": runcount_limit,  # max. number of function evaluations
            "cs": cs,  # configuration space
            "deterministic": "true",
            "limit_resources": "false",
            "output_dir": str((output_dir / smac_class.__name__ / f"{i:02d}").absolute()),
        })

        # Example call of the function
        # It returns: Status, Cost, Runtime, Additional Infos
        # def_value = svm_from_cfg(cs.get_default_configuration())
        # print(f"Default Value: {def_value:.2f}")
        #
        # Optimize, using a SMAC-object
        smac = smac_class(scenario=scenario, rng=None, tae_runner=svm_from_cfg)

        incumbent = smac.optimize()
        #
        inc_value = svm_from_cfg(incumbent)
        #
        # print(f"Optimized Value: {inc_value:.2f}")
        #
        # # We can also validate our results (though this makes a lot more sense with instances)
        smac.validate(
            config_mode='inc',  # We can choose which configurations to evaluate
            # instance_mode='train+test',  # Defines what instances to validate
            repetitions=100,  # Ignored unless "deterministic" is "false" in the scenario
            n_jobs=1)  # How many cores to use in parallel for optimization
Example #10
        # tensorboard_freq=10,
        training_epochs=optimization_epochs,
        batch_size=50
    )

    acc, loss = network.validate(val_feats, val_labels, show_partial=False)

    return 1 / acc  # minimize the inverse of validation accuracy


# logger = logging.getLogger("Hyperparameter optimization")
# logging.basicConfig(level=logging.INFO)

config_space = ConfigurationSpace()
config_space.add_hyperparameters(list(space.values()))
config_space.add_conditions(hyper_space_conditions)


scenario_dict = {"run_obj": "quality",
                 "runcount-limit": space_optimization_evals,
                 "cs": config_space,
                 "deterministic": "true",
                 "output-dir": project_data.OUT_DIR + '/smac/'
                 }

scenario = Scenario(scenario_dict)
runhistory = RunHistory(aggregate_func=None)
stats = Stats(scenario)

smac = SMAC(scenario=scenario,
            runhistory=runhistory,
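The snippet is cut off mid-constructor. With the old-style SMAC facade, the call plausibly continues along these lines (a hypothetical completion; "objective" stands for the function above that ends in "return 1/acc"):

smac = SMAC(scenario=scenario,
            runhistory=runhistory,
            stats=stats,                    # the Stats object built above
            rng=np.random.RandomState(42),  # hypothetical seed
            tae_runner=objective)
incumbent = smac.optimize()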
Example #11
def get_config_space():

    # Build Configuration Space which defines all parameters and their ranges
    cs = ConfigurationSpace()
    algorithm = CategoricalHyperparameter("algorithm", ["nn", "svm", "rf"],
                                          default="rf")

    do_bootstrapping = CategoricalHyperparameter("do_bootstrapping",
                                                 ["true", "false"],
                                                 default="true")
    num_trees = UniformIntegerHyperparameter("num_trees", 10, 50, default=10)
    max_features = UniformIntegerHyperparameter("max_features",
                                                1,
                                                boston.data.shape[1],
                                                default=1)
    min_weight_frac_leaf = UniformFloatHyperparameter("min_weight_frac_leaf",
                                                      0.0,
                                                      0.5,
                                                      default=0.0)
    criterion = CategoricalHyperparameter("criterion", ["mse", "mae"],
                                          default="mse")
    min_samples_to_split = UniformIntegerHyperparameter("min_samples_to_split",
                                                        2,
                                                        20,
                                                        default=2)
    min_samples_in_leaf = UniformIntegerHyperparameter("min_samples_in_leaf",
                                                       1,
                                                       20,
                                                       default=1)
    max_leaf_nodes = UniformIntegerHyperparameter("max_leaf_nodes",
                                                  10,
                                                  1000,
                                                  default=100)

    use_do_bootstrapping = InCondition(child=do_bootstrapping,
                                       parent=algorithm,
                                       values=["rf"])
    use_num_trees = InCondition(child=num_trees,
                                parent=algorithm,
                                values=["rf"])
    use_max_features = InCondition(child=max_features,
                                   parent=algorithm,
                                   values=["rf"])
    use_min_weight_frac_leaf = InCondition(child=min_weight_frac_leaf,
                                           parent=algorithm,
                                           values=["rf"])
    use_criterion = InCondition(child=criterion,
                                parent=algorithm,
                                values=["rf"])
    use_min_samples_in_leaf = InCondition(child=min_samples_in_leaf,
                                          parent=algorithm,
                                          values=["rf"])
    use_min_samples_to_split = InCondition(child=min_samples_to_split,
                                           parent=algorithm,
                                           values=["rf"])
    use_max_leaf_nodes = InCondition(child=max_leaf_nodes,
                                     parent=algorithm,
                                     values=["rf"])

    cs.add_hyperparameters([
        do_bootstrapping, num_trees, min_weight_frac_leaf, criterion,
        max_features, min_samples_to_split, min_samples_in_leaf, max_leaf_nodes
    ])
    cs.add_conditions([
        use_criterion, use_num_trees, use_max_features, use_max_leaf_nodes,
        use_do_bootstrapping, use_min_samples_in_leaf,
        use_min_samples_to_split, use_min_weight_frac_leaf
    ])
    # We define a few possible types of SVM-kernels and add them as "kernel" to our cs
    kernel = CategoricalHyperparameter("kernel",
                                       ["linear", "rbf", "poly", "sigmoid"],
                                       default_value="poly")
    cs.add_hyperparameter(kernel)
    # Restrict the kernel itself to the SVM branch; its children defined below
    # then inherit this restriction transitively.
    cs.add_condition(InCondition(child=kernel, parent=algorithm, values=["svm"]))

    # There are some hyperparameters shared by all kernels
    C = UniformFloatHyperparameter("C", 0.001, 1000.0, default=1.0)
    use_C = InCondition(child=C, parent=algorithm, values=["svm"])
    shrinking = CategoricalHyperparameter("shrinking", ["true", "false"],
                                          default="true")
    use_shrinking = InCondition(child=shrinking,
                                parent=algorithm,
                                values=["svm"])
    cs.add_hyperparameters([C, shrinking])
    cs.add_conditions([use_C, use_shrinking])

    # Others are kernel-specific, so we can add conditions to limit the searchspace
    degree = UniformIntegerHyperparameter(
        "degree", 1, 5, default_value=3)  # Only used by kernel poly
    coef0 = UniformFloatHyperparameter("coef0", 0.0, 10.0,
                                       default_value=0.0)  # poly, sigmoid
    cs.add_hyperparameters([degree, coef0])
    use_degree = InCondition(child=degree, parent=kernel, values=["poly"])
    use_coef0 = InCondition(child=coef0,
                            parent=kernel,
                            values=["poly", "sigmoid"])
    cs.add_conditions([use_degree, use_coef0])

    # Note: conditions cannot themselves be conditioned in ConfigSpace. Since
    # "kernel" is already restricted to algorithm == "svm" above, "degree" and
    # "coef0" are inactive outside the SVM branch without further conditions.
    # This also works for parameters that are a mix of categorical and values from a range of numbers
    # For example, gamma can be either "auto" or a fixed float
    gamma = CategoricalHyperparameter(
        "gamma", ["auto", "value"], default_value="auto")  # only rbf, poly, sigmoid
    gamma_value = UniformFloatHyperparameter("gamma_value",
                                             0.0001,
                                             8,
                                             default_value=1)
    cs.add_hyperparameters([gamma, gamma_value])
    # We only activate gamma_value if gamma is set to "value"
    use_gamma_value = InCondition(child=gamma_value,
                                  parent=gamma,
                                  values=["value"])
    # And again we can restrict the use of gamma in general to the choice of the kernel
    use_gamma = InCondition(child=gamma,
                            parent=kernel,
                            values=["rbf", "poly", "sigmoid"])
    cs.add_conditions([use_gamma_value, use_gamma])
    # As above, "gamma" is a child of "kernel", which is restricted to the SVM
    # branch, so no extra conditions on "algorithm" are needed here.
    return cs
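A quick, hedged sanity check of the transitive conditions in this space (assumes the module-level boston dataset is loaded so get_config_space can run):

cs = get_config_space()
for cfg in cs.sample_configuration(10):
    d = cfg.get_dictionary()
    # "degree" should only be active on the SVM branch with the poly kernel
    if "degree" in d:
        assert d["algorithm"] == "svm" and d["kernel"] == "poly"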
Example #12
    def bayesianmcmc(self):
        from sklearn.decomposition import PCA
        from sklearn.manifold import Isomap
        from sklearn import svm
        from sklearn.ensemble import RandomForestClassifier
        from mahotas.features import haralick
        import os
        from sklearn.model_selection import StratifiedKFold, train_test_split
        from sklearn import metrics
        from sklearn.preprocessing import StandardScaler
        import cv2
        from sklearn.neighbors import KNeighborsClassifier

        def naive_all_features(names):
            f = []
            for i in range(len(names)):
                I = cv2.imread(names[i])
                # Check for an unreadable/empty image *before* touching I.shape
                if I is None or I.size == 0 or np.sum(I[:]) == 0 \
                        or I.shape[0] == 0 or I.shape[1] == 0:
                    if I is None or len(I.shape) != 3:
                        continue  # nothing recoverable for this image
                    l = I.shape
                    f1 = np.zeros((1, l[0] * l[1] * l[2]))
                else:
                    f1 = np.expand_dims(I.flatten(), 0)
                f = f1 if len(f) == 0 else np.vstack((f, f1))
            return f

        def haralick_all_features(X, distance=1):
            f = []
            for i in range(len(X)):
                I = cv2.imread(X[i])
                if I is None or I.size == 0 or np.sum(
                        I[:]) == 0 or I.shape[0] == 0 or I.shape[1] == 0:
                    h = np.zeros((1, 13))
                else:
                    I = cv2.cvtColor(I, cv2.COLOR_BGR2GRAY)
                    h = haralick(I,
                                 distance=distance,
                                 return_mean=True,
                                 ignore_zeros=False)
                    h = np.expand_dims(h, 0)
                if i == 0:
                    f = h
                else:
                    f = np.vstack((f, h))
            return f

        def CNN_all_features(names, cnn):
            from keras.applications.vgg19 import VGG19
            from keras.applications.inception_v3 import InceptionV3
            from keras.applications.vgg19 import preprocess_input
            f = []
            if cnn == 'VGG':
                model = VGG19(weights='imagenet')
                dsize = (224, 224)
            else:
                model = InceptionV3(weights='imagenet')
                dsize = (299, 299)
            for i in range(len(names)):
                img = cv2.imread(names[i])
                img = cv2.resize(img, dsize=dsize)
                img = img.astype('float32')
                x = np.expand_dims(img, axis=0)
                x = preprocess_input(x)
                features = model.predict(x)
                if i == 0:
                    f = features
                else:
                    f = np.vstack((f, features))
            return f

        def VGG_all_features(names, X):
            home = os.path.expanduser('~')
            if os.path.exists(self.data_loc + 'features/bayesian1/VGG_' +
                              self.data_name + '.npz'):
                f = np.load(
                    open(
                        self.data_loc + 'features/bayesian1/VGG_' +
                        self.data_name + '.npz', 'rb'))
                return f.f.arr_0[X, :]
            else:
                f = CNN_all_features(names, 'VGG')
                np.savez(
                    open(
                        self.data_loc + 'features/bayesian1/VGG_' +
                        self.data_name + '.npz', 'wb'), f)
                return f[X, :]

        def inception_all_features(names, X):
            home = os.path.expanduser('~')
            if os.path.exists(self.data_loc + 'features/bayesian1/inception_' +
                              self.data_name + '.npz'):
                f = np.load(
                    open(
                        self.data_loc + 'features/bayesian1/inception_' +
                        self.data_name + '.npz', 'rb'))
                return f.f.arr_0[X, :]
            else:
                f = CNN_all_features(names, 'inception')
                np.savez(
                    open(
                        self.data_loc + 'features/bayesian1/inception_' +
                        self.data_name + '.npz', 'wb'), f)
                return f[X, :]

        def principal_components(X, whiten=True):
            pca = PCA(whiten=whiten)
            maxvar = 0.95
            X = np.asarray(X)
            if len(X.shape) == 1:
                X = X.reshape(-1, 1)
            data = X
            X1 = pca.fit(X)
            var = pca.explained_variance_ratio_
            s1 = 0
            for i in range(len(var)):
                s1 += var[i]
            s = 0
            for i in range(len(var)):
                s += var[i]
                if (s * 1.0 / s1) >= maxvar:
                    break
            pca = PCA(n_components=i + 1, whiten=whiten)
            pca.fit(data)
            return pca

        def isomap(X, n_neighbors=5, n_components=2):
            iso = Isomap(n_components=n_components, n_neighbors=n_neighbors)
            X = np.asarray(X)
            if len(X.shape) == 1:
                X = X.reshape(-1, 1)
            iso.fit(X)
            return iso

        def random_forests(X, y, n_estimators, max_features):
            clf = RandomForestClassifier(n_estimators=n_estimators,
                                         max_features=max_features,
                                         class_weight='balanced')
            clf.fit(X, y)
            return clf

        def support_vector_machines(X, y, C, gamma):
            clf = svm.SVC(C=C,
                          gamma=gamma,
                          class_weight='balanced',
                          probability=True)
            clf.fit(X, y)
            return clf

        def knn(X, y, neighbors=1):
            clf = KNeighborsClassifier(n_neighbors=neighbors)
            clf.fit(X, y)
            return clf

        def pipeline_from_cfg(cfg):
            cfg = {k: cfg[k] for k in cfg if cfg[k]}
            # Load the data
            data_home = self.data_loc + 'datasets/' + self.data_name + '/'
            l1 = os.listdir(data_home)
            y = []
            names = []
            cnt = 0
            for z in range(len(l1)):
                if l1[z][0] == '.':
                    continue
                l = os.listdir(data_home + l1[z] + '/')
                y += [cnt] * len(l)
                cnt += 1
                for i in range(len(l)):
                    names.append(data_home + l1[z] + '/' + l[i])
            # Train val split
            X = np.empty((len(y), 1))
            indices = np.arange(len(y))
            X1, _, y1, y_val, id1, _ = train_test_split(X,
                                                        y,
                                                        indices,
                                                        test_size=0.2,
                                                        random_state=42,
                                                        shuffle=True)
            s = []
            val_splits = 3
            kf = StratifiedKFold(n_splits=val_splits,
                                 random_state=42,
                                 shuffle=True)
            names1 = []
            for i in range(len(id1)):
                names1.append((names[id1[i]]))
            f11 = []
            for idx1, idx2 in kf.split(X1, y1):
                # Feature extraction
                ids1 = []
                X_train = []
                y_train = []
                for i in idx1:
                    X_train.append(names1[i])
                    y_train.append(y1[i])
                    ids1.append(id1[i])
                X_val = []
                y_val = []
                ids2 = []
                for i in idx2:
                    X_val.append(names1[i])
                    y_val.append(y1[i])
                    ids2.append(id1[i])
                # Feature extraction
                f_train = []
                # f_test = []
                f_val = []
                if cfg['feature_extraction'] == "haralick":
                    f_val = haralick_all_features(X_val,
                                                  cfg["haralick_distance"])
                    f_train = haralick_all_features(X_train,
                                                    cfg["haralick_distance"])
                elif cfg['feature_extraction'] == "VGG":
                    f_val = VGG_all_features(names, ids2)
                    f_train = VGG_all_features(names, ids1)
                elif cfg['feature_extraction'] == "inception":
                    f_val = inception_all_features(names, ids2)
                    f_train = inception_all_features(names, ids1)
                elif cfg['feature_extraction'] == "naive_feature_extraction":
                    f_val = naive_all_features(X_val)
                    f_train = naive_all_features(X_train)

                # Dimensionality reduction
                if cfg['dimensionality_reduction'] == "PCA":
                    cfg["pca_whiten"] = True if cfg[
                        "pca_whiten"] == "true" else False
                    dr = principal_components(f_train, cfg["pca_whiten"])
                    f_train = dr.transform(f_train)
                    f_val = dr.transform(f_val)

                elif cfg['dimensionality_reduction'] == "ISOMAP":
                    dr = isomap(f_train, cfg["n_neighbors"],
                                cfg["n_components"])
                    f_train = dr.transform(f_train)
                    f_val = dr.transform(f_val)

                elif cfg['dimensionality_reduction'] == 'naive_dimensionality_reduction':
                    pass  # keep the extracted features unchanged

                # Pre-processing
                normalizer = StandardScaler().fit(f_train)
                f_train = normalizer.transform(f_train)
                f_val = normalizer.transform(f_val)

                # Learning algorithms
                if cfg['learning_algorithm'] == "RF":
                    clf = random_forests(f_train, y_train, cfg["n_estimators"],
                                         cfg["max_features"])
                elif cfg['learning_algorithm'] == "SVM":
                    clf = support_vector_machines(f_train, y_train,
                                                  cfg["svm_C"],
                                                  cfg["svm_gamma"])
                elif cfg['learning_algorithm'] == 'naive_learning_algorithm':
                    clf = knn(f_train, y_train)
                p_pred = clf.predict_proba(f_val)
                f11.append(metrics.log_loss(y_val, p_pred))
                s.append(clf.score(f_val, y_val))
            return np.mean(f11)

        self.potential = []
        self.best_pipelines = []
        self.times = []
        self.error_curves = []
        cs = ConfigurationSpace()
        feature_extraction = CategoricalHyperparameter(
            "feature_extraction", ["haralick", "VGG", "inception"],
            default_value="haralick")
        cs.add_hyperparameter(feature_extraction)

        dimensionality_reduction = CategoricalHyperparameter(
            "dimensionality_reduction", ["PCA", "ISOMAP"], default_value="PCA")
        cs.add_hyperparameter(dimensionality_reduction)

        learning_algorithm = CategoricalHyperparameter("learning_algorithm",
                                                       ["SVM", "RF"],
                                                       default_value="RF")
        cs.add_hyperparameter(learning_algorithm)

        haralick_distance = UniformIntegerHyperparameter("haralick_distance",
                                                         1,
                                                         3,
                                                         default_value=1)
        cs.add_hyperparameter(haralick_distance)
        cond1 = InCondition(child=haralick_distance,
                            parent=feature_extraction,
                            values=["haralick"])
        cs.add_condition(cond1)

        pca_whiten = CategoricalHyperparameter("pca_whiten", ["true", "false"],
                                               default_value="true")
        cs.add_hyperparameter(pca_whiten)
        cs.add_condition(
            InCondition(child=pca_whiten,
                        parent=dimensionality_reduction,
                        values=["PCA"]))

        n_neighbors = UniformIntegerHyperparameter("n_neighbors",
                                                   3,
                                                   7,
                                                   default_value=5)
        n_components = UniformIntegerHyperparameter("n_components",
                                                    2,
                                                    4,
                                                    default_value=2)
        cs.add_hyperparameters([n_neighbors, n_components])
        cs.add_condition(
            InCondition(child=n_components,
                        parent=dimensionality_reduction,
                        values=["ISOMAP"]))
        cs.add_condition(
            InCondition(child=n_neighbors,
                        parent=dimensionality_reduction,
                        values=["ISOMAP"]))

        svm_C = UniformFloatHyperparameter("svm_C", 0.1, 100.0,
                                           default_value=1.0)
        cs.add_hyperparameter(svm_C)
        svm_gamma = UniformFloatHyperparameter("svm_gamma", 0.01, 8,
                                               default_value=1)
        cs.add_hyperparameter(svm_gamma)
        cond1 = InCondition(child=svm_C,
                            parent=learning_algorithm,
                            values=["SVM"])
        cond2 = InCondition(child=svm_gamma,
                            parent=learning_algorithm,
                            values=["SVM"])
        cs.add_conditions([cond1, cond2])

        n_estimators = UniformIntegerHyperparameter("n_estimators",
                                                    8,
                                                    300,
                                                    default_value=10)
        max_features = UniformFloatHyperparameter("max_features",
                                                  0.3,
                                                  0.8,
                                                  default_value=0.5)
        cs.add_hyperparameters([max_features, n_estimators])
        cond1 = InCondition(child=n_estimators,
                            parent=learning_algorithm,
                            values=["RF"])
        cond2 = InCondition(child=max_features,
                            parent=learning_algorithm,
                            values=["RF"])
        cs.add_conditions([cond1, cond2])

        scenario = Scenario({
            "run_obj": "quality",
            "cutoff_time": 100000,
            "runcount_limit": 10000 * 10,
            "cs": cs,
            "maxR": 100000,
            "wallclock_limit": 1000000,
            "deterministic": "true"
        })
        smac = SMAC(scenario=scenario,
                    rng=np.random.RandomState(42),
                    tae_runner=pipeline_from_cfg)
        # Note: stock SMAC's optimize() returns only the incumbent; this
        # unpacking assumes a locally modified SMAC that also returns
        # trajectory information.
        incumbent, incs, incumbents, incumbents1, times = smac.optimize()
        inc_value = pipeline_from_cfg(incumbent)
        self.best_pipelines.append(incumbent)
        self.potential.append(inc_value)
        self.incumbents = incumbents
        self.all_incumbents = incumbents1
        self.error_curves.append(incs)
        self.times = times
        pickle.dump(
            self,
            open(
                self.results_loc + 'intermediate/SMAC/SMAC_' + self.data_name +
                '_run_' + str(self.run) + '_full.pkl', 'wb'))
Example #13
step2_child_anova = InCondition(child=anova_perc,
                                parent=step2,
                                values=["cpoFilterAnova(perc=perc_val)"])
step2_child_kruskal = InCondition(child=kruskal_perc,
                                  parent=step2,
                                  values=["cpoFilterKruskal(perc=perc_val)"])
step2_child_univar = InCondition(child=univar_perc,
                                 parent=step2,
                                 values=["cpoFilterUnivariate(perc=perc_val)"])
step2_child_pca = InCondition(
    child=pca_perc,
    parent=step2,
    values=["cpoPca(center = FALSE, rank = rank_val)"])
cs.add_conditions([
    step2_child_anova, step2_child_kruskal, step2_child_univar, step2_child_pca
])

step3 = CategoricalHyperparameter(
    "Model", ['kknn', 'ksvm', 'ranger', 'xgboost', 'naiveBayes'])
cs.add_hyperparameter(step3)

hyper_kknn = UniformIntegerHyperparameter("lrn_kknn_k", 1, 19, default_value=1)
hyper_ksvm_C = UniformFloatHyperparameter("lrn_svm_C",
                                          2**(-15),
                                          2**(15),
                                          default_value=1)

hyper_ksvm_sigma = UniformFloatHyperparameter("lrn_svm_sigma",
                                              2**(-15),
                                              2**(15),
Example #14
def main():
    parser = argparse.ArgumentParser(description='Dump data of a log.')
    parser.add_argument('--dataset',
                        type=str,
                        default='labelme',
                        help='dataset to run smac on')
    parser.add_argument('--m',
                        type=int,
                        default=8,
                        help='number of codebooks')

    args = parser.parse_args()

    # Fixed parameters
    dataset = CategoricalHyperparameter("dataset", [args.dataset],
                                        default_value=args.dataset)
    m = CategoricalHyperparameter("m", [str(args.m)],
                                  default_value=str(args.m))

    # Build Configuration Space which defines all parameters and their ranges
    ilsiter = UniformIntegerHyperparameter("ilsiter", 1, 16, default_value=8)
    npert = UniformIntegerHyperparameter("npert",
                                         0,
                                         args.m - 1,
                                         default_value=4)
    randord = CategoricalHyperparameter("randord", ["true", "false"],
                                        default_value="true")

    # SR parameters
    sr_method = CategoricalHyperparameter("SR_method", ["LSQ", "SR_C", "SR_D"],
                                          default_value="SR_D")
    schedule = CategoricalHyperparameter("schedule", ["1", "2", "3"],
                                         default_value="1")
    p = UniformFloatHyperparameter("p", 0.1, 1., default_value=0.5)

    # Schedule and p only make sense in SR
    use_schedule = InCondition(child=schedule,
                               parent=sr_method,
                               values=["SR_C", "SR_D"])
    use_p = InCondition(child=p, parent=sr_method, values=["SR_C", "SR_D"])

    cs = ConfigurationSpace()
    cs.add_hyperparameters(
        [dataset, m, ilsiter, npert, randord, sr_method, schedule, p])
    cs.add_conditions([use_schedule, use_p])

    # Scenario object
    scenario = Scenario({
        "run_obj": "quality",  # we optimize quality (alternatively runtime)
        "runcount-limit": 200,  # maximum function evaluations
        "cs": cs,  # configuration space
        "deterministic": "false"
    })

    # Optimize, using a SMAC-object
    thing_to_call = AbstractTAFunc(recall_from_cfg, use_pynisher=False)
    smac = SMAC(scenario=scenario,
                rng=np.random.RandomState(42),
                tae_runner=thing_to_call)

    print("Optimizing!")
    incumbent = smac.optimize()
    inc_value = recall_from_cfg(incumbent)
    print("Optimized Value: %.2f" % (inc_value))
Example #15
        def test_photon_implementation_switch(self):
            # PHOTON implementation
            self.pipe.add(PipelineElement('StandardScaler'))
            self.pipe += PipelineElement(
                'PCA', hyperparameters={'n_components': IntegerRange(5, 30)})
            estimator_siwtch = Switch("Estimator")
            estimator_siwtch += PipelineElement('SVC',
                                                hyperparameters={
                                                    'kernel':
                                                    Categorical(
                                                        ["rbf", 'poly']),
                                                    'C':
                                                    FloatRange(0.5, 200)
                                                },
                                                gamma='auto')
            estimator_switch += PipelineElement('RandomForestClassifier',
                                                hyperparameters={
                                                    'criterion':
                                                    Categorical(
                                                        ['gini', 'entropy']),
                                                    'min_samples_split':
                                                    IntegerRange(2, 4)
                                                })
            self.pipe += estimator_switch
            self.X, self.y = self.simple_classification()
            self.pipe.fit(self.X, self.y)

            # direct AUTO ML implementation

            # Build Configuration Space which defines all parameters and their ranges
            cs = ConfigurationSpace()
            n_components = UniformIntegerHyperparameter(
                "PCA__n_components", 5, 30)
            cs.add_hyperparameter(n_components)

            switch = CategoricalHyperparameter("Estimator_switch",
                                               ['svc', 'rf'])
            cs.add_hyperparameter(switch)

            kernel = CategoricalHyperparameter("SVC__kernel", ["rbf", 'poly'])
            cs.add_hyperparameter(kernel)
            c = UniformFloatHyperparameter("SVC__C", 0.5, 200)
            cs.add_hyperparameter(c)
            use_svc_kernel = InCondition(child=kernel,
                                         parent=switch,
                                         values=["svc"])
            use_svc_c = InCondition(child=c,
                                    parent=switch,
                                    values=["svc"])

            criterion = CategoricalHyperparameter(
                "RandomForestClassifier__criterion", ['gini', 'entropy'])
            cs.add_hyperparameter(criterion)
            minsplit = UniformIntegerHyperparameter(
                "RandomForestClassifier__min_samples_split", 2, 4)
            cs.add_hyperparameter(minsplit)

            use_rf_crit = InCondition(child=criterion,
                                      parent=switch,
                                      values=["rf"])
            use_rf_minsplit = InCondition(child=minsplit,
                                          parent=switch,
                                          values=["rf"])

            cs.add_conditions(
                [use_svc_c, use_svc_kernel, use_rf_crit, use_rf_minsplit])

            # Scenario object
            scenario = Scenario({
                "run_obj": "quality",
                "cs": cs,
                "deterministic": "true",
                "wallclock_limit": self.time_limit,
                "limit_resources": False,
                'abort_on_first_run_crash': False
            })

            # Optimize, using SMAC directly
            smac = SMAC4HPO(scenario=scenario,
                            rng=42,
                            tae_runner=self.objective_function_switch)
            _ = smac.optimize()

            runhistory_photon = self.smac_helper["data"].solver.runhistory
            runhistory_original = smac.solver.runhistory

            x_ax = range(
                1,
                min(len(runhistory_original._cost_per_config.keys()),
                    len(runhistory_photon._cost_per_config.keys())) + 1)
            y_ax_original = [
                runhistory_original._cost_per_config[tmp] for tmp in x_ax
            ]
            y_ax_photon = [
                runhistory_photon._cost_per_config[tmp] for tmp in x_ax
            ]

            min_len = min(len(y_ax_original), len(y_ax_photon))
            self.assertLessEqual(
                np.max(
                    np.abs(
                        np.array(y_ax_original[:min_len]) -
                        np.array(y_ax_photon[:min_len]))), 0.01)