def define_search_space(objective, starting_params):
    if objective == "rmse":
        prediction = starting_params["collaborative_params"][
            "prediction_network_params"]
        rmse_space = {
            'lr':
            hp.qlognormal("lr", np.log(prediction["lr"]),
                          0.5 * prediction["lr"], 0.05 * prediction["lr"]),
            'epochs':
            hp.quniform('epochs', prediction["epochs"] - 20,
                        prediction["epochs"] + 20, 5),
            'kernel_l2':
            hp.choice('kernel_l2', [
                0.0,
                hp.qloguniform('kernel_l2_choice', np.log(1e-9), np.log(1e-5),
                               5e-9)
            ]),
            'batch_size':
            hp.qloguniform('batch_size', np.log(512), np.log(1023), 512),
            # 'batch_size': hp.choice('batch_size', [512]),
            'conv_depth':
            hp.quniform('conv_depth', 1, prediction["conv_depth"] + 1, 1),
            'gaussian_noise':
            hp.qlognormal('gaussian_noise',
                          np.log(prediction["gaussian_noise"]),
                          0.5 * prediction["gaussian_noise"], 0.005),
            'network_depth':
            hp.quniform('network_depth', 1, prediction['network_depth'] + 1,
                        1),
            'n_dims':
            hp.quniform('n_dims', starting_params["n_dims"] - 32,
                        starting_params["n_dims"] + 64, 16),
        }
        return rmse_space
    if objective == "ndcg":

        embedding = starting_params["collaborative_params"]["user_item_params"]
        ndcg_space = {
            'gcn_lr':
            hp.qlognormal("gcn_lr", np.log(embedding["gcn_lr"]),
                          0.5 * embedding["gcn_lr"],
                          0.05 * embedding["gcn_lr"]),
            'gcn_epochs':
            hp.quniform('gcn_epochs', embedding["gcn_epochs"],
                        embedding["gcn_epochs"] + 20, 5),
            'gaussian_noise':
            hp.qlognormal('gaussian_noise',
                          np.log(embedding["gaussian_noise"]),
                          1.0 * embedding["gaussian_noise"], 0.005),
            'margin':
            hp.quniform('margin', 0.8, 1.8, 0.2),
            'n_dims':
            hp.quniform('n_dims', starting_params["n_dims"],
                        starting_params["n_dims"] + 96, 16),
        }

        return ndcg_space
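
A quick way to sanity-check a space like this is to draw raw samples from it. The sketch below assumes a minimal, hypothetical starting_params layout that carries the keys read above.

# Minimal sanity check; all concrete values here are hypothetical.
import numpy as np
import hyperopt.pyll.stochastic
from hyperopt import hp

starting_params = {
    "n_dims": 64,
    "collaborative_params": {
        "prediction_network_params": {
            "lr": 1e-3, "epochs": 50, "conv_depth": 2,
            "gaussian_noise": 0.1, "network_depth": 2,
        },
        "user_item_params": {
            "gcn_lr": 1e-3, "gcn_epochs": 20, "gaussian_noise": 0.1,
        },
    },
}
print(hyperopt.pyll.stochastic.sample(define_search_space("rmse", starting_params)))
print(hyperopt.pyll.stochastic.sample(define_search_space("ndcg", starting_params)))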
Example 2
def main():
    client = Client()
    print('n. clients:', len(client))

    digits = load_digits()

    X = MinMaxScaler().fit_transform(digits.data)
    y = digits.target

    pre_processing = hp.choice('preproc_algo', [
        scope.PCA(
            n_components=1 + hp.qlognormal(
                'pca_n_comp', np.log(10), np.log(10), 1),
            whiten=hp.choice(
                'pca_whiten', [False, True])),
        scope.GMM(
            n_components=1 + hp.qlognormal(
                'gmm_n_comp', np.log(100), np.log(10), 1),
            covariance_type=hp.choice(
                'gmm_covtype', ['spherical', 'tied', 'diag', 'full'])),
        ])

    classifier = hp.choice('classifier', [
        scope.DecisionTreeClassifier(
            criterion=hp.choice('dtree_criterion', ['gini', 'entropy']),
            max_features=hp.uniform('dtree_max_features', 0, 1),
            max_depth=hp.quniform('dtree_max_depth', 1, 25, 1)),
        scope.SVC(
            C=hp.lognormal('svc_rbf_C', 0, 3),
            kernel='rbf',
            gamma=hp.lognormal('svc_rbf_gamma', 0, 2),
            tol=hp.lognormal('svc_rbf_tol', np.log(1e-3), 1)),
        ])

    sklearn_space = {'pre_processing': pre_processing,
                     'classifier': classifier}

    digits_cv_split_filenames = mmap_utils.persist_cv_splits(
                X, y, name='digits_10', n_cv_iter=10)

    mmap_utils.warm_mmap_on_cv_splits(client, digits_cv_split_filenames)

    trials = hyperselect.IPythonTrials(client)
    trials.fmin(
        partial(compute_evaluation,
            cv_split_filename=digits_cv_split_filenames[0],
            ),
        sklearn_space,
        algo=hyperopt.tpe.suggest,
        max_evals=30,
        verbose=1,
        )
    trials.wait()
    print(trials.best_trial)
Example 3
class DecisionTreeModel(TreeBasedModel):
    @staticmethod
    def build_estimator(args, train_data=None):
        return DecisionTreeRegressor(random_state=RANDOM_STATE, presort=True, **args)

    hp_space = {
        "criterion": hp.choice("criterion", ["mse", "friedman_mse", "mae"]),
        "max_depth": hp.pchoice(
            "max_depth_enabled",
            [
                (0.7, None),
                (0.3, 1 + scope.int(hp.qlognormal("max_depth", np.log(30), 0.5, 3))),
            ],
        ),
        "splitter": hp.choice("splitter_str", ["best", "random"]),
        "max_features": hp.pchoice(
            "max_features_str",
            [
                (0.2, "sqrt"),  # most common choice.
                (0.1, "log2"),  # less common choice.
                (0.1, None),  # all features, less common choice.
                (0.6, hp.uniform("max_features_str_frac", 0.0, 1.0)),
            ],
        ),
        "min_samples_split": scope.int(hp.quniform("min_samples_split_str", 2, 10, 1)),
        "min_samples_leaf": hp.choice(
            "min_samples_leaf_enabled",
            [
                1,
                scope.int(
                    hp.qloguniform("min_samples_leaf", np.log(1.5), np.log(50.5), 1)
                ),
            ],
        ),
    }
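
A hedged usage sketch: sample one configuration from the class-level space and build the estimator from it. RANDOM_STATE and TreeBasedModel are assumed from the surrounding project; note that presort was removed in scikit-learn 0.24, so the class above targets older releases.

import hyperopt.pyll.stochastic

args = hyperopt.pyll.stochastic.sample(DecisionTreeModel.hp_space)
model = DecisionTreeModel.build_estimator(args)  # DecisionTreeRegressor with sampled args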
Example 4
def parse_search_space(self, learner_space):
    '''
    The search space is a dictionary of tuples, e.g.
    {'n_estimators': ('uniform', 1, 1000, 'discrete')}
    '''
    search_space = dict()
    for k, v in learner_space.items():
        if v[2] == 'samples':
            # cap the upper bound at the per-fold sample count
            v = (v[0], v[1], min(100, self.X.shape[0] // len(self.kf) - 1), v[3])
        if v[3] == 'discrete':
            search_space[k] = hp.quniform(k, v[1], v[2], 1)
        elif v[0] == 'uniform':
            search_space[k] = hp.uniform(k, v[1], v[2])
        elif v[0] == 'loguniform':
            search_space[k] = hp.loguniform(k, v[1], v[2])
        elif v[0] == 'normal':
            search_space[k] = hp.normal(k, v[1], v[2])
        elif v[0] == 'lognormal':
            search_space[k] = hp.lognormal(k, v[1], v[2])
        elif v[0] == 'quniform':
            search_space[k] = hp.quniform(k, v[1], v[2], v[3])
        elif v[0] == 'qloguniform':
            search_space[k] = hp.qloguniform(k, v[1], v[2], v[3])
        elif v[0] == 'qnormal':
            search_space[k] = hp.qnormal(k, v[1], v[2], v[3])
        elif v[0] == 'qlognormal':
            search_space[k] = hp.qlognormal(k, v[1], v[2], v[3])
    return search_space
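
A hedged usage sketch, following the tuple format the docstring describes; `tuner` stands for an instance of the owning class (which is assumed to provide X and kf):

# Hypothetical learner space in the documented (dist, low, high, kind) format:
learner_space = {
    'n_estimators': ('uniform', 10, 500, 'discrete'),
    'learning_rate': ('loguniform', np.log(1e-4), np.log(1e-1), 'continuous'),
    'subsample': ('uniform', 0.5, 1.0, 'continuous'),
}
search_space = tuner.parse_search_space(learner_space)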
Example 5
def build_dist_func_instance(hp_name, func, args, hp_size=None):
    '''
    args:
        hp_name: the name of the hyperparameter associated with this func
        func: name of hyperopt dist func 
        args: list of float values

    processing:
        instantiate the named dist func with specified args

    return:
        instance of hyperopt dist func
    '''
    if func == "choice":
        dist = hp.choice(hp_name, args)

    elif func == "randint":
        max_value = 65535 if len(args) == 0 else args[0]

        # specify "size=None" to workaround hyperopt bug
        if hp_size:
            # let size default to () (error if we try to set it explictly)
            dist = hp.randint(hp_name, max_value)
        else:
            dist = hp.randint(hp_name, max_value, size=None)

    elif func == "uniform":
        arg_check(func, args, count=2)
        dist = hp.uniform(hp_name, *args)

    elif func == "normal":
        arg_check(func, args, count=2)
        dist = hp.normal(hp_name, *args)

    elif func == "loguniform":
        arg_check(func, args, count=2)
        dist = hp.loguniform(hp_name, *args)

    elif func == "lognormal":
        arg_check(func, args, count=2)
        dist = hp.lognormal(hp_name, *args)

    elif func == "quniform":
        arg_check(func, args, count=3)
        dist = hp.quniform(hp_name, *args)

    elif func == "qnormal":
        arg_check(func, args, count=3)
        dist = hp.qnormal(hp_name, *args)

    elif func == "qloguniform":
        arg_check(func, args, count=3)
        dist = hp.qloguniform(hp_name, *args)

    elif func == "qlognormal":
        arg_check(func, args, count=3)
        dist = hp.qlognormal(hp_name, *args)

    return dist
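
For reference, a hedged call sketch; arg_check is assumed to verify that len(args) matches count, and the hyperparameter names here are hypothetical:

lr_dist = build_dist_func_instance("lr", "qlognormal",
                                   [np.log(1e-3), 0.5, 1e-4])  # mu, sigma, q
units_dist = build_dist_func_instance("units", "quniform", [32, 512, 32])  # low, high, q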
Example 6
    def test_read_qlognormal(self):
        # 0 float
        # 1   hyperopt_param
        # 2     Literal{qlognormal}
        # 3     qlognormal
        # 4       Literal{0.0}
        # 5       Literal{1.0}
        # 6       Literal{0.5}
        qlognormal = hp.qlognormal("qlognormal", 0.0, 1.0, 0.5).inputs()[0].inputs()[1]
        ret = self.pyll_reader.read_qlognormal(qlognormal, "qlognormal")
        expected = configuration_space.NormalFloatHyperparameter(
            "qlognormal", 0.0, 1.0, q=0.5, base=np.e)
        self.assertEqual(expected, ret)

        qlognormal = hp.qlognormal("qlognormal", 1.0, 5.0, 1.0).inputs()[0].inputs()[1]
        ret = self.pyll_reader.read_qlognormal(qlognormal, "qlognormal")
        expected = configuration_space.NormalIntegerHyperparameter(
            "qlognormal", 1.0, 5.0, base=np.e)
        self.assertEqual(expected, ret)
Example 7
def log_normal_from_bounds(label, left_bound, right_bound, quantization=None):
    log_left_bound = np.log(left_bound)
    log_right_bound = np.log(right_bound)
    log_mean = (log_left_bound + log_right_bound) / 2.0
    log_sigma = (log_right_bound - log_left_bound) / 4.0
    mean = np.exp(log_mean)
    hp_variable = (hp.lognormal(label, log_mean, log_sigma) if quantization is None
                   else hp.qlognormal(label, log_mean, log_sigma, quantization))
    dist = stats.lognorm(log_sigma, scale=mean)
    return Parameter(label, mean, hp_variable, dist.logpdf, dist.cdf)
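
The sigma chosen above places the bounds two standard deviations from the mean in log space, so roughly 95% of lognormal samples land inside [left_bound, right_bound]. A minimal check with hypothetical bounds:

import numpy as np

log_left, log_right = np.log(0.01), np.log(1.0)
log_mean = (log_left + log_right) / 2.0   # midpoint of the log-range
log_sigma = (log_right - log_left) / 4.0  # bounds sit at log_mean +/- 2*sigma
samples = np.exp(np.random.normal(log_mean, log_sigma, size=100000))
print(np.mean((samples >= 0.01) & (samples <= 1.0)))  # ~0.95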
Example 8
def define_search_space(objective, starting_params):
    prediction = starting_params["collaborative_params"][
        "prediction_network_params"]
    space = {
        'lr':
        hp.qlognormal("lr", np.log(prediction["lr"]), 0.5 * prediction["lr"],
                      0.05 * prediction["lr"]),
        'epochs':
        hp.quniform('epochs', prediction["epochs"] - 10,
                    prediction["epochs"] + 20, 5),
        'kernel_l2':
        hp.choice('kernel_l2', [
            0.0,
            hp.qloguniform('kernel_l2_choice', np.log(1e-9), np.log(1e-5),
                           5e-9)
        ]),
        'batch_size':
        hp.qloguniform('batch_size', np.log(1024), np.log(4096), 1024),
        'conv_depth':
        hp.quniform('conv_depth', 1, prediction["conv_depth"] + 2, 1),
        'gcn_layers':
        hp.quniform('gcn_layers', 1, prediction["gcn_layers"] + 1, 1),
        'ncf_layers':
        hp.quniform('ncf_layers', 1, prediction["ncf_layers"] + 1, 1),
        'ps_proportion':
        hp.choice('ps_proportion', [
            0.0,
            hp.qloguniform('ps_proportion_choice', np.log(0.1),
                           np.log(prediction["ps_proportion"] + 1.0), 0.05)
        ]),
        'ns_proportion':
        hp.quniform('ns_proportion', 0.0, prediction["ns_proportion"] + 2.0,
                    0.1),
        'nsh':
        hp.quniform('nsh', 0.0, prediction["nsh"] + 2.0, 0.1),
        # 'gaussian_noise': hp.qlognormal('gaussian_noise', np.log(prediction["gaussian_noise"]),
        #                                 0.5 * prediction["gaussian_noise"], 0.005),
        'gaussian_noise':
        hp.choice('gaussian_noise', [
            0.0,
            hp.qloguniform('gaussian_noise_choice', np.log(1e-3), np.log(0.5),
                           1e-3)
        ]),
        'margin':
        hp.choice('margin', [
            0.0,
            hp.qloguniform('margin_choice', np.log(1e-4), np.log(0.05), 5e-4)
        ]),
        'n_dims':
        hp.quniform('n_dims', starting_params["n_dims"] - 16,
                    starting_params["n_dims"] + 64, 16),
    }
    return space
Example 10
def many_dists():
    a = hp.choice('a', [0, 1, 2])
    b = hp.randint('b', 10)
    c = hp.uniform('c', 4, 7)
    d = hp.loguniform('d', -2, 0)
    e = hp.quniform('e', 0, 10, 3)
    f = hp.qloguniform('f', 0, 3, 2)
    g = hp.normal('g', 4, 7)
    h = hp.lognormal('h', -2, 2)
    i = hp.qnormal('i', 0, 10, 2)
    j = hp.qlognormal('j', 0, 2, 1)
    k = hp.pchoice('k', [(.1, 0), (.9, 1)])
    z = a + b + c + d + e + f + g + h + i + j + k
    return {'loss': scope.float(scope.log(1e-12 + z ** 2)),
            'status': base.STATUS_OK}
Example 11
def many_dists():
    a = hp.choice("a", [0, 1, 2])
    b = hp.randint("b", 10)
    bb = hp.randint("bb", 12, 25)
    c = hp.uniform("c", 4, 7)
    d = hp.loguniform("d", -2, 0)
    e = hp.quniform("e", 0, 10, 3)
    f = hp.qloguniform("f", 0, 3, 2)
    g = hp.normal("g", 4, 7)
    h = hp.lognormal("h", -2, 2)
    i = hp.qnormal("i", 0, 10, 2)
    j = hp.qlognormal("j", 0, 2, 1)
    k = hp.pchoice("k", [(0.1, 0), (0.9, 1)])
    z = a + b + bb + c + d + e + f + g + h + i + j + k
    return {"loss": scope.float(scope.log(1e-12 + z ** 2)), "status": base.STATUS_OK}
Example 12
def colkmeans(name,
              n_clusters=None,
              init=None,
              n_init=None,
              max_iter=None,
              tol=None,
              precompute_distances=True,
              verbose=0,
              random_state=None,
              copy_x=True,
              n_jobs=1):
    rval = scope.sklearn_ColumnKMeans(
        n_clusters=scope.int(
            hp.qloguniform(
                name + '.n_clusters',
                low=np.log(1.51),
                high=np.log(19.5),
                q=1.0)) if n_clusters is None else n_clusters,
        init=hp.choice(
            name + '.init',
            ['k-means++', 'random'],
        ) if init is None else init,
        n_init=hp.choice(
            name + '.n_init',
            [1, 2, 10, 20],
        ) if n_init is None else n_init,
        max_iter=scope.int(
            hp.qlognormal(
                name + '.max_iter',
                np.log(300),
                np.log(10),
                q=1,
            )) if max_iter is None else max_iter,
        tol=hp.lognormal(
            name + '.tol',
            np.log(0.0001),
            np.log(10),
        ) if tol is None else tol,
        precompute_distances=precompute_distances,
        verbose=verbose,
        random_state=random_state,
        copy_x=copy_x,
        n_jobs=n_jobs,
    )
    return rval
Example 14
class DecisionTreeModel(TreeBasedModel):
    @staticmethod
    def build_estimator(args, train_data=None):
        return DecisionTreeClassifier(random_state=RANDOM_STATE,
                                      presort=True,
                                      **args)

    hp_space = {
        'max_depth':
        hp.pchoice(
            'max_depth_enabled',
            [(0.7, None),
             (0.3,
              1 + scope.int(hp.qlognormal('max_depth', np.log(30), 0.5, 3)))]),
        'splitter':
        hp.choice('splitter_str', ['best', 'random']),
        'max_features':
        hp.pchoice(
            'max_features_str',
            [
                (0.2, 'sqrt'),  # most common choice.
                (0.1, 'log2'),  # less common choice.
                (0.1, None),  # all features, less common choice.
                (0.6, hp.uniform('max_features_str_frac', 0., 1.))
            ]),
        'min_samples_split':
        scope.int(hp.quniform('min_samples_split_str', 2, 10, 1)),
        'min_samples_leaf':
        hp.choice('min_samples_leaf_enabled', [
            1,
            scope.int(
                hp.qloguniform('min_samples_leaf', np.log(1.5), np.log(50.5),
                               1))
        ]),
        'class_weight':
        hp.pchoice('class_weight', [
            (0.5, None),
            (0.5, 'balanced'),
        ])
    }
Example 15
class RandomForestsModel(TreeBasedModel):
    @staticmethod
    def build_estimator(args, train_data=None):
        return RandomForestRegressor(random_state=RANDOM_STATE,
                                     n_jobs=-1,
                                     **args)

    hp_space = {
        "max_depth":
        hp.pchoice(
            "max_depth_enabled",
            [
                (0.7, None),
                (0.3, 1 +
                 scope.int(hp.qlognormal("max_depth", np.log(30), 0.5, 3))),
            ],
        ),
        "n_estimators":
        scope.int(hp.qloguniform("n_estimators", np.log(9.5), np.log(300), 1)),
        "min_samples_leaf":
        hp.choice(
            "min_samples_leaf_enabled",
            [
                1,
                scope.int(
                    hp.qloguniform("min_samples_leaf", np.log(1.5),
                                   np.log(50.5), 1)),
            ],
        ),
        "max_features":
        hp.pchoice(
            "max_features_str",
            [
                (0.1, "sqrt"),  # most common choice.
                (0.2, "log2"),  # less common choice.
                (0.1, None),  # all features, less common choice.
                (0.6, hp.uniform("max_features_str_frac", 0.0, 1.0)),
            ],
        ),
    }
Example 16
class RandomForestsModel(TreeBasedModel):
    @staticmethod
    def build_estimator(args, train_data=None):
        return RandomForestClassifier(random_state=RANDOM_STATE,
                                      n_jobs=-1,
                                      **args)

    hp_space = {
        'max_depth':
        hp.pchoice(
            'max_depth_enabled',
            [(0.7, None),
             (0.3,
              1 + scope.int(hp.qlognormal('max_depth', np.log(30), 0.5, 3)))]),
        'n_estimators':
        scope.int(hp.qloguniform('n_estimators', np.log(9.5), np.log(300), 1)),
        'min_samples_leaf':
        hp.choice('min_samples_leaf_enabled', [
            1,
            scope.int(
                hp.qloguniform('min_samples_leaf', np.log(1.5), np.log(50.5),
                               1))
        ]),
        'max_features':
        hp.pchoice(
            'max_features_str',
            [
                (0.2, 'sqrt'),  # most common choice.
                (0.1, 'log2'),  # less common choice.
                (0.1, None),  # all features, less common choice.
                (0.6, hp.uniform('max_features_str_frac', 0., 1.))
            ]),
        'class_weight':
        hp.pchoice('class_weight', [(0.5, None), (0.3, 'balanced'),
                                    (0.2, 'balanced_subsample')])
    }
Example 17
        print(f"Broken", str(args))
        return {
            'status': 'ok', # or 'fail' if nan loss
            'loss': np.inf
        }
    loss = np.average(trials)
    print(f"Finished with {loss}", str(args))

    return {
        'status': 'ok', # or 'fail' if nan loss
        'loss': loss
    }


space = {
    'niterations': hp.qlognormal('niterations', np.log(10), 1.0, 1),
    'npop': hp.qlognormal('npop', np.log(100), 1.0, 1),
    'alpha': hp.lognormal('alpha', np.log(10.0), 1.0),
    'fractionReplacedHof': hp.lognormal('fractionReplacedHof', np.log(0.1), 1.0),
    'fractionReplaced': hp.lognormal('fractionReplaced', np.log(0.1), 1.0),
    'perturbationFactor': hp.lognormal('perturbationFactor', np.log(1.0), 1.0),
    'weightMutateConstant': hp.lognormal('weightMutateConstant', np.log(4.0), 1.0),
    'weightMutateOperator': hp.lognormal('weightMutateOperator', np.log(0.5), 1.0),
    'weightAddNode': hp.lognormal('weightAddNode', np.log(0.5), 1.0),
    'weightInsertNode': hp.lognormal('weightInsertNode', np.log(0.5), 1.0),
    'weightDeleteNode': hp.lognormal('weightDeleteNode', np.log(0.5), 1.0),
    'weightSimplify': hp.lognormal('weightSimplify', np.log(0.05), 1.0),
    'weightRandomize': hp.lognormal('weightRandomize', np.log(0.25), 1.0),
}
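
A hedged driver over this space; `objective` stands for the truncated function above, whose tail returns the {'status', 'loss'} dict hyperopt expects.

from hyperopt import fmin, tpe, Trials

trials = Trials()
best = fmin(objective, space, algo=tpe.suggest, max_evals=100, trials=trials)
print(best)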

################################################################################
Example 18
    def createHyperoptSpace(self):
        name = self.root

        if 'anyOf' in self.config or 'oneOf' in self.config:
            data = []
            if 'anyOf' in self.config:
                data = self.config['anyOf']
            else:
                data = self.config['oneOf']

            choices = hp.choice(name, [
                Hyperparameter(param,
                               name + "." + str(index)).createHyperoptSpace()
                for index, param in enumerate(data)
            ])

            return choices
        elif self.config['type'] == 'object':
            space = {}
            for key in self.config['properties'].keys():
                config = self.config['properties'][key]
                space[key] = Hyperparameter(config, name + "." +
                                            key).createHyperoptSpace()
            return space
        elif self.config['type'] == 'number':
            mode = self.config.get('mode', 'uniform')
            scaling = self.config.get('scaling', 'linear')

            if mode == 'uniform':
                min = self.config.get('min', 0)
                max = self.config.get('max', 1)
                rounding = self.config.get('rounding', None)

                if scaling == 'linear':
                    if rounding is not None:
                        return hp.quniform(name, min, max, rounding)
                    else:
                        return hp.uniform(name, min, max)
                elif scaling == 'logarithmic':
                    if rounding is not None:
                        return hp.qloguniform(name, math.log(min),
                                              math.log(max), rounding)
                    else:
                        return hp.loguniform(name, math.log(min),
                                             math.log(max))
            if mode == 'normal':
                mean = self.config.get('mean', 0)
                stddev = self.config.get('stddev', 1)
                rounding = self.config.get('rounding', None)

                if scaling == 'linear':
                    if rounding is not None:
                        return hp.qnormal(name, mean, stddev, rounding)
                    else:
                        return hp.normal(name, mean, stddev)
                elif scaling == 'logarithmic':
                    if rounding is not None:
                        return hp.qlognormal(name, math.log(mean),
                                             math.log(stddev), rounding)
                    else:
                        return hp.lognormal(name, math.log(mean),
                                            math.log(stddev))
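
A hedged example of the config shape this method reads, exercising the 'number' branch with normal mode on a logarithmic scale; the two-argument Hyperparameter call mirrors the recursive calls above:

# With these values, createHyperoptSpace returns
# hp.qlognormal(name, math.log(10.0), math.log(2.0), 1):
config = {
    'type': 'number',
    'mode': 'normal',
    'scaling': 'logarithmic',
    'mean': 10.0,
    'stddev': 2.0,
    'rounding': 1,
}
space = Hyperparameter(config, 'lr').createHyperoptSpace()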
Example 19
def get_space(Utype=True,
              UNBktype=helper_naive_type(),
              Ualpha=hp.lognormal('alpha_', 0, 1),
              Ufit_prior=hp.choice('bool_', [True, False]),
              Ubinarize=hp.choice('binarize_', [.0,
                                    hp.lognormal('threshold_', 0, 1)]),
              UC=hp.lognormal('svm_C', 0, 2),
              Uwidth=hp.lognormal('svm_rbf_width', 0, 1),
              USVMktype=helper_svm(),
              Ucriterion=hp.choice('dtree_criterion', ['entropy',
                                                       'gini']),
              Umax_depth=hp.choice('dtree_max_depth',
                                   [None, 1 + hp.qlognormal('dtree_max_depth_int', 3, 1, 1)]),
              Umin_samples_split=1 + hp.qlognormal('dtree_min_samples_split', 2, 1, 1),
              Uweights=hp.choice('weighting', ['uniform', 'distance']),
              Ualgo=hp.choice('algos', ['auto', 'brute',
                                        'ball_tree', 'kd_tree']),
              Uleaf_sz=20+hp.randint('size', 20),
              Up=hp.choice('distance', [1, 2]),
              Un_neighbors=hp.quniform('num', 3, 19, 1),
              Uradius=hp.uniform('rad', 0, 2),
              UNktype=helper_neighbors(),
              Uout_label=None,
              Upreprocess=True,
              Unorm=choice(['l1', 'l2']),
              Unaxis=1, Uw_mean=choice([True, False]), Uw_std=True,
              Usaxis=0, Ufeature_range=(0, 1), Un_components=None,
              Uwhiten=hp.choice('whiten_chose', [True, False])):

    give_me_bayes = get_bayes(
        UNBktype, Ualpha, Ufit_prior, Ubinarize, Upreprocess, Unorm,
        Unaxis, Uw_mean, Uw_std, Usaxis, Ufeature_range, Un_components,
        Uwhiten)
    give_me_svm = get_svm(
        UC, USVMktype, Uwidth, Upreprocess, Unorm, Unaxis, Uw_mean,
        Uw_std, Usaxis, Ufeature_range, Un_components, Uwhiten)
    give_me_dtree = get_dtree(
        Ucriterion, Umax_depth, Umin_samples_split, Upreprocess, Unorm,
        Unaxis, Uw_mean, Uw_std, Usaxis, Ufeature_range, Un_components,
        Uwhiten)
    give_me_neighbors = get_neighbors(
        UNktype, Uweights, Ualgo, Uleaf_sz, Up, Un_neighbors, Uradius,
        Uout_label, Upreprocess, Unorm, Unaxis, Uw_mean, Uw_std,
        Usaxis, Ufeature_range, Un_components, Uwhiten)

    if Utype == 'naive_bayes':
        res_space = give_me_bayes
    elif Utype == 'svm':
        res_space = give_me_svm
    elif Utype == 'dtree':
        res_space = give_me_dtree
    elif Utype == 'neighbors':
        res_space = give_me_neighbors
    else:
        return hp.choice('quick_fix',
                         [give_me_bayes,
                          give_me_svm,
                          give_me_dtree,
                          give_me_neighbors])

    return hp.choice('quick_fix', [res_space])
Example 20
                'var_losses': losses,
                'time': delta_t}

    trials = Trials()
    best = fmin(objective,
                space=parameter_space,
                algo=tpe.suggest,
                max_evals=max_evals,
                trials=trials)

    return best, trials


if __name__ == "__main__":
    farneback_space = {
        'pyr_scale': hp.uniform('pyr_scale', 0.1, 0.9),
        'levels': hp.qloguniform('levels', 0, np.log(11), q=1),
        'winsize': hp.qlognormal('winsize', np.log(100), 1/6*np.log(100), 1),
        'iterations': hp.quniform('iterations', 2, 6, 1),
        'poly_n': hp.quniform('poly_n', 2, 14, 1),
        'poly_sigma': hp.uniform('poly_sigma', 0.1, 2.0),
        'scale': hp.qloguniform('scale', np.log(0.1), np.log(1e7), 1),
    }

    def flow_calculator(args):
        return FarnebackFlow(**args)

    snt_steps = {(0*hour, 2*hour): hour, (0*hour, 4*hour): 2*hour, (0*hour, 6*hour): 3*hour}


Example 21
    print(count)
    print(cfg)
    print(loss)
    print()
    return loss


if __name__ == '__main__':
    space = {
        # Actual batch_size == batch_size * num_concepts
        #'batch_size': hp.qlognormal('batch_size', 2.0, 0.3, 1),#4,
        #'epochs': hp.qlognormal('epochs', 8.3, 0.3, 100),#4000,
        # How often to anneal temperature
        # More like a traditional epoch due to small dataset size
        #'superepoch': hp.qlognormal('superepoch', 5.3, 0.2, 10),#200,
        'e_dense_size': hp.qlognormal('e_dense_size', 2.5, 0.4, 1),  #20,
        'd_dense_size': hp.qlognormal('d_dense_size', 3., 0.5, 1),  #20,
        #'input_dim': 8,
        #'num_concepts': 7,
        #'sentence_len': 7,
        #'vocab_size': 2,
        'temp_init': hp.lognormal('temp_init', 1.2, 0.4),  #4,
        'temp_decay': hp.uniform('temp_decay', 0.8, 1),  #0.9,
        #'train_st': hp.choice('train_st', [
        #    ('st_false', 0),
        #    ('st_true', 1),
        #]),
        #'test_prop': 0.1,
        'dropout_rate': hp.uniform('dropout_rate', 0, 0.4),  #0.3,

        #'verbose': True,
Example 22
        {
            'ktype': 'linear'
        },
        {
            'ktype': 'RBF',
            'width': hp.lognormal('rbf_width', 0, 1)
        },
    ]),
}

params_DT = {
    'criterion':
    hp.choice('criterion', ['gini', 'entropy']),
    'max_depth':
    hp.choice('max_depth',
              [None, hp.qlognormal('max_depth_int', 3, 1, 1)]),
    'min_samples_split':
    uniform_float("min_samples_split", 0, 1),
    #"max_features": np.random.randint(1, len(X_train.columns),20),
    #"min_samples_leaf": [2,3,4,5,6],
}

params_RF = {
    "n_estimators": uniform_int("n_estimators", 100, 1000),
    "max_depth": uniform_int("max_depth", 3, 15),
    "max_features": uniform_float("max_features", 0, 1),
    "criterion": hp.choice("criterion", ["gini", "entropy"])
}

##################################### TPE (Hyperopt library)
Example 23
def main():
    args = argParser()

    datafile = args.datafile

    resultprefix = 'tmp/' + args.datafile.rpartition('/')[2].rpartition('.')[0]
    if args.resultprefix:
        resultprefix = args.resultprefix
    # mod: include clustering method in clustering filename.
    cluster_file = resultprefix + '.cluster'
    subsequence_file = resultprefix + '.subsequence'
    graph_file = resultprefix + '.pdf'

    rstate = None
    seed_desc = "UNSET"
    if args.seed:
        rstate = np.random.RandomState(args.seed)
        seed_desc = str(args.seed)

    plot = args.plot

    print('PARSER PARAMETERS: ')
    print('--------------------')
    print('\tinput file: %s' % datafile)
    print('\tresult prefix: %s' % resultprefix)
    print('\tgraph_file: %s' % graph_file)
    print('\tsaved cluster results: %s' % os.path.isfile(cluster_file))
    print('\tsaved subsequence results: %s' % os.path.isfile(subsequence_file))
    print('\tseed: %s' % seed_desc)
    print('\tparallel: %s' % ("Yes" if PARALLEL else "No"))
    print('\tplot: %s' % ("Yes" if plot else "No"))
    print("--------------------")

    # run clustering
    new_cluster = False

    if os.path.isfile(cluster_file):
        print("Loading previous clustering...")

        cluster_results = pickle.load(open(cluster_file, 'rb'))

        long_keys = ["original_pts", "clustered_pts"]
        printable_results = {k : v for k, v in cluster_results.items() \
                                       if k not in long_keys}
        print("Loaded clustering with parameters: " + str(printable_results))
    else:
        print("Generating clustering...")

        # get data
        input_data = lib.parsing.parseSwitchTrace(datafile)
        np_input_data = np.array(input_data)

        # clustering
        X = np_input_data
        Y = lib.clustering.runPipeline(bGmmConf, X)
        cluster_results = {}
        cluster_results['original_pts'] = input_data
        cluster_results['clustered_pts'] = pickle.dumps(Y)

        if not os.path.exists(os.path.dirname(cluster_file)):
            try:
                os.makedirs(os.path.dirname(cluster_file))
            except OSError as exc:  # Guard against race condition
                if exc.errno != errno.EEXIST:
                    print("problem here: ", cluster_file, " ",
                          os.path.dirname(cluster_file))
                    raise
        pickle.dump(cluster_results, open(cluster_file, 'wb'))
        new_cluster = True

    # run subsequencing
    if not new_cluster and os.path.isfile(subsequence_file):
        print("Loading previous subsequences...")

        subsequences = pickle.load(open(subsequence_file, 'rb'))

        long_keys = ["merged_freq", "coverage_sum"]
        printable_results = {k : v for k, v in subsequences.items() \
                                       if k not in long_keys}
        print("Loaded subsequences with parameters: " + str(printable_results))
    else:
        print("Generating subsequences...")

        space = {
            'min_frequency_thresh':
            hp.qlognormal('min_frequency_thresh', 4, 0.6, 1),
            'clustered_pts':
            cluster_results['clustered_pts']
        }

        if PARALLEL:
            trials = MongoTrials('mongo://localhost:45555/db/jobs',
                                 exp_key='tpprof1')
        else:
            trials = Trials()

        best = fmin(fn=subsequence_objective,
                    space=space,
                    algo=tpe.suggest,
                    max_evals=SUBSEQUENCE_EVALS,
                    trials=trials,
                    rstate=rstate)
        best_trial = trials.trials[np.argmin(
            [r['loss'] for r in trials.results])]
        subsequence_freq = pickle.loads(
            trials.trial_attachments(best_trial)['subsequence_freq'])
        subsequence_coverage = pickle.loads(
            trials.trial_attachments(best_trial)['subsequence_coverage'])

        merged_freq, coverage_sum = \
                lib.subsequencing.merge_stable(subsequence_freq,
                                               subsequence_coverage)
        best['merged_freq'] = merged_freq
        best['coverage_sum'] = coverage_sum

        subsequences = best
        pickle.dump(subsequences, open(subsequence_file, 'wb'))

    if plot:
        print("Drawing profile...")

        lib.drawing.plot(cluster_results['original_pts'],
                         pickle.loads(cluster_results['clustered_pts']),
                         subsequences['merged_freq'],
                         subsequences['coverage_sum'], graph_file, plot)
    else:
        print('Drawing disabled')
Example 24
        {
            'type': 'knn',
        },
#        {
#            'type': 'svm',
#            'C': hp.lognormal('svm_C', 0, 1),
#            'kernel': hp.choice('svm_kernel', [
#                {'ktype': 'linear'},
#                {'ktype': 'RBF', 'width': hp.lognormal('svm_rbf_width', 0, 1)},
#                ]),
#        },
        {
            'type': 'randomforest',
            'criterion': hp.choice('dtree_criterion', ['gini', 'entropy']),
            'max_depth': hp.choice('dtree_max_depth',
                    [None, hp.qlognormal('dtree_max_depth_int', 3, 1, 1)]),
            'min_samples_split': hp.qlognormal('dtree_min_samples_split', 2, 1, 1),
        },
    ])}

trials = Trials()

def objective(p):

    if p['classifier_type']['type'] == 'knn':
        clf_x = KNeighborsRegressor()
        clf_y = KNeighborsRegressor()
    elif p['classifier_type']['type'] == 'randomforest':
        clf_x = RandomForestRegressor(max_depth=p['classifier_type']['max_depth'],
                                    min_samples_split=p['classifier_type']['min_samples_split'])
        clf_y = RandomForestRegressor(max_depth=p['classifier_type']['max_depth'],
Example 25
def wikiLearn():
    """
    不是特别懂
    """
    # 1、简单的函数
    from hyperopt import fmin, tpe, hp
    best = fmin(fn=lambda x: x ** 2,
                space=hp.uniform('x', -10, 10),
                algo=tpe.suggest,
                max_evals=100)
    print(best)
    # 2. Using an objective function with STATUS_OK
    from hyperopt import fmin, tpe, hp, STATUS_OK
    def objective(x):
        return {'loss': x ** 2, 'status': STATUS_OK }
    best = fmin(objective,
                space=hp.uniform('x', -10, 10),
                algo=tpe.suggest,
                max_evals=100)
    print(best)
    # 3. Returning a dict from the objective
    import pickle
    import time
    from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
    def objective(x):
        return {
            'loss': x ** 2,
            'status': STATUS_OK,
            # -- store other results like this
            'eval_time': time.time(),
            'other_stuff': {'type': None, 'value': [0, 1, 2]},
            # -- attachments are handled differently
            'attachments': {'time_module': pickle.dumps(time.time)}
        }
    trials = Trials()
    best = fmin(objective,
                space=hp.uniform('x', -10, 10),
                algo=tpe.suggest,
                max_evals=100,
                trials=trials)
    print(best)
    print(trials.trials)
    print(trials.results)
    print(trials.losses())
    print(trials.statuses())
    # Not yet clear what attachments are for
    msg = trials.trial_attachments(trials.trials[5])['time_module']
    time_module = pickle.loads(msg)
    from hyperopt import hp
    space = hp.choice('a',
                      [
                          ('case 1', 1 + hp.lognormal('c1', 0, 1)),
                          ('case 2', hp.uniform('c2', -10, 10))
                      ])
    import hyperopt.pyll.stochastic
    print(hyperopt.pyll.stochastic.sample(space))
    # hp.choice(label, options)
    # hp.randint(label, upper)                  # integer in [0, upper)
    # hp.uniform(label, low, high)
    # hp.quniform(label, low, high, q)          # round(uniform(low, high) / q) * q
    # hp.loguniform(label, low, high)
    # hp.qloguniform(label, low, high, q)       # round(exp(uniform(low, high)) / q) * q
    # hp.normal(label, mu, sigma)
    # hp.qnormal(label, mu, sigma, q)           # round(normal(mu, sigma) / q) * q
    # hp.lognormal(label, mu, sigma)
    # hp.qlognormal(label, mu, sigma, q)        # round(exp(normal(mu, sigma)) / q) * q
    # 4. Usage with sklearn
    from hyperopt import hp
    space = hp.choice('classifier_type', [
        {
            'type': 'naive_bayes',
        },
        {
            'type': 'svm',
            'C': hp.lognormal('svm_C', 0, 1),
            'kernel': hp.choice('svm_kernel', [
                {'ktype': 'linear'},
                {'ktype': 'RBF', 'width': hp.lognormal('svm_rbf_width', 0, 1)},
            ]),
        },
        {
            'type': 'dtree',
            'criterion': hp.choice('dtree_criterion', ['gini', 'entropy']),
            'max_depth': hp.choice('dtree_max_depth',
                                   [None, hp.qlognormal('dtree_max_depth_int', 3, 1, 1)]),
            'min_samples_split': hp.qlognormal('dtree_min_samples_split', 2, 1, 1),
        },
    ])
    # 5. Still haven't fully figured out scope.define
    import hyperopt.pyll
    from hyperopt.pyll import scope
    @scope.define
    def foo(a, b=0):
        print('running foo', a, b)
        return a + b / 2
    # -- this will print 0, foo is called as usual.
    print(foo(0))
    # In describing search spaces you can use `foo` as you
    # would in normal Python. These two calls will not actually call foo,
    # they just record that foo should be called to evaluate the graph.
    space1 = scope.foo(hp.uniform('a', 0, 10))
    space2 = scope.foo(hp.uniform('a', 0, 10), hp.normal('b', 0, 1))
    # -- this will print a pyll.Apply node
    print(space1)
    # -- this will draw a sample by running foo()
    print(hyperopt.pyll.stochastic.sample(space1))
Example 26
    Distribution looks like exp(normal(mu, sigma))
    """
    return hp.lognormal(name, mu, sigma)


def qlognormal(name, params):
    """
    Function to create a hyperopt qlognormal variable
    Input
    ------------------
    name - Variable name
    params - Tuple (mu, sigma, q) of mean, standard deviation and q value.

    Distribution looks like round(exp(normal(mu, sigma)) / q) * q
    """
    # tuple parameter unpacking is Python 2 only, so unpack in the body
    mu, sigma, q = params
    return hp.qlognormal(name, mu, sigma, q)


def show_distributions_info():
    print("List of Distributions available")
    list_dist = [uniform, randint, choice, loguniform, quniform,
                 qloguniform, normal, lognormal, qlognormal]
    for dist in list_dist:
        print("Distribution name :", dist.__name__)
        print("Docstring")
        print("------------------------------------")
        print(dist.__doc__)
# Additional helper functions
def gen_metric(func, metric):
    if metric == 'auc':
        return func.auc()
Example 28
def qlognormal(label, *args, **kwargs):
    return hp.qlognormal(label, *args, **kwargs)
Example 29
def hyperopt(self):
    return hp.qlognormal(self.label.name, self.mu, self.sigma, self.q)
Example 30
    def createHyperoptSpace(self, lockedValues=None):
        name = self.root

        if lockedValues is None:
            lockedValues = {}

        if 'anyOf' in self.config or 'oneOf' in self.config:
            data = []
            if 'anyOf' in self.config:
                data = self.config['anyOf']
            else:
                data = self.config['oneOf']

            subSpaces = [
                Hyperparameter(param, self, name + "." +
                               str(index)).createHyperoptSpace(lockedValues)
                for index, param in enumerate(data)
            ]
            for index, space in enumerate(subSpaces):
                space["$index"] = index

            choices = hp.choice(self.hyperoptVariableName, subSpaces)

            return choices
        elif 'enum' in self.config:
            if self.name in lockedValues:
                return lockedValues[self.name]

            choices = hp.choice(self.hyperoptVariableName, self.config['enum'])
            return choices
        elif 'constant' in self.config:
            if self.name in lockedValues:
                return lockedValues[self.name]

            return self.config['constant']
        elif self.config['type'] == 'object':
            space = {}
            for key in self.config['properties'].keys():
                config = self.config['properties'][key]
                space[key] = Hyperparameter(
                    config, self,
                    name + "." + key).createHyperoptSpace(lockedValues)
            return space
        elif self.config['type'] == 'number':
            if self.name in lockedValues:
                return lockedValues[self.name]

            mode = self.config.get('mode', 'uniform')
            scaling = self.config.get('scaling', 'linear')

            if mode == 'uniform':
                min = self.config.get('min', 0)
                max = self.config.get('max', 1)
                rounding = self.config.get('rounding', None)

                if scaling == 'linear':
                    if rounding is not None:
                        return hp.quniform(self.hyperoptVariableName, min, max,
                                           rounding)
                    else:
                        return hp.uniform(self.hyperoptVariableName, min, max)
                elif scaling == 'logarithmic':
                    if rounding is not None:
                        return hp.qloguniform(self.hyperoptVariableName,
                                              math.log(min), math.log(max),
                                              rounding)
                    else:
                        return hp.loguniform(self.hyperoptVariableName,
                                             math.log(min), math.log(max))
            if mode == 'randint':
                max = self.config.get('max', 1)
                return hp.randint(self.hyperoptVariableName, max)

            if mode == 'normal':
                mean = self.config.get('mean', 0)
                stddev = self.config.get('stddev', 1)
                rounding = self.config.get('rounding', None)

                if scaling == 'linear':
                    if rounding is not None:
                        return hp.qnormal(self.hyperoptVariableName, mean,
                                          stddev, rounding)
                    else:
                        return hp.normal(self.hyperoptVariableName, mean,
                                         stddev)
                elif scaling == 'logarithmic':
                    if rounding is not None:
                        return hp.qlognormal(self.hyperoptVariableName,
                                             math.log(mean), math.log(stddev),
                                             rounding)
                    else:
                        return hp.lognormal(self.hyperoptVariableName,
                                            math.log(mean), math.log(stddev))
Example 31
    assert min > 0, "log-uniform bounds must be positive"
    return hp.loguniform(name, np.log(min), np.log(max))


params_SVM = {
    "C": hp.lognormal("C", 0, 1),
    "kernel": hp.choice("kernel", [
        {"ktype": "linear"},
        {"ktype": "RBF", "width": hp.lognormal("rbf_width", 0, 1)},
    ]),
}

params_DT = {
    "criterion": hp.choice("criterion", ["gini", "entropy"]),
    "max_depth": hp.choice("max_depth",
        [None, hp.qlognormal("max_depth_int", 3, 1, 1)]),
    "min_samples_split": uniform_float("min_samples_split", 0, 1),
    #"max_features": np.random.randint(1, len(X_train.columns),20),
    #"min_samples_leaf": [2,3,4,5,6],
}

params_RF = {
    "n_estimators":  uniform_int("n_estimators", 100, 1000),
    "max_depth":     uniform_int("max_depth", 3, 15),
    "max_features":  uniform_float("max_features", 0, 1),
    "criterion":     hp.choice("criterion", ["gini", "entropy"])
}

params_XGB = {
    "max_depth":         hp.quniform("max_depth", 4, 16, 1),
    "min_child_weight":  hp.quniform("min_child", 1, 10, 1),
Example 32
            'dtree_max_features', 0, 1),
        max_depth=hp.quniform(
            'dtree_max_depth', 0, 25, 1)),
    scope.SVC(
        C=hp.lognormal(
            'svc_rbf_C', 0, 3),
        kernel='rbf',
        gamma=hp.lognormal(
            'svc_rbf_gamma', 0, 2),
        tol=hp.lognormal(
            'svc_rbf_tol', np.log(1e-3), 1)),
    ])

pre_processing = hp.choice('preproc_algo', [
    scope.PCA(
        n_components=1 + hp.qlognormal(
            'pca_n_comp', np.log(10), np.log(10), 1),
        whiten=hp.choice(
            'pca_whiten', [False, True])),
    scope.GMM(
        n_components=1 + hp.qlognormal(
            'gmm_n_comp', np.log(100), np.log(10), 1),
        covariance_type=hp.choice(
            'gmm_covtype', ['spherical', 'tied', 'diag', 'full'])),
    ])

sklearn_space = {'pre_processing': pre_processing,
                 'classifier': classifier}
from hyperopt.pyll.stochastic import sample
print(sample(sklearn_space))
print(sample(sklearn_space))
Example 33
del df_varlist

space = {
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
    'seed': 9999,
    'tree_method': 'hist',
    'grow_policy': 'lossguide',
    'max_delta_step': hp.lognormal('max_delta_step', 0, 1),
    'min_child_weight': hp.lognormal('min_child_weight', 0, 1),
    'gamma': hp.lognormal('gamma', 0, 1),
    'lambda': hp.lognormal('lambda', 0, 1),
    'alpha': hp.lognormal('alpha', 0, 1),
    'eta': hp.loguniform('eta', log(2**-7), 0),
    'max_leaves': 128 - hp.qloguniform('max_leaves', log(4), log(128), 1),
    'max_bin': 2 + hp.qlognormal('max_bin', log(256 - 2), 1, 1),
    'subsample': 1.5 - hp.loguniform('subsample', log(0.5), 0),
    'colsample_bytree': 1.5 - hp.loguniform('colsample_bytree', log(0.5), 0),
    'colsample_bylevel': 1.5 - hp.loguniform('colsample_bylevel', log(0.5), 0)
}
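
The subtraction patterns above reverse a log scale so that probability mass concentrates near the upper end of the range. A quick hedged check of the 'subsample' trick:

import numpy as np

# 1.5 - loguniform(log(0.5), 0) yields values in [0.5, 1.0),
# denser near 1.0 (the raw log-uniform is denser near 0.5):
raw = np.exp(np.random.uniform(np.log(0.5), 0.0, size=100000))
print(np.percentile(1.5 - raw, [10, 50, 90]))  # median around 0.79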

best_prenatal = choose_model(df_2016_train[prenatal_vars],
                             df_2016_valid[prenatal_vars],
                             df_2016_hvalid[prenatal_vars], 'Prenatal')

best_NICU = choose_model(df_2016_NICU_train[prenatal_vars],
                         df_2016_NICU_valid[prenatal_vars],
                         df_2016_NICU_hvalid[prenatal_vars], 'NICU')

best_post_birth = choose_model(df_2016_train, df_2016_valid, df_2016_hvalid,
                               'Postnatal')