Example #1
0
    def auto_tune(self,
                  X,
                  y,
                  num_evals=50,
                  num_folds=2,
                  opt_metric="r_squared",
                  nprocs=1):
        """Tune this model's hyperparameters over ``self.search`` with optunity.

        Parameters
        ----------
        X, y : training data forwarded to the evaluation function built by
            ``self._eval_factory``.
        num_evals : int, number of optimizer evaluations.
        num_folds : int, cross-validation folds used by the evaluation function.
        opt_metric : str, "r_squared" (maximised) or "mse" (minimised).
        nprocs : int, worker processes; -1 uses all cores, 1 runs serially.

        Returns
        -------
        dict with the best configuration found.

        Raises
        ------
        ValueError if ``opt_metric`` is not one of the supported metrics.
        """
        if nprocs == -1:
            nprocs = mp.cpu_count()
        if nprocs != 1:
            pmap = optunity.parallel.create_pmap(nprocs)
        else:
            # Fall back to optunity's default (serial) pmap implementation.
            pmap = inspect.signature(
                optunity.minimize_structured).parameters["pmap"].default

        if opt_metric == "r_squared":
            optimal_configuration, info, _ = optunity.maximize_structured(
                partial(self._eval_factory(num_folds, r_squared), X, y),
                search_space=self.search,
                num_evals=num_evals,
                pmap=pmap)
        elif opt_metric == "mse":
            optimal_configuration, info, _ = optunity.minimize_structured(
                partial(self._eval_factory(num_folds, mse), X, y),
                search_space=self.search,
                num_evals=num_evals,
                pmap=pmap)
        else:
            # Previously an unrecognised metric fell through and raised
            # UnboundLocalError on the return; fail loudly and clearly instead.
            raise ValueError("Unsupported opt_metric: %r" % (opt_metric,))

        return optimal_configuration
Example #2
0
def compute_optimal_parameters(source,
                               target,
                               source_labels,
                               target_labels,
                               method_name,
                               classifier,
                               num_evals=50):
    """Search the method's hyperparameter space and return the best setting.

    Returns an empty dict immediately for 'no_transfer' baselines, which
    have nothing to tune.
    """
    if 'no_transfer' in method_name:
        return {}

    def objective(**raw_params):
        params = process_space_dict(raw_params, INT_ARGS)
        result = do_method(source, target, source_labels, target_labels,
                           method_name, classifier, params)
        if method_name == 'ada':
            return sklearn.metrics.balanced_accuracy_score(*reversed(result))
        predictions = classifier().fit(result[0],
                                       source_labels).predict(result[1])
        # Free eigensolver memory between evaluations; see
        # https://stackoverflow.com/questions/25652663/scipy-sparse-eigensolver-memoryerror-after-multiple-passes-through-loop-without
        gc.collect()
        return sklearn.metrics.balanced_accuracy_score(target_labels,
                                                       predictions)

    best, _, _ = optunity.maximize_structured(
        objective,
        SEARCH_SPACES_METHODS[method_name],
        num_evals=num_evals)
    return process_space_dict(best, INT_ARGS)
    def fit(self, X, y):
        """Run the structured optunity search over ``self.space``.

        Stores the best score in ``self._best_score``, the best parameters in
        ``self._best_params`` and all per-evaluation CV results in
        ``self._grid_scores``.  Progress is reported through ``self.callback``
        when one is configured.
        """
        # Fall back to serving the supplied (X, y) directly when no data
        # loader was configured.
        if not self.loader:
            self.loader = lambda: (X, y)

        est = self.estimator
        scorer = self.scoring
        cv = self.cv
        loader = self.loader

        callback = self.callback
        # Total number of fits: one per fold per optimizer iteration.
        length = len(cv) * self.max_iter
        if callback:
            callback(0, length)
        transforms = self.transforms
        cacher = self.cacher
        fit_callback = self.fit_callback
        mapper = self.mapper

        cv_scores = {}

        def fit_func(**params):
            params = apply_transforms(params, transforms)
            base_id = len(cv_scores) * len(cv)

            scores = PSOSearch.cross_val_score(base_index=base_id,
                                               estimator=est,
                                               parameters=params,
                                               loader=loader,
                                               cv=cv,
                                               scorer=scorer,
                                               fit_callback=fit_callback,
                                               cacher=cacher,
                                               callback=callback,
                                               mapper=mapper)

            cv_score = _CVScoreTuple(params, np.mean(scores), scores)
            cv_scores[base_id] = cv_score
            # Python 3 fix: the original indexed cv_scores.values() (dict
            # views are not subscriptable) and used dict.itervalues(), which
            # no longer exists.  max() with a key picks the same entry that
            # np.argmax over the mean scores did (first maximum wins).
            best_score_params = max(
                cv_scores.values(),
                key=lambda score: score.mean_validation_score)
            best_score_mean = best_score_params.mean_validation_score
            best_score_std = np.std(best_score_params.cv_validation_scores)
            if callback:
                callback(description='%.3f+-%.3f' %
                         (best_score_mean, best_score_std))
            return scores.mean()

        # Seed both RNGs so repeated fits are reproducible.
        np.random.seed(1)
        random.seed(1)
        res, optimize_results, solver_info = optunity.maximize_structured(
            fit_func, self.space, num_evals=self.max_iter)

        self._best_score = optimize_results[0]
        self._grid_scores = cv_scores
        self._best_params = res
Example #4
0
    def maximise(self, target, num_evals, search=None, verbose=20,
                                         **constants):
        """Maximise ``target`` within the defined search space.

        target has signature score=target(**params)
        constants are fixed for every iteration e.g. x and y
        verbose=20 reports every iteration
        search defaults to the space defined in "search.yaml"
        """
        self.verbose = verbose
        if search is None:
            with open("search.yaml") as f:
                # safe_load: never execute arbitrary Python tags from the
                # YAML file, and works on PyYAML>=5.1 where plain load()
                # requires an explicit Loader argument.
                search = yaml.safe_load(f)
        self.cleanparams(search)

        # Return values are deliberately ignored: results are tracked via
        # self.runs, which provides plot, report and correlations helpers.
        optunity.maximize_structured(self.make_target(target, **constants),
                                     search_space=search,
                                     num_evals=num_evals)
        self.runs.report()
    def run_optunity(self):
        """Tune the SVM via cross-validated AUROC and print the call log."""
        decorate = optunity.cross_validated(
            x=self.X,
            y=self.Y,
        )
        tuned_objective = decorate(self.svm_tuned_auroc)

        best_pars, details, _ = optunity.maximize_structured(
            tuned_objective, self.space, num_evals=150, pmap=optunity.pmap)
        print("Optimal parameters" + str(best_pars))
        print("AUROC of tuned SVM: %1.3f" % details.optimum)

        log = optunity.call_log2dataframe(details.call_log)
        print(log.sort_values('value', ascending=False))
    def train(self):
        """Fit PCA, tune the SVM over the reduced features, train the model."""
        self._pca.fit(self._features_data)
        reduced = self._pca.transform(self._features_data)

        cross_validate = optunity.cross_validated(x=reduced,
                                                  y=self._labels,
                                                  num_folds=5)
        objective = cross_validate(svm_tuned_precision)

        best_pars, _, _ = optunity.maximize_structured(
            objective,
            _SVM_SEARCH_SPACE,
            num_evals=self._config.get('num_evals', 100))

        self._model = _train_model(reduced, self._labels, **best_pars)
Example #7
0
    def optimize(self,
                 data,
                 search_space,
                 val_data=None,
                 num_evals=50,
                 optimize='max',
                 solver_name='particle swarm'):
        """Run an optunity search over ``search_space`` and return the best model.

        Parameters:
        -----------
        data: [X, Y] - arrays
            Used for crossval training (considering 'train_test_split' parameter).

        search_space: dict
            Parameters to optimize, e.g. 'units' : [100,1000].

        val_data: [X, Y] - arrays, optional
            When given, the optimizer metric is evaluated on it and the
            'train_test_split' parameter is ignored.

        num_evals: int
            Iteration count for the optunity optimizer.

        optimize: str
            'max'/'min' supported.

        solver_name: str
            Only the default 'particle swarm' is supported for now.
        """
        train_manager = self._create_train_manager(data, val_data,
                                                   search_space)
        # Deeply nested models can exceed the default recursion limit.
        sys.setrecursionlimit(100000)

        solvers = {
            'max': opt.maximize_structured,
            'min': opt.minimize_structured,
        }
        if optimize not in solvers:
            raise InvalidParamError('optimize', optimize)
        self.retr, self.extra, self.info = solvers[optimize](
            f=train_manager.train,
            search_space=search_space,
            num_evals=num_evals)

        # Reload and return the best model found during the search.
        return train_manager.get_best_model(self.extra)
def compute_roc_tuned(x_train, y_train, x_test, y_test):
    """Tune an SVM on the training set and return its AUROC on the test set."""

    # Objective: 2x5-fold cross-validated AUROC of the candidate SVM.
    # Parameter names are significant — optunity maps search-space keys onto
    # them by name, so they must stay as-is.
    @optunity.cross_validated(x=x_train, y=y_train, num_iter=2, num_folds=5)
    def inner_cv(x_train, y_train, x_test, y_test, kernel='linear', C=0,
                 gamma=0, degree=0, coef0=0):
        fold_model = train_model(x_train, y_train, kernel, C, gamma, degree,
                                 coef0)
        return optunity.metrics.roc_auc(y_test,
                                        fold_model.decision_function(x_test))

    # NOTE: optunity.pmap does not work under IPython
    # (https://github.com/claesenm/optunity/issues/8) — drop the pmap=
    # argument there, e.g.:
    # optimal_pars, _, _ = optunity.maximize_structured(inner_cv, space, 200)
    optimal_pars, _, _ = optunity.maximize_structured(inner_cv, space, 70,
                                                      pmap=optunity.pmap)
    print('optimal parameters after tuning %s' % str(optimal_pars))

    tuned_model = train_model(x_train, y_train, **optimal_pars)
    return optunity.metrics.roc_auc(y_test,
                                    tuned_model.decision_function(x_test))
def prepare_svm(X, Y, prob_setting):
    '''
    Tune an SVM over kernel type and per-kernel hyperparameters, then fit a
    final classifier on all of (X, Y) and return it.
    Code inspired by http://optunity.readthedocs.org/en/latest/notebooks/notebooks/sklearn-svc.html#tune-svc-without-deciding-the-kernel-in-advance
    '''
    cv_decorator = optunity.cross_validated(x=X, y=Y, num_folds=10)

    # Conditional search space: each kernel carries its own hyperparameters.
    space = {
        'kernel': {
            'linear': {'C': [0, 1000],
                       'class_weight_param': [1, 22]},
            'rbf': {'logGamma': [-5, 1],
                    'C': [0, 1000],
                    'class_weight_param': [1, 22]},
            'poly': {'degree': [2, 5],
                     'C': [0, 1000],
                     'coef0': [0, 100],
                     'class_weight_param': [1, 22]},
        }
    }

    def _build_model(kernel, C, logGamma, degree, coef0, weight):
        # Translate a sampled configuration into a fitted-ready SVC.
        if kernel == 'linear':
            return SVC(kernel=kernel, C=C, class_weight={1: weight})
        if kernel == 'poly':
            return SVC(kernel=kernel, C=C, degree=degree, coef0=coef0,
                       class_weight={1: weight})
        if kernel == 'rbf':
            return SVC(kernel=kernel, C=C, gamma=10 ** logGamma,
                       class_weight={1: weight})
        raise ValueError("Unknown kernel function: %s" % kernel)

    # Parameter names must match the search-space keys for optunity.
    def svm_tuned_auroc(x_train, y_train, x_test, y_test, kernel='linear',
                        C=0, logGamma=0, degree=0, coef0=0,
                        class_weight_param=1):
        model = _build_model(kernel, C, logGamma, degree, coef0,
                             class_weight_param)
        model.fit(x_train, y_train)
        return optunity.metrics.roc_auc(y_test,
                                        model.decision_function(x_test))

    objective = cv_decorator(svm_tuned_auroc)

    optimal_svm_pars, info, _ = optunity.maximize_structured(objective, space,
                                                             num_evals=200)
    print("Optimal parameters:" + str(optimal_svm_pars))
    print("AUROC of tuned SVM: %1.3f" % info.optimum)

    classifier = build_svc(optimal_svm_pars, prob_setting)
    classifier.fit(X, Y)
    return classifier
Example #10
0
        model.fit(x_train, y_train)
    else:
        raise ArgumentError('Unknown algorithm: %s' % algorithm)

    # predict the test set
    if algorithm == 'SVM':
        predictions = model.decision_function(x_test)
    else:
        predictions = model.predict_proba(x_test)[:, 1]

    return optunity.metrics.roc_auc(y_test, predictions, positive=True)


# Sanity-check the objective function once before running the optimizer.
performance(algorithm='k-nn', n_neighbors=3)

optimal_configuration, info, _ = optunity.maximize_structured(
    performance, search_space=search, num_evals=300)

print(optimal_configuration)
print(info.optimum)
# Keep only the parameters relevant to the winning algorithm.
solution = {k: v for k, v in optimal_configuration.items() if v is not None}
print('Solution\n========')
print("\n".join("%s \t %s" % (key, str(val))
                for key, val in solution.items()))


#basic optim
def create_objective_function():
    xoff = random.random()
    yoff = random.random()
Example #11
0
 def optimise_sgd(self):
     """Tune the SGD model's hyperparameters by maximising cross-validated AUROC."""
     self.__log.write("Optimising SGD model")
     # Wrap the objective in the shared cross-validation decorator.
     self.__sgd_tuned_auroc_ = self.__cv_decorator_(self.__sgd_tuned_auroc_)
     optimal_sgd_pars, info, _ = optunity.maximize_structured(
         self.__sgd_tuned_auroc_, self.__sgd_space_, num_evals=150)
     print("Optimal parameters" + str(optimal_sgd_pars))
     # Copy-paste fix: this method tunes SGD, not SVM, so say so.
     print("AUROC of tuned SGD: %1.3f" % info.optimum)
Example #12
0
                    C=0,
                    logGamma=0,
                    degree=0,
                    coef0=0):
    model = train_model(x_train, y_train, kernel, C, logGamma, degree, coef0)
    decision_values = model.decision_function(x_test)
    return optunity.metrics.roc_auc(y_test, decision_values)


# Wrap the objective in optunity's cross-validation decorator (cv_decorator
# is built earlier in this script — TODO confirm its fold settings there).
svm_tuned_auroc = cv_decorator(svm_tuned_auroc)

#print svm_default_auroc(C=1.0, logGamma=0.0)

# optimal_rbf_pars, info, _ = optunity.maximize(svm_rbf_tuned_auroc, num_evals=150, C=[0, 10], logGamma=[-5, 0])
# Run the structured search over the conditional kernel space for 150
# evaluations; `info` carries the optimum score and the full call log.
optimal_svm_pars, info, _ = optunity.maximize_structured(svm_tuned_auroc,
                                                         space,
                                                         num_evals=150)

# when running this outside of IPython we can parallelize via optunity.pmap
# optimal_rbf_pars, _, _ = optunity.maximize(svm_rbf_tuned_auroc, 150, C=[0, 10], gamma=[0, 0.1], pmap=optunity.pmap)

# Report the best configuration and its cross-validated AUROC.
print("Optimal parameters: " + str(optimal_svm_pars))
print("AUROC of tuned SVM with RBF kernel: %1.3f" % info.optimum)

# df = optunity.call_log2dataframe(info.call_log)
# print df.sort('value', ascending=False)

# if optimal_svm_pars['kernel'] == 'rbf':
svc = SVC(C=optimal_svm_pars['C'],
          kernel='rbf',
          gamma=10**optimal_svm_pars['logGamma'],
Example #13
0
def RF_pred(X_train, Y_train, X_test, Y_test, n_splits=5, num_evals=100):
    """Tune a balanced random forest with optunity, then evaluate on the test set.

    Returns (test AUC, fpr, tpr, precision, recall, fitted classifier).
    """

    def _pick_max_features(value):
        # optunity samples a float in [0, 1]; map it onto sklearn's two modes.
        return "auto" if value < 0.5 else "sqrt"

    # Objective: n_splits-fold cross-validated AUC of a candidate forest.
    # Parameter names must match the search-space keys below.
    @optunity.cross_validated(x=X_train, y=Y_train, num_folds=n_splits)
    def fun_max(x_train, y_train, x_test, y_test, n_estimators, max_depth,
                max_features, min_samples_split):
        forest = RandomForestClassifier(
            n_estimators=int(n_estimators),
            max_depth=int(max_depth),
            max_features=_pick_max_features(max_features),
            min_samples_split=int(min_samples_split),
            class_weight="balanced")
        forest.fit(x_train, y_train)
        return optunity.metrics.roc_auc(y_test,
                                        forest.predict_proba(x_test)[:, 1])

    # Hyperparameter ranges explored by the optimizer.
    random_grid = {
        'n_estimators': [100, 1000],
        'max_depth': [5, 25],
        'max_features': [0, 1],
        'min_samples_split': [2, 10],
    }

    optimal_parameters, info, _ = optunity.maximize_structured(
        fun_max,
        search_space=random_grid,
        num_evals=num_evals,
        pmap=optunity.pmap)
    print(
        f"Optimal parameters : {optimal_parameters} with AUC of {info.optimum}"
    )

    # Refit on the full training set with the winning configuration and
    # evaluate on the held-out test set.
    final_model = RandomForestClassifier(
        n_estimators=int(optimal_parameters["n_estimators"]),
        max_depth=int(optimal_parameters["max_depth"]),
        max_features=_pick_max_features(optimal_parameters["max_features"]),
        min_samples_split=int(optimal_parameters["min_samples_split"]),
        class_weight="balanced")
    final_model.fit(X_train, Y_train)

    test_scores = final_model.predict_proba(X_test)[:, 1]
    fpr, tpr, _ = roc_curve(Y_test, test_scores)
    precision, recall, _ = precision_recall_curve(Y_test, test_scores)
    score = roc_auc_score(Y_test, test_scores)
    print(f"ROC AUC with optimal set of hyperparameters : {score}")
    return (score, fpr, tpr, precision, recall, final_model)
def methodSelection(data,labels):
    """Select the best classifier family and hyperparameters for the data.

    Runs a structured optunity search over k-NN, SVM (linear/rbf/poly
    kernels), naive Bayes and random forests, scoring each candidate by
    4-fold cross-validated AUROC, then prints the winning configuration
    and its score.
    """

    def train_svm(data, labels, kernel, C, gamma, degree, coef0):
        """A generic SVM training function, with arguments based on the chosen kernel."""
        if kernel == 'linear':
            model = SVC(kernel=kernel, C=C)
        elif kernel == 'poly':
            model = SVC(kernel=kernel, C=C, degree=degree, coef0=coef0)
        elif kernel == 'rbf':
            model = SVC(kernel=kernel, C=C, gamma=gamma)
        else:
            # Fix: ArgumentError is not a builtin (raising it would itself
            # fail with NameError), so raise ValueError instead.
            raise ValueError("Unknown kernel function: %s" % kernel)
        model.fit(data, labels)
        return model

    # Conditional search space: each algorithm carries its own sub-space.
    search = {'algorithm': {'k-nn': {'n_neighbors': [1, 10]},
                            'SVM': {'kernel': {'linear': {'C': [0, 2]},
                                               'rbf': {'gamma': [0, 1], 'C': [0, 10]},
                                               'poly': {'degree': [2, 5], 'C': [0, 50], 'coef0': [0, 1]}
                                               }
                                    },
                            'naive-bayes': None,
                            'random-forest': {'n_estimators': [10, 30],
                                              'max_features': [5, 20]}
                            }
             }

    @optunity.cross_validated(x=data, y=labels, num_folds=4)
    def performance(x_train, y_train, x_test, y_test,
                    algorithm, n_neighbors=None, n_estimators=None, max_features=None,
                    kernel=None, C=None, gamma=None, degree=None, coef0=None):
        # fit the model
        if algorithm == 'k-nn':
            model = KNeighborsClassifier(n_neighbors=int(n_neighbors))
            model.fit(x_train, y_train)
        elif algorithm == 'SVM':
            model = train_svm(x_train, y_train, kernel, C, gamma, degree, coef0)
        elif algorithm == 'naive-bayes':
            model = GaussianNB()
            model.fit(x_train, y_train)
        elif algorithm == 'random-forest':
            model = RandomForestClassifier(n_estimators=int(n_estimators),
                                           max_features=int(max_features))
            model.fit(x_train, y_train)
        else:
            # Same ArgumentError -> ValueError fix as in train_svm above.
            raise ValueError('Unknown algorithm: %s' % algorithm)

        # predict the test set; SVMs expose decision_function, the rest
        # expose class probabilities.
        if algorithm == 'SVM':
            predictions = model.decision_function(x_test)
        else:
            predictions = model.predict_proba(x_test)[:, 1]

        return optunity.metrics.roc_auc(y_test, predictions, positive=True)

    optimal_configuration, info, _ = optunity.maximize_structured(performance,
                                                                  search_space=search,
                                                                  num_evals=300)

    # Drop the search-space keys that do not apply to the chosen algorithm.
    solution = dict([(k, v) for k, v in optimal_configuration.items() if v is not None])
    print('Solution\n========')
    print("\n".join(map(lambda x: "%s \t %s" % (x[0], str(x[1])), solution.items())))
    print(info.optimum)
Example #15
0
        raise ArgumentError('Unknown algorithm: %s' % algorithm)

    # predict the test set
    if algorithm == 'SVM':
        predictions = model.decision_function(x_test)
    else:
        predictions = model.predict_proba(x_test)[:, 1]

    return optunity.metrics.roc_auc(y_test, predictions, positive=True)
    

# Sanity-check the objective function once before running the optimizer.
performance(algorithm='k-nn', n_neighbors=3)

optimal_configuration, info, _ = optunity.maximize_structured(
    performance, search_space=search, num_evals=300)

print(optimal_configuration)
print(info.optimum)
# Keep only the parameters relevant to the winning algorithm.
solution = {k: v for k, v in optimal_configuration.items() if v is not None}
print('Solution\n========')
print("\n".join("%s \t %s" % (key, str(val))
                for key, val in solution.items()))

#basic optim
def create_objective_function():
    """Build a paraboloid f(x, y) whose minimum sits at a random offset.

    The offsets are drawn once, at creation time; the returned function is
    deterministic thereafter.
    """
    x_offset = random.random()
    y_offset = random.random()

    def objective(x, y):
        return (x - x_offset) ** 2 + (y - y_offset) ** 2

    return objective