def auto_tune(self, X, y, num_evals=50, num_folds=2, opt_metric="r_squared", nprocs=1):
    if nprocs == -1:
        nprocs = mp.cpu_count()
    if nprocs != 1:
        pmap = optunity.parallel.create_pmap(nprocs)
    else:
        pmap = inspect.signature(
            optunity.minimize_structured).parameters["pmap"].default
    if opt_metric == "r_squared":
        optimal_configuration, info, _ = optunity.maximize_structured(
            partial(self._eval_factory(num_folds, r_squared), X, y),
            search_space=self.search,
            num_evals=num_evals,
            pmap=pmap)
    if opt_metric == "mse":
        optimal_configuration, info, _ = optunity.minimize_structured(
            partial(self._eval_factory(num_folds, mse), X, y),
            search_space=self.search,
            num_evals=num_evals,
            pmap=pmap)
    return optimal_configuration
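A minimal usage sketch for the method above; the tuner object and the data names are hypothetical and only illustrate the call signature:

# Hypothetical usage; `tuner` is assumed to be an instance of the class above
# with `self.search` already populated.
best_params = tuner.auto_tune(X, y, num_evals=100, num_folds=5,
                              opt_metric="mse", nprocs=-1)
print(best_params)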
def compute_optimal_parameters(source, target, source_labels, target_labels,
                               method_name, classifier, num_evals=50):
    if 'no_transfer' in method_name:
        return {}

    def to_maximize(**kwargs):
        kwargs = process_space_dict(kwargs, INT_ARGS)
        returned = do_method(source, target, source_labels, target_labels,
                             method_name, classifier, kwargs)
        if method_name == 'ada':
            return sklearn.metrics.balanced_accuracy_score(*reversed(returned))
        pred = classifier().fit(returned[0], source_labels).predict(returned[1])
        # See https://stackoverflow.com/questions/25652663/scipy-sparse-eigensolver-memoryerror-after-multiple-passes-through-loop-without
        gc.collect()
        return sklearn.metrics.balanced_accuracy_score(target_labels, pred)

    space = SEARCH_SPACES_METHODS[method_name]
    optimal_params, _, _ = optunity.maximize_structured(to_maximize, space,
                                                        num_evals=num_evals)
    optimal_params = process_space_dict(optimal_params, INT_ARGS)
    return optimal_params
def fit(self, X, y):
    if not self.loader:
        self.loader = lambda: (X, y)
    est = self.estimator
    scorer = self.scoring
    cv = self.cv
    loader = self.loader
    callback = self.callback
    length = len(cv) * self.max_iter
    if callback:
        callback(0, length)
    transforms = self.transforms
    cacher = self.cacher
    fit_callback = self.fit_callback
    mapper = self.mapper
    cv_scores = {}

    def fit_func(**params):
        params = apply_transforms(params, transforms)
        base_id = len(cv_scores) * len(cv)
        scores = PSOSearch.cross_val_score(base_index=base_id,
                                           estimator=est,
                                           parameters=params,
                                           loader=loader,
                                           cv=cv,
                                           scorer=scorer,
                                           fit_callback=fit_callback,
                                           cacher=cacher,
                                           callback=callback,
                                           mapper=mapper)
        cv_score = _CVScoreTuple(params, np.mean(scores), scores)
        cv_scores[base_id] = cv_score
        # Pick the configuration with the best mean validation score so far
        # (the original used Python 2 idioms: indexing dict.values() and itervalues()).
        best_score_params = list(cv_scores.values())[np.argmax(
            np.array([score.mean_validation_score
                      for score in cv_scores.values()]))]
        best_score_mean = best_score_params.mean_validation_score
        best_score_std = np.std(best_score_params.cv_validation_scores)
        if callback:
            callback(description='%.3f+-%.3f' % (best_score_mean, best_score_std))
        return scores.mean()

    np.random.seed(1)
    random.seed(1)
    res, optimize_results, solver_info = optunity.maximize_structured(
        fit_func, self.space, num_evals=self.max_iter)
    self._best_score = optimize_results[0]
    self._grid_scores = cv_scores
    self._best_params = res
def maximise(self, target, num_evals, search=None, verbose=20, **constants):
    """
    Maximise target within the defined search space.

    target has signature score = target(**params).
    constants are fixed for every iteration, e.g. x and y.
    verbose=20 reports every iteration.
    """
    self.verbose = verbose

    if search is None:
        with open("search.yaml") as f:
            # safe_load avoids the Loader requirement of newer PyYAML
            search = yaml.safe_load(f)
    #pprint(search_space)
    self.cleanparams(search)
    #pprint(search_space)

    # note: return values are ignored because results are tracked via self.runs,
    # which has functions such as plot, report and correlations
    optunity.maximize_structured(self.make_target(target, **constants),
                                 search_space=search,
                                 num_evals=num_evals)
    self.runs.report()
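For reference, a sketch of the kind of structured search space this expects, written as the Python dict that loading "search.yaml" would produce; the parameter names are purely illustrative and must match the target's keyword arguments:

# Illustrative only; keys must correspond to target(**params):
search = {'kernel': {'linear': {'C': [0, 10]},
                     'rbf': {'C': [0, 10], 'logGamma': [-5, 0]}}}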
def run_optunity(self):
    cv_decorator = optunity.cross_validated(
        x=self.X,
        y=self.Y,
    )
    svm_tuned_auroc = cv_decorator(self.svm_tuned_auroc)
    optimal_svm_pars, info, _ = optunity.maximize_structured(
        svm_tuned_auroc, self.space, num_evals=150, pmap=optunity.pmap)
    print("Optimal parameters: " + str(optimal_svm_pars))
    print("AUROC of tuned SVM: %1.3f" % info.optimum)
    df = optunity.call_log2dataframe(info.call_log)
    print(df.sort_values('value', ascending=False))
def train(self):
    self._pca.fit(self._features_data)
    features_pca = self._pca.transform(self._features_data)
    cv_decorator = optunity.cross_validated(x=features_pca,
                                            y=self._labels,
                                            num_folds=5)
    svm_tuned = cv_decorator(svm_tuned_precision)
    optimal_svm_pars, _, _ = optunity.maximize_structured(
        svm_tuned, _SVM_SEARCH_SPACE,
        num_evals=self._config.get('num_evals', 100))
    self._model = _train_model(features_pca, self._labels, **optimal_svm_pars)
def optimize(self, data, search_space, val_data=None, num_evals=50,
             optimize='max', solver_name='particle swarm'):
    """
    Parameters
    ----------
    data: [X, Y] - arrays
        Data used for cross-validated training (honouring the
        'train_test_split' parameter).
    search_space: dict
        Parameters to optimize, e.g. {'units': [100, 1000]}.
    val_data: [X, Y] - arrays
        Default None. If given, the optimizer metric is evaluated on
        val_data and the 'train_test_split' parameter is ignored.
    num_evals: int
        Number of iterations for the optunity optimizer.
    optimize: str
        'max' or 'min'.
    solver_name: str
        Default 'particle swarm'; only the default solver is currently supported.
    """
    train_manager = self._create_train_manager(data, val_data, search_space)
    sys.setrecursionlimit(100000)
    if optimize == 'max':
        self.retr, self.extra, self.info = opt.maximize_structured(
            f=train_manager.train,
            num_evals=num_evals,
            search_space=search_space)
    elif optimize == 'min':
        self.retr, self.extra, self.info = opt.minimize_structured(
            f=train_manager.train,
            search_space=search_space,
            num_evals=num_evals)
    else:
        raise InvalidParamError('optimize', optimize)

    # load and return the best model
    best_model = train_manager.get_best_model(self.extra)
    return best_model
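A hedged usage sketch of the method above; the optimizer instance, the data names, and the search-space contents are hypothetical:

# Hypothetical call; assumes `optimizer` is an instance of the class above.
best_model = optimizer.optimize(data=[X_train, Y_train],
                                search_space={'units': [100, 1000]},
                                num_evals=30,
                                optimize='max')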
def compute_roc_tuned(x_train, y_train, x_test, y_test):

    # define objective function
    @optunity.cross_validated(x=x_train, y=y_train, num_iter=2, num_folds=5)
    def inner_cv(x_train, y_train, x_test, y_test, kernel='linear', C=0,
                 gamma=0, degree=0, coef0=0):
        model = train_model(x_train, y_train, kernel, C, gamma, degree, coef0)
        decision_values = model.decision_function(x_test)
        return optunity.metrics.roc_auc(y_test, decision_values)

    # optimize parameters
    optimal_pars, _, _ = optunity.maximize_structured(inner_cv, space, 70,
                                                      pmap=optunity.pmap)
    print('optimal parameters after tuning %s' % str(optimal_pars))

    # if you are running this in IPython, optunity.pmap will not work
    # more info at: https://github.com/claesenm/optunity/issues/8
    # comment out the above line and replace by the one below:
    # optimal_pars, _, _ = optunity.maximize_structured(inner_cv, space, 200)

    tuned_model = train_model(x_train, y_train, **optimal_pars)
    decision_values = tuned_model.decision_function(x_test)
    auc = optunity.metrics.roc_auc(y_test, decision_values)
    return auc
def prepare_svm(X, Y, prob_setting):
    '''
    Code inspired by
    http://optunity.readthedocs.org/en/latest/notebooks/notebooks/sklearn-svc.html#tune-svc-without-deciding-the-kernel-in-advance
    '''
    cv_decorator = optunity.cross_validated(x=X, y=Y, num_folds=10)

    space = {'kernel': {'linear': {'C': [0, 1000],
                                   'class_weight_param': [1, 22]},
                        'rbf': {'logGamma': [-5, 1],
                                'C': [0, 1000],
                                'class_weight_param': [1, 22]},
                        'poly': {'degree': [2, 5],
                                 'C': [0, 1000],
                                 'coef0': [0, 100],
                                 'class_weight_param': [1, 22]}
                        }
             }

    def train_model(x_train, y_train, kernel, C, logGamma, degree, coef0,
                    classWeightParam):
        if kernel == 'linear':
            model = SVC(kernel=kernel, C=C,
                        class_weight={1: classWeightParam})
        elif kernel == 'poly':
            model = SVC(kernel=kernel, C=C, degree=degree, coef0=coef0,
                        class_weight={1: classWeightParam})
        elif kernel == 'rbf':
            model = SVC(kernel=kernel, C=C, gamma=10 ** logGamma,
                        class_weight={1: classWeightParam})
        else:
            raise ValueError("Unknown kernel function: %s" % kernel)
        model.fit(x_train, y_train)
        return model

    def svm_tuned_auroc(x_train, y_train, x_test, y_test, kernel='linear',
                        C=0, logGamma=0, degree=0, coef0=0,
                        class_weight_param=1):
        model = train_model(x_train, y_train, kernel, C, logGamma, degree,
                            coef0, class_weight_param)
        decision_values = model.decision_function(x_test)
        return optunity.metrics.roc_auc(y_test, decision_values)

    svm_tuned_auroc = cv_decorator(svm_tuned_auroc)

    optimal_svm_pars, info, _ = optunity.maximize_structured(svm_tuned_auroc,
                                                             space,
                                                             num_evals=200)
    print("Optimal parameters: " + str(optimal_svm_pars))
    print("AUROC of tuned SVM: %1.3f" % info.optimum)

    classifier = build_svc(optimal_svm_pars, prob_setting)
    classifier.fit(X, Y)
    return classifier
        model.fit(x_train, y_train)
    else:
        raise ValueError('Unknown algorithm: %s' % algorithm)

    # predict the test set
    if algorithm == 'SVM':
        predictions = model.decision_function(x_test)
    else:
        predictions = model.predict_proba(x_test)[:, 1]
    return optunity.metrics.roc_auc(y_test, predictions, positive=True)

performance(algorithm='k-nn', n_neighbors=3)

optimal_configuration, info, _ = optunity.maximize_structured(
    performance, search_space=search, num_evals=300)
print(optimal_configuration)
print(info.optimum)

solution = dict([(k, v) for k, v in optimal_configuration.items()
                 if v is not None])
print('Solution\n========')
print("\n".join(map(lambda x: "%s \t %s" % (x[0], str(x[1])),
                    solution.items())))

# basic optim
def create_objective_function():
    xoff = random.random()
    yoff = random.random()
def optimise_sgd(self):
    self.__log.write("Optimising SGD model")
    self.__sgd_tuned_auroc_ = self.__cv_decorator_(self.__sgd_tuned_auroc_)
    optimal_sgd_pars, info, _ = optunity.maximize_structured(
        self.__sgd_tuned_auroc_, self.__sgd_space_, num_evals=150)
    print("Optimal parameters: " + str(optimal_sgd_pars))
    print("AUROC of tuned SGD: %1.3f" % info.optimum)
                    C=0, logGamma=0, degree=0, coef0=0):
    model = train_model(x_train, y_train, kernel, C, logGamma, degree, coef0)
    decision_values = model.decision_function(x_test)
    return optunity.metrics.roc_auc(y_test, decision_values)

svm_tuned_auroc = cv_decorator(svm_tuned_auroc)

#print svm_default_auroc(C=1.0, logGamma=0.0)
# optimal_rbf_pars, info, _ = optunity.maximize(svm_rbf_tuned_auroc, num_evals=150, C=[0, 10], logGamma=[-5, 0])
optimal_svm_pars, info, _ = optunity.maximize_structured(svm_tuned_auroc, space,
                                                         num_evals=150)
# when running this outside of IPython we can parallelize via optunity.pmap
# optimal_rbf_pars, _, _ = optunity.maximize(svm_rbf_tuned_auroc, 150, C=[0, 10], gamma=[0, 0.1], pmap=optunity.pmap)

print("Optimal parameters: " + str(optimal_svm_pars))
print("AUROC of tuned SVM with RBF kernel: %1.3f" % info.optimum)

# df = optunity.call_log2dataframe(info.call_log)
# print df.sort('value', ascending=False)

# if optimal_svm_pars['kernel'] == 'rbf':
svc = SVC(C=optimal_svm_pars['C'], kernel='rbf',
          gamma=10**optimal_svm_pars['logGamma'],
def RF_pred(X_train, Y_train, X_test, Y_test, n_splits=5, num_evals=100):

    @optunity.cross_validated(x=X_train, y=Y_train, num_folds=n_splits)
    def fun_max(x_train, y_train, x_test, y_test, n_estimators, max_depth,
                max_features, min_samples_split):
        if max_features < 0.5:
            max_features = "auto"
        else:
            max_features = "sqrt"
        clf = RandomForestClassifier(n_estimators=int(n_estimators),
                                     max_depth=int(max_depth),
                                     max_features=max_features,
                                     min_samples_split=int(min_samples_split),
                                     class_weight="balanced")
        clf.fit(x_train, y_train)
        score = optunity.metrics.roc_auc(y_test, clf.predict_proba(x_test)[:, 1])
        #print(f"Average AUC on {n_splits}-fold validation with {int(n_estimators)} trees of max depth :{max_depth} = {score}")
        return score

    # Search space definition.
    # Number of trees in the random forest
    n_estimators = [100, 1000]
    # Proxy for the number of features to consider at every split
    max_features = [0, 1]
    # Maximum number of levels in each tree
    max_depth = [5, 25]
    #max_depth.append(None)
    # Minimum number of samples required to split a node
    min_samples_split = [2, 10]
    # Minimum number of samples required at each leaf node
    #min_samples_leaf = [1, 4]
    # Method of selecting samples for training each tree
    #bootstrap = [True, False]

    # Create the search space
    random_grid = {
        'n_estimators': n_estimators,
        'max_depth': max_depth,
        'max_features': max_features,
        'min_samples_split': min_samples_split
        # 'min_samples_leaf': min_samples_leaf,
        # 'bootstrap': bootstrap
    }

    optimal_parameters, info, _ = optunity.maximize_structured(
        fun_max,
        search_space=random_grid,
        num_evals=num_evals,
        pmap=optunity.pmap)
    print(f"Optimal parameters: {optimal_parameters} with AUC of {info.optimum}")

    # Evaluate on the test set.
    if optimal_parameters["max_features"] < 0.5:
        max_feats = "auto"
    else:
        max_feats = "sqrt"
    clf = RandomForestClassifier(
        n_estimators=int(optimal_parameters["n_estimators"]),
        max_depth=int(optimal_parameters["max_depth"]),
        max_features=max_feats,
        min_samples_split=int(optimal_parameters["min_samples_split"]),
        class_weight="balanced")
    clf.fit(X_train, Y_train)
    fpr, tpr, _ = roc_curve(Y_test, clf.predict_proba(X_test)[:, 1])
    precision, recall, _ = precision_recall_curve(
        Y_test, clf.predict_proba(X_test)[:, 1])
    #np.save("./plots/fpr_RF.npy", fpr)
    #np.save("./plots/tpr_RF.npy", tpr)
    score = roc_auc_score(Y_test, clf.predict_proba(X_test)[:, 1])
    print(f"ROC AUC with optimal set of hyperparameters: {score}")
    return score, fpr, tpr, precision, recall, clf
def methodSelection(data, labels):

    def train_svm(data, labels, kernel, C, gamma, degree, coef0):
        """A generic SVM training function, with arguments based on the chosen kernel."""
        if kernel == 'linear':
            model = SVC(kernel=kernel, C=C)
        elif kernel == 'poly':
            model = SVC(kernel=kernel, C=C, degree=degree, coef0=coef0)
        elif kernel == 'rbf':
            model = SVC(kernel=kernel, C=C, gamma=gamma)
        else:
            raise ValueError("Unknown kernel function: %s" % kernel)
        model.fit(data, labels)
        return model

    search = {'algorithm': {'k-nn': {'n_neighbors': [1, 10]},
                            'SVM': {'kernel': {'linear': {'C': [0, 2]},
                                               'rbf': {'gamma': [0, 1], 'C': [0, 10]},
                                               'poly': {'degree': [2, 5], 'C': [0, 50], 'coef0': [0, 1]}
                                               }
                                    },
                            'naive-bayes': None,
                            'random-forest': {'n_estimators': [10, 30],
                                              'max_features': [5, 20]}
                            }
              }

    @optunity.cross_validated(x=data, y=labels, num_folds=4)
    def performance(x_train, y_train, x_test, y_test, algorithm,
                    n_neighbors=None, n_estimators=None, max_features=None,
                    kernel=None, C=None, gamma=None, degree=None, coef0=None):
        # fit the model
        if algorithm == 'k-nn':
            model = KNeighborsClassifier(n_neighbors=int(n_neighbors))
            model.fit(x_train, y_train)
        elif algorithm == 'SVM':
            model = train_svm(x_train, y_train, kernel, C, gamma, degree, coef0)
        elif algorithm == 'naive-bayes':
            model = GaussianNB()
            model.fit(x_train, y_train)
        elif algorithm == 'random-forest':
            model = RandomForestClassifier(n_estimators=int(n_estimators),
                                           max_features=int(max_features))
            model.fit(x_train, y_train)
        else:
            raise ValueError('Unknown algorithm: %s' % algorithm)

        # predict the test set
        if algorithm == 'SVM':
            predictions = model.decision_function(x_test)
        else:
            predictions = model.predict_proba(x_test)[:, 1]
        return optunity.metrics.roc_auc(y_test, predictions, positive=True)

    optimal_configuration, info, _ = optunity.maximize_structured(
        performance, search_space=search, num_evals=300)

    solution = dict([(k, v) for k, v in optimal_configuration.items()
                     if v is not None])
    print('Solution\n========')
    print("\n".join(map(lambda x: "%s \t %s" % (x[0], str(x[1])),
                        solution.items())))
    print(info.optimum)
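For reference, maximize_structured returns a flat dict covering every conditional hyperparameter; branches that were not selected come back as None, which is why the snippet filters them out. A purely illustrative result (the values are made up) might look like this:

# Illustrative only; actual values depend on the run:
# {'algorithm': 'SVM', 'kernel': 'rbf', 'C': 4.2, 'gamma': 0.13,
#  'degree': None, 'coef0': None, 'n_neighbors': None,
#  'n_estimators': None, 'max_features': None}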
        raise ValueError('Unknown algorithm: %s' % algorithm)

    # predict the test set
    if algorithm == 'SVM':
        predictions = model.decision_function(x_test)
    else:
        predictions = model.predict_proba(x_test)[:, 1]
    return optunity.metrics.roc_auc(y_test, predictions, positive=True)

performance(algorithm='k-nn', n_neighbors=3)

optimal_configuration, info, _ = optunity.maximize_structured(
    performance, search_space=search, num_evals=300)
print(optimal_configuration)
print(info.optimum)

solution = dict([(k, v) for k, v in optimal_configuration.items()
                 if v is not None])
print('Solution\n========')
print("\n".join(map(lambda x: "%s \t %s" % (x[0], str(x[1])),
                    solution.items())))

# basic optim
def create_objective_function():
    xoff = random.random()
    yoff = random.random()

    def f(x, y):
        return (x - xoff)**2 + (y - yoff)**2
    return f
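As a follow-up, a minimal sketch of optimizing the quadratic returned by create_objective_function with optunity's box-constrained solver; the bounds are illustrative and not taken from the original code:

# Illustrative bounds; the offsets lie in [0, 1], so [-5, 5] easily covers them.
f = create_objective_function()
optimal_pars, details, _ = optunity.minimize(f, num_evals=100,
                                             x=[-5, 5], y=[-5, 5])
print(optimal_pars, details.optimum)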