def f(mi, s, md, r, c):
    """Hyper-parameter search objective for the boosted-trees model.

    mi -- max_iterations, s -- step_size, md -- max_depth,
    r -- row_subsample, c -- column_subsample.

    NOTE(review): fits on the *validation* indices and scores on the
    *training* indices -- presumably deliberate for this search setup.
    Relies on module-level X, y, train_idx, valid_idx.
    """
    model = BoostedTreesClassifier(max_iterations=mi,
                                   step_size=s,
                                   max_depth=md,
                                   row_subsample=r,
                                   column_subsample=c,
                                   verbose=0)
    model.fit(X[valid_idx], y[valid_idx])
    return model.score(X[train_idx], y[train_idx])
def OptBTC(step_size=.5, max_iterations=100, row_subsample=.9,
           column_subsample=.9, max_depth=8):
    """Build a BoostedTreesClassifier with the given hyper-parameters,
    log them, and return its cross-validated performance report on the
    'original' feature set (via ReportPerfCV)."""
    clf = BoostedTreesClassifier(
        step_size=step_size,
        max_iterations=max_iterations,
        row_subsample=row_subsample,
        column_subsample=column_subsample,
        max_depth=max_depth,
    )
    logger.info("Params: %s", clf.get_params())
    return ReportPerfCV(clf, "original", y)
def f(params):
    """Dict-based hyper-parameter objective: fit on the validation split
    and return the log-loss on the training split (lower is better).

    Expected keys in *params*: 'mi' (max_iterations), 's' (step_size),
    'md' (max_depth), 'r' (row_subsample), 'c' (column_subsample).
    Relies on module-level X, y, train_idx, valid_idx.
    """
    model = BoostedTreesClassifier(
        max_iterations=int(params['mi']),
        step_size=params['s'],
        max_depth=int(params['md']),
        row_subsample=params['r'],
        column_subsample=params['c'],
        verbose=0,
    )
    model.fit(X[valid_idx], y[valid_idx])
    proba = model.predict_proba(X[train_idx])
    return log_loss(y[train_idx], proba)
# NOTE(review): this chunk opens mid-way through a param_distributions
# dict literal whose opening brace is above this chunk.
    'row_subsample': Uniform(.3, 1.),
    'column_subsample': Uniform(.3, 1.),
    'min_child_weight': LogUniform(.01, 100),
    'min_loss_reduction': Uniform(0.0001, 10)
}
from gl import BoostedTreesClassifier
# Quiet graphlab's own logger so only CRITICAL messages get through.
logger2 = logging.getLogger('graphlab')
logger2.setLevel(logging.CRITICAL)
# Names of the base-model prediction files stacked as ensemble features.
CONFIG['ensemble_list'] = [
    'btc', 'btc2', 'btc3', 'btc4', 'svc', 'svc2', 'svc3', 'nn', 'nn2',
    'nic', 'mpc', 'knc', 'etc', 'cccv', 'log', 'crfcbag', 'cetcbag', 'keras'
]
X, Xtest = GetDataset('ensemble', ensemble_list=CONFIG['ensemble_list'])
print "lkjr"  # leftover debug marker (Python 2 print) -- presumably safe to remove
# Gaussian-process-guided hyper-parameter search over the boosted-trees
# model, with a 24-hour wall-clock budget.
clf = GaussianProcessCV(estimator=BoostedTreesClassifier(verbose=False),
                        param_distributions=param_distributions,
                        kernel=DoubleExponential, scoring=LogLoss,
                        mu_prior=-1., sigma_prior=.30, sig=.01, cv=5,
                        max_iter=55, random_state=1,
                        time_budget=24 * 3600)
clf.fit(X, y)
# Plain random search over the same distributions; the call's remaining
# keyword arguments continue past this chunk.
clf = RandomSearchCV(estimator=BoostedTreesClassifier(verbose=False),
                     param_distributions=param_distributions,
                     cv=5, max_iter=100,
# NOTE(review): this chunk opens mid-way through a bagging loop -- the
# three statements below are the tail of a "for" loop started above.
    res_cv = res_cv + yhat_cv
    res_test = res_test + yhat_test
    print i, log_loss(y, yhat_cv)
# Average the bagged predictions over all n_bag rounds and save them.
res_cv = res_cv/n_bag
res_test = res_test/n_bag
np.savez_compressed('../Submission/yhat_cetcbag_full.npz', yhat = res_cv)
np.savez_compressed('../Submission/yhat_cetcbag_test.npz', yhat = res_test)
if False:
    # Bagged boosted-trees run using the best saved hyper-parameters.
    X, y, Xtest = LoadData()
    from gl import BoostedTreesClassifier
    import json
    import time
    params = json.load(open('../Params/Best/BTC:original_saved_params.json'))
    params = params['BTC:original']
    np.random.seed(1)
    clf = BoostedTreesClassifier(**params)
    res_cv, res_test = 0, 0
    n_bag = 10
    time_before = time.time()
    for i in xrange(n_bag):
        yhat_cv, yhat_test = GetYhat(clf, 'original', y)
        res_cv = res_cv + yhat_cv
        res_test = res_test + yhat_test
        print i, log_loss(y, yhat_cv)
        # Stop bagging early once past the 9-hour time budget.
        if time.time() - time_before > 9*3600:
            break
    # Average over the bags actually completed (i+1, not n_bag, because
    # the loop may have broken out early).
    res_cv = res_cv/(i+1)
    res_test = res_test/(i+1)
    np.savez_compressed('../Submission/yhat_btcbag_full.npz', yhat = res_cv)
    np.savez_compressed('../Submission/yhat_btcbag_test.npz', yhat = res_test)
# Body of this branch continues past this chunk.
if True:
import numpy as np
import itertools
from scipy.stats import uniform
from scipy.stats import randint
import logging
from ml import *
# Load the 'original' feature matrix and the target labels.
X, _ = GetDataset('original')
_, y, _ = LoadData()
from gl import BoostedTreesClassifier
# Warm-up fit on a tiny slice -- presumably forces graphlab to
# initialize before the real runs (TODO confirm).
clf = BoostedTreesClassifier(verbose = 0)
clf.fit(X[:10], y[:10])
np.random.seed(1)
if False:
    # Exhaustive grid search over (C, gamma) for an SVC, scored on the
    # validation indices.
    Cs = [.001, .01, .1, 1., 10.]
    gammas = [.001, .01, .1, 1., 10.]
    res = []; i = 0
    for (C, gamma) in itertools.product(Cs, gammas):
        print i, C, gamma; i += 1
        clf = SVC(C = C, gamma = gamma)
        clf.fit(X[train], y[train])
        res.append(clf.score(X[valid], y[valid]))
    # Random search over the same ranges on a log scale; the loop body
    # continues past this chunk.
    res2 = []
    for i in xrange(len(res)*100):
        C = 10**uniform(-3.5,5).rvs()
        gamma = 10**uniform(-3.5,5).rvs()
        print i, C, gamma