def main():
    c_pairs = [f_1, f_2, f_3]
    kind = ['q_mse']
    options = []
    results = {}
    for c_pair in c_pairs:
        for opt in kind:
            options.append((c_pair, opt))

    for c_pair, opt in options:
        print(c_pair, opt)

        def obj_f(lag, drmse, krmse, window_size):
            f_lag = int(lag)
            f_drmse = np.exp(drmse)
            f_krmse = np.exp(krmse)
            f_window = int(window_size)
            obj_f.m += 1
            print(obj_f.m)
            if opt == 'mse':
                return trainMSE(c_pair + '.csv', f_lag, f_drmse, f_krmse, f_window)
            elif opt == 'profit':
                return trainProfit(c_pair + '.csv', f_lag, f_drmse, f_krmse, f_window)
            elif opt == 'q_mse':
                return trainMSEPredictLogRet(c_pair + '.csv', f_lag, f_drmse, f_krmse, f_window)
            elif opt == 'q_profit':
                return trainProfitPredictLogRet(c_pair + '.csv', f_lag, f_drmse, f_krmse, f_window)

        obj_f.m = 0  # evaluation counter attached to the objective function
        kwargs = {'lag': [5, 20],
                  'drmse': [np.log(0.001), np.log(0.01)],
                  'krmse': [np.log(0.001), np.log(0.01)],
                  'window_size': [10, 300]}

        # MSE-style objectives are minimized, profit-style objectives are maximized.
        ret = None
        if opt == 'mse':
            ret = optunity.minimize(obj_f, num_evals=100, **kwargs)
        elif opt == 'profit':
            ret = optunity.maximize(obj_f, num_evals=100, **kwargs)
        elif opt == 'q_mse':
            ret = optunity.minimize(obj_f, num_evals=30, **kwargs)
        elif opt == 'q_profit':
            ret = optunity.maximize(obj_f, num_evals=30, **kwargs)

        with open(c_pair + '_' + opt + '.json', 'w') as f:
            f.write(json.dumps(ret))
        results[(c_pair, opt)] = ret
    return results
def hypersearch_DL(x_data, y_data, method, nfolds, nevals,
                   lrexp_range, l1rexp_range, dro_range,
                   units1_range, units2_range, alpha_range,
                   batch_size, num_epochs):
    @optunity.cross_validated(x=x_data, y=y_data, num_folds=nfolds)
    def modelrun(x_train, y_train, x_test, y_test, lrexp, l1rexp, dro, units1, units2, alpha):
        cv_log = DL_single_run(xtr=x_train, ytr=y_train, units1=units1, units2=units2,
                               dro=dro, lr=10**lrexp, l1r=10**l1rexp, alpha=alpha,
                               batchsize=batch_size, numepochs=num_epochs)
        cv_preds = cv_log.model.predict(x_test, batch_size=1)[1]
        cv_C = concordance_index(y_test[:, 1], -cv_preds, y_test[:, 0])
        return cv_C

    optimal_pars, searchlog, _ = optunity.maximize(modelrun, num_evals=nevals, solver_name=method,
                                                   lrexp=lrexp_range, l1rexp=l1rexp_range,
                                                   dro=dro_range, units1=units1_range,
                                                   units2=units2_range, alpha=alpha_range)
    print('Optimal hyperparameters: ' + str(optimal_pars))
    print('Cross-validated C after tuning: %1.3f' % searchlog.optimum)
    return optimal_pars, searchlog
def time_series():
    # The order of the examples depends on time and must be preserved,
    # i.e. we cannot randomly shuffle the data.
    x_train, y_train, x_test, y_test = dataset_split(d, split=2. / 3.)
    dictSize = 100

    def f(gamma, forget, eta, nu):
        return novelty_detection(x_train, y_train, x_test, y_test,
                                 gamma, forget, eta, nu, dictSize, RP=rij)

    return optunity.maximize(f, num_evals=n_evals,
                             gamma=[0, 1], forget=[0.5, 1], eta=[0, 0.5], nu=[0, 1],
                             solver_name=solver)
def normal():
    # Here we use a different setup, one with which we can do cross-validation.
    X, Y, _, _ = dataset_split(d, split=1)
    dictSize = 100

    @optunity.cross_validated(x=X, y=Y, num_folds=n_folds)
    def f(x_train, y_train, x_test, y_test, gamma, forget, eta, nu):
        return novelty_detection(x_train, y_train, x_test, y_test,
                                 gamma, forget, eta, nu, dictSize, RP=rij)

    return optunity.maximize(f, num_evals=n_evals,
                             gamma=[0, 1], forget=[0.5, 1], eta=[0, 0.5], nu=[0, 1],
                             solver_name=solver)
def hypersearch_cox(x_data, y_data, method, nfolds, nevals, penalty_range):
    @optunity.cross_validated(x=x_data, y=y_data, num_folds=nfolds)
    def modelrun(x_train, y_train, x_test, y_test, penalty):
        cvmod = train_cox_reg(xtr=x_train, ytr=y_train, penalty=10**penalty)
        cv_preds = cvmod.predict_partial_hazard(x_test)
        cv_C = concordance_index(y_test[:, 1], -cv_preds, y_test[:, 0])
        return cv_C

    optimal_pars, searchlog, _ = optunity.maximize(modelrun, num_evals=nevals,
                                                   solver_name=method, penalty=penalty_range)
    print('Optimal hyperparameters: ' + str(optimal_pars))
    print('Cross-validated C after tuning: %1.3f' % searchlog.optimum)
    return optimal_pars, searchlog
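# A minimal usage sketch for hypersearch_cox above. The arrays, fold count and
# penalty range are illustrative assumptions, not values taken from the source;
# the y layout (column 0 = event indicator, column 1 = time) follows how
# concordance_index is called inside modelrun.
opt_pars, log = hypersearch_cox(x_data=x, y_data=y,
                                method='particle swarm',   # any optunity solver name
                                nfolds=5, nevals=50,
                                penalty_range=[-2, 2])      # log10 of the L2 penalty
print(log.optimum, opt_pars['penalty'])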
def hypersearch_nn(x_data, y_data, method, nfolds, nevals, batch_size, num_epochs,
                   backend: str, model_kwargs: dict, **hypersearch):
    @optunity.cross_validated(x=x_data, y=y_data, num_folds=nfolds)
    def modelrun(x_train, y_train, x_test, y_test, **hypersearch):
        cv_log = train_nn(
            backend=backend,
            xtr=x_train,
            ytr=y_train,
            batch_size=batch_size,
            n_epochs=num_epochs,
            **model_kwargs,
            **hypersearch
        )
        cv_preds = cv_log.predict(x_test, batch_size=1)[1]
        cv_C = concordance_index(y_test[:, 1], -cv_preds, y_test[:, 0])
        return cv_C

    optimal_pars, searchlog, _ = optunity.maximize(
        modelrun, num_evals=nevals, solver_name=method, **hypersearch
    )
    print('Optimal hyperparameters: ' + str(optimal_pars))
    print('Cross-validated C after tuning: %1.3f' % searchlog.optimum)
    return optimal_pars, searchlog
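# Hedged example of how the **hypersearch kwargs above might be supplied at the
# call site: each keyword becomes a box constraint that optunity searches over
# and forwards into train_nn on every evaluation. The backend string, parameter
# names (lr_exp, dropout) and ranges are assumptions and must match what
# train_nn actually accepts.
best_pars, log = hypersearch_nn(x, y,
                                method='sobol', nfolds=5, nevals=50,
                                batch_size=64, num_epochs=20,
                                backend='pytorch',
                                model_kwargs={'n_features': x.shape[1]},
                                lr_exp=[-5, -2],
                                dropout=[0.0, 0.5])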
def compute_roc_tuned(x_train, y_train, x_test, y_test):
    # define objective function
    @optunity.cross_validated(x=x_train, y=y_train, num_iter=2, num_folds=5)
    def inner_cv(x_train, y_train, x_test, y_test, C, gamma):
        model = sklearn.svm.SVC(C=C, gamma=gamma).fit(x_train, y_train)
        decision_values = model.decision_function(x_test)
        return optunity.metrics.roc_auc(y_test, decision_values)

    # optimize parameters
    optimal_pars, _, _ = optunity.maximize(inner_cv, 150, C=[0, 10], gamma=[0, 0.1],
                                           pmap=optunity.pmap)
    # If you are running this in IPython, optunity.pmap will not work.
    # More info at: https://github.com/claesenm/optunity/issues/8
    # Comment out the line above and replace it with the one below:
    # optimal_pars, _, _ = optunity.maximize(inner_cv, 200, C=[0, 10], gamma=[0, 0.1])

    tuned_model = sklearn.svm.SVC(**optimal_pars).fit(x_train, y_train)
    decision_values = tuned_model.decision_function(x_test)
    auc = optunity.metrics.roc_auc(y_test, decision_values)
    return auc
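# compute_roc_tuned is itself a per-fold function (training split in, test-fold
# AUC out), so it can be wrapped for nested cross-validation. A hedged sketch,
# assuming `data` and `labels` arrays are already defined:
outer_cv = optunity.cross_validated(x=data, y=labels, num_folds=5)
nested_auc = outer_cv(compute_roc_tuned)()   # mean AUC over the outer folds
print('Nested cross-validated AUC: %1.3f' % nested_auc)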
import threading

import optunity
import pandas as pd

from common import utils
from common.utils import send_mail


def run_d():
    print('start')
    threading.Thread(target=send_mail, args=('test', 'xxx')).start()
    print('nihoa ')


if __name__ == '__main__':
    # print('!')
    # run_d()
    # print("over")
    all_data = pd.read_hdf('/Users/lensonyuan/gitLocal/every_coin_helps/result/data_EOS_2019_4.h5',
                           key='data')
    optimal_rbf_pars, info, _ = optunity.maximize(lambda x, y: x * y, num_evals=500,
                                                  solver_name='particle swarm',
                                                  x=[0, 10], y=[-5, 5])  # default: 'particle swarm'
    print(optimal_rbf_pars)
    print(info.optimum)
    df = optunity.call_log2dataframe(info.call_log)
    print(df.sort_values('value', ascending=False)[:10])
    return run_test(False, my_configs)


if __name__ == '__main__':
    if len(sys.argv) != 4:
        print("usage: %s quotes.csv strategy.py num_iterations" % sys.argv[0])
        sys.exit(0)
    quote_file = os.path.basename(sys.argv[1])
    strategy_file = os.path.basename(sys.argv[2])
    NUM_EVALS = int(sys.argv[3])
    print("Starting %i evaluations..." % NUM_EVALS)

    my_configs = {}
    with open(os.path.join(base_path, "parameters.csv"), "r") as infile:
        all_lines = infile.readlines()
    for line in all_lines[1:]:
        s = line.split(",")
        name = s[0]
        minimum = int(s[1])
        maximum = int(s[2].split("#")[0])
        my_configs[name] = [minimum, maximum]

    hps, d1, d2 = optunity.maximize(evaluate, num_evals=NUM_EVALS,
                                    pmap=optunity.pmap, **my_configs)

    # Print final report
    print("\n\n\n\n")
    print("*** Optimal Score:", str(d1.optimum))
    print("*** Best Parameters Found:", hps)
    print("\n\n\n\n")
    test_run(**hps)
name = sys.argv[1]
budget = 150
search = {'logC': [-8, 1], 'logGamma': [-8, 1]}
objfun = executable.unpickled['objfun']


def quacking_objfun(**kwargs):
    result = objfun(**kwargs)
    print(str(kwargs) + ' --> %f' % result)
    return result


if __name__ == '__main__':
    pars, info, _ = optunity.maximize(quacking_objfun, num_evals=budget,
                                      pmap=optunity.pmap, **search)
    df = optunity.call_log2dataframe(info.call_log)
    # open in binary mode: pickle.dump writes bytes
    with open('results/%s-optunity.pkl' % name, 'wb') as f:
        log = {
            'results': df['value'],
            'logC': df['logC'],
            'logGamma': df['logGamma']
        }
        pickle.dump(log, f)
    y_pre = rf.predict(x_test)
    # pcc = round(np.corrcoef(y_pre, y_test)[0][1], 5)
    acc = optunity.metrics.accuracy(y_pre, y_test)
    # auc = optunity.metrics.roc_auc(y_test, decision_values)
    return acc
    # auc = optunity.metrics.roc_auc(y_test, decision_values)
    # print(pcc_test)
    # return optunity.metrics.mse(y_test, y_pre)


svr_rforest_tuned_acc = cv_decorator(svr_rforest_tuned_acc)
# this is equivalent to the more common syntax below
# @optunity.cross_validated(x=data, y=labels, num_folds=5)
# def svr_rforest_tuned_acc(...)
# max_features could also be searched, e.g. max_features=['sqrt', 'log2']

optimal_rbf_pars, info, _ = optunity.maximize(svr_rforest_tuned_acc, num_evals=200,
                                              n_estimators=[1, 500], max_depth=[1, 100],
                                              min_samples_leaf=[1, 20], min_samples_split=[2, 20])
# when running this outside of IPython we can parallelize via optunity.pmap
# optimal_rbf_pars, _, _ = optunity.maximize(svm_rbf_tuned_auroc, 150, C=[0, 10], gamma=[0, 0.1], pmap=optunity.pmap)
print("Optimal parameters: " + str(optimal_rbf_pars))
print("ACC of tuned random forest with optimal hyperparameters: %1.5f" % info.optimum)

# regressor = SVR(kernel='rbf', gamma=10 ** optimal_rbf_pars['logGamma'], C=optimal_rbf_pars['C'])
# kernel = KF.gaussianKernel1(X_test.T, X_train.T, 10 ** optimal_rbf_pars['logGamma'])
# regressor.fit(X_train, Y_train)
# y_train = regressor.predict(X_train)
# y_predict = regressor.predict(X_test)
# X_train, X_test, y_train, y_test = train_test_split(sample, label, test_size=0.2, random_state=42)

rf1 = RandomForestClassifier(n_estimators=int(optimal_rbf_pars['n_estimators']),
                             max_features='sqrt',
                             max_depth=int(optimal_rbf_pars['max_depth']),
                             min_samples_leaf=int(optimal_rbf_pars['min_samples_leaf']),
                             min_samples_split=int(optimal_rbf_pars['min_samples_split']))
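# A hedged follow-up sketch: refit the tuned forest and score it on the held-out
# split, assuming X_train/Y_train/X_test/Y_test hold the split referenced in the
# commented-out code above (names are assumptions, not defined in this fragment).
rf1.fit(X_train, Y_train)
print("Held-out accuracy: %1.5f" % rf1.score(X_test, Y_test))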
'''
    # create the regressor object
    svm = sv.SVR(kernel='rbf', C=0.1 * logC, gamma=0.1 * logGamma)

    # estimate the model
    svm.fit(x_train, y_train)

    # decision function
    # (note: recent versions of scikit-learn removed decision_function from SVR;
    #  there this would need model.predict and a regression metric instead)
    decision_values = svm.decision_function(x_test)

    # return the score
    return mt.roc_auc(y_test, decision_values)


# find the optimal values of C and gamma
hps, _, _ = opt.maximize(regression_svm, num_evals=10, logC=[3, 10], logGamma=[3, 20])

# and the values are...
print('The optimal values are: C - {0:.2f}, gamma - {1:.2f}'
      .format(0.1 * hps['logC'], 0.1 * hps['logGamma']))

# estimate the model with optimal values
regressor = sv.SVR(kernel='rbf', C=0.1 * hps['logC'], gamma=0.1 * hps['logGamma'])\
    .fit(x_reg, y)

# predict the output
predicted = regressor.predict(x_reg)

# and calculate the R^2
import numpy as np
import sklearn.svm
from sklearn import preprocessing

import optunity
import optunity.metrics

X_train = np.loadtxt('../data/ivecs/cold/ivecs-16g-cold-128i-train')
X_dev = np.loadtxt('../data/ivecs/cold/ivecs-16g-cold-128i-dev')
y_train = np.loadtxt("../data/ivecs/cold/labels.num.train.txt")
y_dev = np.loadtxt("../data/ivecs/cold/labels.num.dev.txt")
y_train = y_train[-7604:]

X_train_scaled = preprocessing.scale(X_train)
X_dev_scaled = preprocessing.scale(X_dev)


# score function: 7 times iterated 100-fold cross-validated AUC
@optunity.cross_validated(x=X_train_scaled, y=y_train, num_folds=100, num_iter=7)
def svm_auc(x_train, y_train, x_test, y_test, logC, logGamma):
    # note: gamma has no effect with a linear kernel, but is kept in the search
    model = sklearn.svm.SVC(C=10 ** logC, gamma=10 ** logGamma,
                            kernel='linear').fit(x_train, y_train)
    decision_values = model.decision_function(x_test)
    return optunity.metrics.roc_auc(y_test, decision_values)


# perform tuning
hps, _, _ = optunity.maximize(svm_auc, num_evals=100, logC=[-5, 2], logGamma=[-5, 1])

# train model on the full training set with tuned hyperparameters
# (kernel='linear' added here so the final model matches the tuned configuration)
print("learning on the training dataset")
optimal_model = sklearn.svm.SVC(C=10 ** hps['logC'], gamma=10 ** hps['logGamma'],
                                kernel='linear').fit(X_train_scaled, y_train)
def perform_svm(attributes, data_path, version, test_percentage, min_l):
    # read features and captions
    features, captions = read_data(version, data_path)

    # filter for required words
    features1 = filter(attributes[0], features, captions)
    features2 = filter(attributes[1], features, captions)

    # find minimum length
    min_nr = min(len(features1), len(features2))
    if min_nr < min_l:
        min_l = min_nr
    print("Total data size is {}".format(min_l * 2))

    # make both sets equal
    features1 = random.sample(features1, min_l)
    features2 = random.sample(features2, min_l)

    # create test set
    x_train, x_test, y_train, y_test = create_train_test(features1, features2, test_percentage)

    best_f1 = 0
    best_c = 0
    best_gamma = 0
    seg = None
    for i in range(7):
        # choose the correct fragment
        x_train_seg = x_train[:, i, :]
        x_test_seg = x_test[:, i, :]

        @optunity.cross_validated(x=x_train_seg, y=y_train, num_folds=10, num_iter=2)
        def svm_auc(x_train, y_train, x_test, y_test, logC, logGamma):
            model = svm.SVC(C=10**logC, gamma=10**logGamma).fit(x_train, y_train)
            decision_values = model.decision_function(x_test)
            return optunity.metrics.roc_auc(y_test, decision_values)

        print("{}: Performing hyperparameter tuning for layer/segment {}".format(
            datetime.datetime.now().time(), i))
        hps, _, _ = optunity.maximize(svm_auc, num_evals=200, logC=[-5, 2], logGamma=[-5, 1])
        print("Found optimal parameters for layer/segment {}: C({}), gamma({})".format(
            i, 10**hps['logC'], 10**hps['logGamma']))

        # train model on the full training set with tuned hyperparameters
        optimal_model = svm.SVC(C=10**hps['logC'], gamma=10**hps['logGamma']).fit(
            x_train_seg, y_train)
        pred = optimal_model.predict(x_test_seg)
        f1 = calc_score(pred, y_test)
        if f1 > best_f1:
            best_f1 = f1
            best_c = 10**hps['logC']
            best_gamma = 10**hps['logGamma']
            seg = i
    return best_f1, best_c, best_gamma, seg
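# A hedged usage sketch for perform_svm; the attribute words, path, version
# string and split fraction below are illustrative placeholders, not values
# taken from the source.
f1, c, gamma, segment = perform_svm(attributes=['dog', 'cat'], data_path='data/',
                                    version='v1', test_percentage=0.2, min_l=500)
print('Best F1 {:.3f} with C={:.3g}, gamma={:.3g} on segment {}'.format(f1, c, gamma, segment))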
    print('I am called', datetime.now().strftime('%H:%M:%S'))
    cerebro = bt.Cerebro()
    cerebro.addstrategy(SmaCross, sma1=sma1, sma2=sma2)
    cerebro.adddata(data0)
    cerebro.broker.setcash(10000.0)  # set the starting cash
    cerebro.run()
    return cerebro.broker.getvalue()


# Run the optimization; the first argument is the evaluation function.
# num_evals backtests are executed (10 here; in practice use a larger budget, e.g. 100)
# over the search ranges of the two parameters sma1 and sma2.
# solver_name can be one of: particle swarm, sobol, random search, cma-es, grid search.
opt = optunity.maximize(runstrat, num_evals=10, solver_name='particle swarm',
                        sma1=[2, 55], sma2=[2, 55])

########################################
# Optimization finished; unpack the optimal parameters.
optimal_pars, details, _ = opt
print('Optimal Parameters:')
print('sma1 = %.2f' % optimal_pars['sma1'])
print('sma2 = %.2f' % optimal_pars['sma2'])

# Run one final backtest with the optimal parameters and plot it.
cerebro = bt.Cerebro()
cerebro.addstrategy(SmaCross, sma1=optimal_pars['sma1'], sma2=optimal_pars['sma2'])
cerebro.adddata(data0)
NUM_EPOCHS = args.num_epochs
NUM_FOLDS = args.num_folds

global main_logger
main_logger = load_logger(args.logdir)
main_logger.debug('Parameters: ' + str(args))

main_logger.debug('Loading dataset: ' + args.dataset)
x, y, strata = load_dataset(args.dataset)

box_constraints = load_box_constraints(args.box)
main_logger.debug('Box Constraints: ' + str(box_constraints))

opt_fxn = get_objective_function(NUM_EPOCHS, args.logdir,
                                 utils.get_optimizer_from_str(args.update_fn))
opt_fxn = optunity.cross_validated(x=x, y=y, num_folds=NUM_FOLDS,
                                   strata=strata)(opt_fxn)

main_logger.debug('Maximizing C-Index. Num_iterations: %d' % args.num_evals)
opt_params, call_log, _ = optunity.maximize(opt_fxn, num_evals=args.num_evals,
                                            solver_name='sobol', **box_constraints)

main_logger.debug('Optimal Parameters: ' + str(opt_params))
main_logger.debug('Saving Call log...')
print(call_log._asdict())
save_call_log(os.path.join(args.logdir, 'optunity_log_%s.pkl' % (str(uuid.uuid4()))),
              call_log._asdict())
exit(0)
    plt.title('Receiver operating characteristic example')
    plt.legend(loc="lower right")
    print("Saving Figure for C = " + str(c))
    plt.savefig(file_path + "AUC_RF_C" + str(c).replace(".", "_") + ".pdf")
    print("Done with thread C = " + str(c))
    return 0


@optunity.cross_validated(x=latent_pat, y=tag_mat, num_folds=5)
def RF_auc(x_train, y_train, x_test, y_test, log_n_est, max_depth):
    model = RandomForestClassifier(max_depth=int(max_depth),
                                   n_estimators=int(10**log_n_est),
                                   class_weight="balanced")
    probs_ = model.fit(x_train, y_train).predict_proba(x_test)
    auc_roc = optunity.metrics.roc_auc(y_test, probs_[:, 1])
    print("AUC for n_est = " + str(10**log_n_est) + " and max_depth = " +
          str(max_depth) + " is " + str(auc_roc))
    return auc_roc


hps, a, b = optunity.maximize(RF_auc, num_evals=200, log_n_est=[1.5, 3.5], max_depth=[1, 30])

# hps holds the tuned box parameters, keyed 'log_n_est' and 'max_depth'
optimal_model = RandomForestClassifier(n_estimators=int(10**hps['log_n_est']),
                                       max_depth=int(hps['max_depth']),
                                       class_weight="balanced")
print(hps['log_n_est'], hps['max_depth'])
# compute_AUC([0])
# optunity_RF()
                      binary=True)

# Processes test data
print("Processing test data...")
test = open(training_filename, 'r')
data = test.read().split('\n')
(X, y, vectors) = vectorify(data, vector_model, glove)
assert (len(X) == len(y) and len(X) == len(vectors))

# Training the model using optunity
print('Training the LinearSVM...')


@optunity.cross_validated(x=vectors, y=y, num_folds=2, num_iter=1)
def svm_auc(x_train, y_train, x_test, y_test, logC):
    model = svm.LinearSVC(C=10**logC).fit(x_train, y_train)
    decision_values = model.decision_function(x_test)
    return optunity.metrics.roc_auc(y_test, decision_values)


# perform tuning
hps, _, _ = optunity.maximize(svm_auc, num_evals=200, logC=[-4, 2])

# train model on the full training set with tuned hyperparameters
optimal_model = svm.LinearSVC(C=10**hps['logC']).fit(vectors, y)
print('The model has been dumped to the file: clf1.model')
pickle.dump(optimal_model, open('clf1.model', 'wb'))
import optunity
import optunity.metrics
import sklearn.svm


# score function: twice iterated 10-fold cross-validated AUC
@optunity.cross_validated(x=data, y=labels, num_folds=10, num_iter=2)
def svm_auc(x_train, y_train, x_test, y_test, logC, logGamma):
    model = sklearn.svm.SVC(C=10 ** logC, gamma=10 ** logGamma).fit(x_train, y_train)
    decision_values = model.decision_function(x_test)
    return optunity.metrics.roc_auc(y_test, decision_values)


# perform tuning
hps, _, _ = optunity.maximize(svm_auc, num_evals=200, logC=[-5, 2], logGamma=[-5, 1])

# train model on the full training set with tuned hyperparameters
optimal_model = sklearn.svm.SVC(C=10 ** hps['logC'], gamma=10 ** hps['logGamma']).fit(data, labels)
data0 = bt.feeds.YahooFinanceData(dataname='AAPL',
                                  fromdate=datetime(2011, 1, 1),
                                  todate=datetime(2011, 12, 31))


def runstrat(sma1, sma2):
    cerebro = bt.Cerebro()
    cerebro.addstrategy(SmaCross, sma1=sma1, sma2=sma2)
    print('Evaluate sma1:' + str(sma1) + ', sma2:' + str(sma2))
    cerebro.adddata(data0)
    cerebro.run()
    return cerebro.broker.getvalue()


opt = optunity.maximize(runstrat, num_evals=5, sma1=[2, 55], sma2=[2, 55])
optimal_pars, details, _ = opt
print('----------------------')
print('Optimal Parameters:')
print('sma1 = %.2f' % optimal_pars['sma1'])
print('sma2 = %.2f' % optimal_pars['sma2'])

cerebro = bt.Cerebro()
cerebro.addstrategy(SmaCross, sma1=optimal_pars['sma1'], sma2=optimal_pars['sma2'])
cerebro.adddata(data0)
cerebro.run()
cerebro.plot()
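# One caveat for both backtrader examples above: optunity searches a continuous
# box, so the returned sma1/sma2 are floats. A hedged sketch of rounding them
# before the confirmation run (how SmaCross handles float periods depends on its
# implementation, which is not shown here):
best_sma1 = int(round(optimal_pars['sma1']))
best_sma2 = int(round(optimal_pars['sma2']))
cerebro = bt.Cerebro()
cerebro.addstrategy(SmaCross, sma1=best_sma1, sma2=best_sma2)
cerebro.adddata(data0)
cerebro.run()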
trainy = np.array(trainy.tolist()[0])
testy = np.array(testy.tolist()[0])

###################################################################
################## Code from sklearn website ######################
# score function: twice iterated 4-fold cross-validated AUC
@optunity.cross_validated(x=trainx, y=trainy, num_folds=4, num_iter=2)
def svm_auc(x_train, y_train, x_test, y_test, logC, logGamma):
    model = sklearn.svm.SVC(C=10 ** logC, gamma=10 ** logGamma,
                            kernel='rbf').fit(x_train, y_train)
    decision_values = model.decision_function(x_test)
    return optunity.metrics.roc_auc(y_test, decision_values)


# perform tuning
hps, _, _ = optunity.maximize(svm_auc, num_evals=50, logC=[-15, 15], logGamma=[-100, 100])
# print(hps)

# train model on the full training set
# (note: fixed C and gamma are used here rather than the tuned values in hps)
optimal_model = sklearn.svm.SVC(C=3, gamma=0.00025,
                                class_weight=cost_dict).fit(trainx, trainy)
##################################################################

output = []
ourguess = []
for j in range(15):
    test = optimal_model.predict(testx[j, :])
    ourguess.append(test[0])
ourguess = np.array(ourguess)
binary = []
import optunity


# Load and parse the data
def parsePoint(line):
    values = [float(x) for x in line.split(' ')]
    return LabeledPoint(values[0], values[1:])


data = sc.textFile("sample_svm_data.txt")
parsedData = data.map(parsePoint).cache()


# cross-validation using optunity
@optunity.cross_validated(x=parsedData, num_folds=5, num_iter=1)
def logistic_l2_accuracy(x_train, x_test, regParam):
    # cache data to get reasonable speeds for methods like LogisticRegression and SVM
    xc = x_train.cache()
    # training logistic regression with L2 regularization
    model = LogisticRegressionWithSGD.train(xc, regParam=regParam, regType="l2")
    # making predictions on x_test
    yhat = x_test.map(lambda p: (p.label, model.predict(p.features)))
    # returning accuracy on x_test
    # (tuple unpacking in lambdas is Python 2 only, so unpack by index instead)
    return yhat.filter(lambda vp: vp[0] == vp[1]).count() / float(x_test.count())


# using the default maximizer (particle swarm) with 10 evaluations, regularization between 0 and 10
optimal_pars, _, _ = optunity.maximize(logistic_l2_accuracy, num_evals=10, regParam=[0, 10])

# training a model on all data with the best parameters
model = LogisticRegressionWithSGD.train(parsedData, regType="l2", **optimal_pars)

# prediction (in a real application you would use new data here instead of parsedData)
yhat = parsedData.map(lambda p: (p.label, model.predict(p.features)))
def main(args):
    start_time = time.time()
    global scriptName
    scriptName = args[1]
    global fileName
    fileName = args[2]
    num_evals = int(args[3])
    solver_name = args[4]
    print("Input: " + str(args))

    search_space = {
        'kernel': {
            'linear': {'C': [0, 10]},
            'rbf': {'gamma': [0, 1], 'C': [0, 10]},
            'poly': {'degree': [2, 5], 'C': [0, 10], 'coef0': [0, 1]}
        }
    }

    f = external_svm
    tree = search_spaces.SearchTree(search_space)
    box = tree.to_box()

    # we need to position the call log here
    # because the function signature used later on is internal logic
    f = fun.logged(f)
    # wrap the decoder and constraints for the internal search space representation
    f = tree.wrap_decoder(f)
    f = api._wrap_hard_box_constraints(f, box, -sys.float_info.max)

    # build solver
    suggestion = api.suggest_solver(num_evals, solver_name, **box)
    solver = api.make_solver(**suggestion)
    # solution, details = api.optimize(solver, f, maximize=True, max_evals=num_evals,
    #                                  pmap=optunity.pmap, decoder=tree.decode)

    # maximize(f, num_evals=50, solver_name=None, pmap=map, **kwargs)
    solution, details, suggestion = optunity.maximize(external_svm_wrapper,
                                                      num_evals=num_evals,
                                                      solver_name=solver_name,
                                                      pmap=optunity.pmap,
                                                      kernelIndex=[0, 1], logC=[0, 1],
                                                      logGamma=[0, 1], degree=[3, 5],
                                                      coef0=[0, 1])
    print("Optimal parameters: " + str(solution))
    print("Optimal value: " + str(details.optimum))
    print("--- %s seconds ---" % (time.time() - start_time))
@optunity.cross_validated(x=x_train, y=y_train, num_folds=10, num_iter=2)
def svm_auc(x_train, y_train, x_test, y_test, logC, logGamma):
    svc = svm.SVC(
        kernel='rbf',
        C=10**logC,
        gamma=10**logGamma,
    )
    svc = svc.fit(x_train, y_train)
    decision_values = svc.decision_function(x_test)
    auc = optunity.metrics.roc_auc(y_test, decision_values)
    return auc


print("[INFO] Running cross validation to find optimal parameters...")
hps, info, _ = optunity.maximize(svm_auc, num_evals=20, logC=[-5, 2], logGamma=[-5, 1])
print("[INFO] " + str(hps))
# print(info)

svc = svm.SVC(C=10**hps['logC'], gamma=10**hps['logGamma']).fit(x_train, y_train)

# EXPORT
print("[INFO] Creating Model...")
model_dir = os.path.join(file_path, "data", "models", "svm_" + USER_ID)
if USER_ID == "":
    model_dir += "ALL"
if USE_FFT:
    model_dir += "_FFT"
else:
    model_dir += "_RAW"
    model = sklearn.svm.SVC(C=C, gamma=10**logGamma).fit(x_train, y_train)
    decision_values = model.decision_function(x_test)
    auc = optunity.metrics.roc_auc(y_test, decision_values)
    return auc


svm_rbf_tuned_auroc = cv_decorator(svm_rbf_tuned_auroc)
# this is equivalent to the more common syntax below
# @optunity.cross_validated(x=data, y=labels, num_folds=5)
# def svm_rbf_tuned_auroc...

svm_rbf_tuned_auroc(C=1.0, logGamma=0.0)

# find optimal parameters
optimal_rbf_pars, info, _ = optunity.maximize(svm_rbf_tuned_auroc, num_evals=150,
                                              C=[0, 10], logGamma=[-5, 0])
# when running this outside of IPython we can parallelize via optunity.pmap
# optimal_rbf_pars, _, _ = optunity.maximize(svm_rbf_tuned_auroc, 150, C=[0, 10], gamma=[0, 0.1], pmap=optunity.pmap)
print("Optimal parameters: " + str(optimal_rbf_pars))
print("AUROC of tuned SVM with RBF kernel: %1.3f" % info.optimum)

df = optunity.call_log2dataframe(info.call_log)
print("Top 10 parameters for RBF kernel:")
print(df.sort_values('value', ascending=False)[:10])

# 4. Tune SVC without deciding the kernel in advance
# Define parameter search space
space = {
    'kernel': {
import optunity.metrics


# score function: twice iterated 10-fold cross-validated AUC
@optunity.cross_validated(x=X, y=y, num_folds=10, num_iter=2)
def svm_auc(x_train, y_train, x_test, y_test, logC, logGamma):
    model = SVC(C=10**logC, gamma=10**logGamma).fit(x_train, y_train)
    decision_values = model.decision_function(x_test)
    return optunity.metrics.roc_auc(y_test, decision_values)


# pass the objective function itself (not a call to it) to the optimizer
hps, _, _ = optunity.maximize(svm_auc, num_evals=200, logC=[-5, 2], logGamma=[-5, 1])

optimal_model = SVC(C=10**hps['logC'], gamma=10**hps['logGamma']).fit(X, y)
#
# clf = SVC()
# clf.fit(X, y)
# SVC(C=1.0, cache_size=200, class_weight='auto', coef0=0.0,
#     decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
#     max_iter=-1, probability=False, random_state=None, shrinking=True,
#     tol=0.001, verbose=False)

result = optimal_model.predict(X)
    def fit(self, obj_fun=None, param_range=None, num_evals=1000, solver_name="sobol",
            maximize=True, num_jobs=1, engine="optunity", random_state=1,
            init_points=20, n_iter=50, acq='ucb', kappa=2.576, xi=0.0):
        if obj_fun:
            self.obj_fun = obj_fun
        if param_range:
            self.param_range = param_range

        if engine == "optunity":
            # pick the optimization direction; the default solver is 'particle swarm'
            optimize = op.maximize if maximize else op.minimize
            if num_jobs == 1:
                self.optimal_params, self.info, _ = optimize(
                    self.obj_fun, num_evals=num_evals, solver_name=solver_name,
                    **self.param_range)
            elif num_jobs > 1:
                self.optimal_params, self.info, _ = optimize(
                    self.obj_fun, num_evals=num_evals, solver_name=solver_name,
                    pmap=op.parallel.create_pmap(num_jobs),
                    **self.param_range)
            # params
            # print(self.optimal_params)
            # optimum and solver statistics
            print(self.info.optimum, self.info.stats)

        elif engine == "bayes_opt":
            param_range_t = {key: tuple(value) for key, value in self.param_range.items()}
            optimizer = BayesianOptimization(
                f=self.obj_fun,
                pbounds=param_range_t,
                random_state=random_state,
            )
            optimizer.maximize(
                init_points=init_points,
                n_iter=n_iter,
                acq=acq,
                kappa=kappa,
                xi=xi,
            )
            self.optimal_params = optimizer.max['params']
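# A hedged usage sketch for the fit() method above. `HyperTuner` is a hypothetical
# name for the enclosing class (its real name is not given in the fragment); the
# toy objective and parameter ranges are illustrative.
def toy_objective(x, y):
    # smooth surface with its maximum at x=3, y=-1
    return -(x - 3) ** 2 - (y + 1) ** 2

tuner = HyperTuner()
tuner.fit(obj_fun=toy_objective,
          param_range={'x': [0, 5], 'y': [-5, 5]},
          num_evals=200, solver_name='sobol',
          maximize=True, num_jobs=1, engine='optunity')
print(tuner.optimal_params)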
# =============================================================================
# Optunity start
# =============================================================================
print('Box Constraints: ' + str(box_constraints))
logger.log(logging.INFO, 'Box Constraints: %s' % str(box_constraints))

print('Maximizing F1 score. Num_iterations: %d' % args.optunity_iteration)
logger.log(logging.INFO,
           'Maximizing F1 score. Num_iterations: %d' % args.optunity_iteration)

opt_params, call_log, _ = optunity.maximize(
    objective_function,
    num_evals=args.optunity_iteration,
    solver_name='sobol',
    **box_constraints)

lr = 10**opt_params['log_learning_rate']
hidden_size = int(opt_params['hidden_size'])
nof_hidden_layer = int(opt_params['nof_hidden_layer'])
dropout_rate = opt_params['dropout_rate']
l1_reg = opt_params['l1_reg']

print("[Optimal Parameters] lr: %.4E; n_layers: %d; n_hidden: %d; l1_reg: %.4E; dropout: %.4f" %
      (lr, nof_hidden_layer, hidden_size, l1_reg, dropout_rate))
logger.log(logging.INFO,
           "[Optimal Parameters] lr: %.4E; n_layers: %d; n_hidden: %d; l1_reg: %.4E; dropout: %.4f" %
           (lr, nof_hidden_layer, hidden_size, l1_reg, dropout_rate))
X = X[:10000, :]
y = y[:10000]


# score function: twice iterated 10-fold cross-validated AUC
@optunity.cross_validated(x=X_train, y=y_train, num_folds=10, num_iter=2)
def svm_auc(x_train, y_train, x_test, y_test, logC, logGamma):
    model = sklearn.svm.SVC(C=10 ** logC, gamma=10 ** logGamma).fit(x_train, y_train)
    decision_values = model.decision_function(x_test)
    return optunity.metrics.roc_auc(y_test, decision_values)


time3 = time()
print("perform tuning...")
# perform tuning
hps, _, _ = optunity.maximize(svm_auc, num_evals=200, logC=[-3, 3], logGamma=[-5, 1],
                              pmap=optunity.pmap)
time1 = time()
print("optimization time:", time1 - time3)
print()

print("train model on the full training set with tuned hyperparameters...")
# train model on the full training set with tuned hyperparameters
clfSVM = sklearn.svm.SVC(C=10 ** hps['logC'], gamma=10 ** hps['logGamma']).fit(X, y)
print()
print("fitting finished")

# Prediction
y_pred_train = clfSVM.predict(X_train)
#!/usr/bin/env python

# A simple smoke test for all available solvers.

import optunity


def f(x, y):
    return x + y


solvers = optunity.available_solvers()

for solver in solvers:
    # simple API
    opt, _, _ = optunity.maximize(f, 100, x=[0, 5], y=[-5, 5], solver_name=solver)

    # expert API
    suggestion = optunity.suggest_solver(num_evals=100, x=[0, 5], y=[-5, 5],
                                         solver_name=solver)
    s = optunity.make_solver(**suggestion)
    # without parallel evaluations
    opt, _ = optunity.optimize(s, f)
    # with parallel evaluations
    opt, _ = optunity.optimize(s, f, pmap=optunity.pmap)
ax2.plot(pos[:, 0], pos[:, 1], 'bo')
ax2.set_xlabel('x1')
ax2.set_ylabel('x2')
fig2.savefig('dataCloud')

# %% #######################################
@optunity.cross_validated(x=data, y=labels, num_folds=5, regenerate_folds=True)
def svm_rbf_tuned_auroc(x_train, y_train, x_test, y_test, logC, logGamma):
    model = sklearn.svm.SVC(C=10 ** logC, gamma=10 ** logGamma).fit(x_train, y_train)
    decision_values = model.decision_function(x_test)
    auc = optunity.metrics.roc_auc(y_test, decision_values)
    return auc


optimal_rbf_pars, info, _ = optunity.maximize(svm_rbf_tuned_auroc, num_evals=300,
                                              logC=[-4, 2], logGamma=[-5, 0])
# when running this outside of IPython we can parallelize via optunity.pmap
# optimal_rbf_pars, _, _ = optunity.maximize(svm_rbf_tuned_auroc, 150, C=[0, 10], gamma=[0, 0.1], pmap=optunity.pmap)
print('**********************************************')
print("Optimal parameters: " + str(optimal_rbf_pars))
print("AUROC of tuned SVM with RBF kernel: %1.3f" % info.optimum)

df = optunity.call_log2dataframe(info.call_log)

################################
cutoff = 0.1
fig3 = plt.figure()
ax3 = fig3.add_subplot(111, projection='3d')
ax3.scatter(xs=df[df.value > cutoff]['logC'],
            ys=df[df.value > cutoff]['logGamma'],
def test(data_cache=None, lag_param=1):
    import optunity
    from data_maker import f_1, get_data, transformToLogRet
    from InputStandardizer import InputStandardizer
    from FOSELM import EOSELM, FOSELM
    from sofnn import MIMOSOFNN
    from financial_math import mean_squared_error, mean_average_error, smape, \
        ln_q, ndei, sortino_ratio, sharpe_ratio, total_return
    from PANFIS import MOGENEFIS, MOGSEFS
    import itertools
    import datetime

    n_input = 5
    trade_cost = 0.001
    c_pair = f_1
    for lag in [lag_param]:
        print('lag = {}'.format(lag))
        train_data, test_data, sample_data = (None, None, None)
        if data_cache:
            train_data, test_data, sample_data = data_cache(c_pair, lag)
        else:
            train_data, test_data, sample_data = get_data(c_pair, lag, True, noisy=False)
        """
        c_pair = f_1
        lag = 13  # 1, 7, 13
        trade_cost = 0.001
        actions = [-1, -0.5, 0, 0.5, 1, 2]
        train_data, test_data, sample_data = get_data(c_pair, lag, True)
        n = 5
        t_train_data = transformToLogRet(train_data)
        t_test_data = transformToLogRet(test_data)
        t_sample_data = transformToLogRet(sample_data)
        n_target = 6
        """
        window_machines = ['foselm', 'sofnn']
        panfis_machines = ['genefis', 'gsefs3', 'gsefs4', 'gsefs6']
        q_makers = ['q', 'antiq']
        p_makers = ['p', 'antip']
        pred_makers = ['-']
        perform_funcs = ["Return", "Sharpe Ratio", "Sortino Ratio"]
        perform_waits = [13]
        pred_waits = [1]
        pred_funcs = ["Mean Squared Error", "Mean Average Error", "SMAPE", "Ln Q"]

        conf_1 = list(itertools.product(['genefis'], perform_funcs,
                                        p_makers + q_makers, perform_waits))
        conf = conf_1
        for machine, function_text, maker, wait in conf:
            print('{} {} {} {}'.format(machine, maker, function_text, wait))

            def obj_c(window=None, standard=None, useless=None):
                decision_maker = None
                t_train_data = None
                t_test_data = None
                t_sample_data = None
                n_output = None
                strategy_kind = maker
                strategy_lag = wait
                if strategy_kind == "p":
                    decision_maker = predictDecisionMaker(nWait=strategy_lag, leverage=2)
                    t_train_data = train_data
                    t_test_data = test_data
                    t_sample_data = sample_data
                    n_output = 1
                elif strategy_kind == "antip":
                    decision_maker = antiPredictDecisionMaker(nWait=strategy_lag, leverage=2)
                    t_train_data = train_data
                    t_test_data = test_data
                    t_sample_data = sample_data
                    n_output = 1
                elif strategy_kind == "q":
                    actions = [2 * -1 + 1, 0, 2]
                    decision_maker = qDecisionMaker(nWait=strategy_lag, actions=actions)
                    t_train_data = transformToLogRet(train_data)
                    t_test_data = transformToLogRet(test_data)
                    t_sample_data = transformToLogRet(sample_data)
                    n_output = 3
                elif strategy_kind == "antiq":
                    actions = [2 * -1 + 1, 0, 2]
                    decision_maker = antiQDecisionMaker(nWait=strategy_lag, actions=actions)
                    t_train_data = transformToLogRet(train_data)
                    t_test_data = transformToLogRet(test_data)
                    t_sample_data = transformToLogRet(sample_data)
                    n_output = 3
                if strategy_kind == "-":
                    decision_maker = predictDecisionMaker(nWait=strategy_lag, leverage=1)
                    t_train_data = train_data
                    t_test_data = test_data
                    t_sample_data = sample_data
                    n_output = 1
                assert decision_maker is not None
                assert t_train_data is not None
                assert t_test_data is not None
                assert t_sample_data is not None
                assert n_output is not None

                m = None
                if machine == 'sofnn':
                    window = int(window)
                    m = MIMOSOFNN(r=n_input, rt=n_output, window=window)
                    if standard is not None:
                        m = MIMOSOFNN(r=n_input, rt=n_output, window=window, delta=4, krmse=0.8)
                elif machine == 'foselm':
                    window = int(window)
                    m = FOSELM(1, window, 40, n_input)
                elif machine == 'genefis':
                    m = MOGENEFIS(n_input, n_output)
                elif machine == 'gsefs6':
                    m = MOGSEFS(n_input, n_output, 0.6)
                elif machine == 'gsefs4':
                    m = MOGSEFS(n_input, n_output, 0.4)
                elif machine == 'gsefs3':
                    m = MOGSEFS(n_input, n_output, 0.3)
                if standard is not None:
                    standard = int(standard)
                    m = InputStandardizer(m, standard)
                assert m is not None

                obj_func_dict = {
                    "Return": lambda x: total_return(get_returns(x)),
                    "Sharpe Ratio": lambda x: sharpe_ratio(get_returns(x)),
                    "Sortino Ratio": lambda x: sortino_ratio(get_returns(x)),
                    "Mean Squared Error": lambda x: -mean_squared_error(list(process_test(x))),
                    "Mean Average Error": lambda x: -mean_average_error(list(process_test(x))),
                    "SMAPE": lambda x: -smape(list(process_test(x))),
                    "Ln Q": lambda x: -ln_q(list(process_test(x))),
                }
                obj_func = obj_func_dict[function_text]
                events = list(test_model(m, t_train_data, t_test_data, t_sample_data,
                                         decision_maker, trade_cost))
                obj_score = obj_func(events)
                return obj_score

            if machine in window_machines:
                kwargs1 = {'window': [1, 300], 'standard': [5, 100]}
                kwargs2 = {'window': [1, 300], 'standard': [10, 1000]}
                kwargs3 = {'window': [1, 300]}
                ret = optunity.maximize(obj_c, num_evals=50, **kwargs3)
                print(ret[0], ret[1][0])
                ret = optunity.maximize(obj_c, num_evals=50, **kwargs1)
                print(ret[0], ret[1][0])
                ret = optunity.maximize(obj_c, num_evals=50, **kwargs2)
                print(ret[0], ret[1][0])
            if machine in panfis_machines:
                kwargs1 = {'standard': [5, 100]}
                kwargs2 = {'standard': [10, 1000]}
                kwargs3 = {'useless': [1, 2]}
                ret = optunity.maximize(obj_c, num_evals=1, **kwargs3)
                print(ret[0], ret[1][0])
                ret = optunity.maximize(obj_c, num_evals=30, **kwargs1)
                print(ret[0], ret[1][0])
                ret = optunity.maximize(obj_c, num_evals=30, **kwargs2)
                print(ret[0], ret[1][0])
            print('Waktu = {}'.format(datetime.datetime.now()))
solvers = {
    'particle swarm': 'ps',
    'nelder-mead': 'nm',
    'random search': 'rnd',
    'grid search': 'g'
}

for algorithm, identifier in solvers.items():
    print("*" * 80)
    print("RUN - " + algorithm)
    print("*" * 80)

    optimap_params, info, _ = optunity.maximize(
        testFn,
        num_evals=numEvals,
        categoryScalingFactor=[0, 120],
        productScalingFactor=[0, 120],
        # stepSize=[0.8, 1.2],
        # regParam=[0.005, 0.015],
        solver_name=algorithm)

    print("Optimal parameters: " + str(optimap_params))
    print("F1 of tuned model: %1.3f" % info.optimum)

    df = optunity.call_log2dataframe(info.call_log)
    df.to_csv(identifier + '_out.csv')

    # cutoff = 0.5
    # fig = plt.figure()
    # ax = Axes3D(fig)
    # ax.scatter(xs=df[df.value > cutoff]['categoryScalingFactor'],
NUM_EPOCHS = num_epochs
NUM_FOLDS = num_folds

global main_logger
main_logger = load_logger(logdir)
# main_logger.debug('Parameters: ' + str(args))

box_constraints = load_box_constraints(box)
main_logger.debug('Box Constraints: ' + str(box_constraints))

opt_fxn = get_objective_function(NUM_EPOCHS, logdir,
                                 utils.get_optimizer_from_str(update_fn))
opt_fxn = optunity.cross_validated(x=x, y=y, num_folds=NUM_FOLDS,
                                   strata=strata)(opt_fxn)

main_logger.debug('Maximizing C-Index. Num_iterations: %d' % num_evals)
opt_params, call_log, _ = optunity.maximize(opt_fxn, num_evals=num_evals,
                                            solver_name='sobol', **box_constraints)

main_logger.debug('Optimal Parameters: ' + str(opt_params))
main_logger.debug('Saving Call log...')
print(call_log._asdict())
save_call_log(
    os.path.join(logdir, 'optunity_log_%s.pkl' % (str(uuid.uuid4()))),
    call_log._asdict())
import optunity
import optunity.metrics
import sklearn.svm


# score function: twice iterated 10-fold cross-validated AUC
@optunity.cross_validated(x=data, y=labels, num_folds=10, num_iter=2)
def svm_auc(x_train, y_train, x_test, y_test, C, gamma):
    model = sklearn.svm.SVC(C=C, gamma=gamma).fit(x_train, y_train)
    decision_values = model.decision_function(x_test)
    return optunity.metrics.roc_auc(y_test, decision_values)


# perform tuning
optimal_pars, _, _ = optunity.maximize(svm_auc, num_evals=200, C=[0, 10], gamma=[0, 1])

# train model on the full training set with tuned hyperparameters
optimal_model = sklearn.svm.SVC(**optimal_pars).fit(data, labels)