def test_svm(d, x_train, x_test, y_train, y_test, test_start_idx, linear=True):
    """Grid-search encoder / SVM / stop-loss settings by simulated profit.

    For every combination of dictionary size, encoding transformation, SVM
    ``C``, positive/negative class weight and cut threshold, the encoder and
    SVM are trained on the training set, the test set is classified, and the
    predictions are fed to ``simulate.aggressive_with_hard_thresholds``.
    Each combination and its outcome is printed, followed by the most
    profitable combination overall.

    Parameters:
        d: data set; indexed with ``'t/100ms'``, ``'bid/100ms'`` and
            ``'offer/100ms'`` and read for its ``currency_pair`` attribute.
        x_train, x_test: raw feature matrices handed to
            ``encoder.FeatureEncoder``.
        y_train, y_test: class labels; the class-weight dict uses the keys
            1, 0 and -1, so labels are presumably drawn from those values
            (TODO confirm against the signal generator).
        test_start_idx: index at which the test period starts in the series
            stored in ``d``; the series are sliced from here to the end.
        linear: use ``LinearSVC`` when true, otherwise ``SVC``.

    Returns:
        ``(profits, results)``: two dicts keyed by ``Params`` namedtuples,
        mapping to the summed profit and to the full ``Result`` namedtuple
        respectively.
    """
    # Series covering only the test period.
    ts = d['t/100ms'][test_start_idx:]
    bids = d['bid/100ms'][test_start_idx:]
    offers = d['offer/100ms'][test_start_idx:]
    currency_pair = d.currency_pair

    # Hyper-parameter grid.
    n_centroids = [None, 200, 400]
    cs = [1.0, 5.0]
    cut_thresholds = [.0005, .001, .0015, 0.002]
    transformations = ['prob', 'triangle']
    class_weights = [15, 30, 45]

    results = {}
    profits = {}
    # None (not 0) so that the first evaluated combination always becomes
    # the running best; otherwise a sweep in which nothing is profitable
    # would report "None" as the best configuration.
    best_profit = None
    best_desc = None
    best_result = None

    Params = namedtuple(
        'Params', 'k transformation C pos_weight neg_weight cut_thresh')
    Result = namedtuple(
        'Result', 'profit ntrades ppt accuracy tp fp tn fn tz fz')

    num_class_weights = len(class_weights)
    nparams = (len(n_centroids) * len(cs) * len(transformations)
               * num_class_weights * num_class_weights * len(cut_thresholds))

    # NOTE: every print below passes exactly one argument, so it produces the
    # same bytes under the Python 2 print statement and the print function.
    print("Total param combinations:  %s" % (nparams,))

    for k in n_centroids:
        print("Training encoder with k =  %s" % (k,))
        e = encoder.FeatureEncoder(x_train, whiten=False, n_centroids=k)
        for t in transformations:
            x_train_encoded = e.encode(x_train, transformation=t)
            x_test_encoded = e.encode(x_test, transformation=t)
            for c in cs:
                if linear:
                    svm = scikits.learn.svm.LinearSVC(C=c)
                else:
                    svm = scikits.learn.svm.SVC(C=c)
                for pos_weight in class_weights:
                    for neg_weight in class_weights:
                        print(
                            "Training SVM with C= %s pos_weight= %s neg_weight = %s"
                            % (c, pos_weight, neg_weight))
                        svm.fit(
                            x_train_encoded, y_train,
                            class_weight={1: pos_weight, 0: 1, -1: neg_weight})
                        pred = svm.predict(x_test_encoded)

                        # Accuracy depends only on the predictions, not on the
                        # cut threshold, so compute it once per fitted model.
                        # NOTE(review): assumes signals.accuracy has no side
                        # effects -- confirm.
                        raw_accuracy, tp, fp, tn, fn, tz, fz = \
                            signals.accuracy(y_test, pred)

                        for cut in cut_thresholds:
                            desc = Params(k, t, c, pos_weight, neg_weight, cut)
                            profit_series = simulate.aggressive_with_hard_thresholds(
                                ts, bids, offers, pred, currency_pair,
                                max_loss_prct=cut)
                            sum_profit = np.sum(profit_series)
                            # A trade is any entry with non-zero profit.
                            ntrades = np.sum(profit_series != 0)
                            if ntrades > 0:
                                profit_per_trade = sum_profit / float(ntrades)
                            else:
                                profit_per_trade = 0
                            result = Result(
                                sum_profit, ntrades, profit_per_trade,
                                raw_accuracy, tp, fp, tn, fn, tz, fz)

                            print(desc)
                            print(result)
                            print("")

                            if best_profit is None or result.profit > best_profit:
                                best_profit = result.profit
                                best_result = result
                                best_desc = desc
                            results[desc] = result
                            profits[desc] = result.profit

    print("Best over all params:")
    print(best_desc)
    print(best_result)
    return profits, results
def test_knn(d, x_train, x_test, y_train, y_test, test_start_idx):
    """Grid-search encoder / k-NN / stop-loss settings by simulated profit.

    For every combination of dictionary size, encoding transformation,
    neighbour count and cut threshold, the encoder and a nearest-neighbour
    classifier are fitted on the training set, the test set is classified,
    and the predictions are fed to
    ``simulate.aggressive_with_hard_thresholds``.  Each combination and its
    outcome is printed, followed by the most profitable combination overall.

    Parameters:
        d: data set; indexed with ``'t/100ms'``, ``'bid/100ms'`` and
            ``'offer/100ms'`` and read for its ``currency_pair`` attribute.
        x_train, x_test: raw feature matrices handed to
            ``encoder.FeatureEncoder``.
        y_train, y_test: class labels for the training and test sets.
        test_start_idx: index at which the test period starts in the series
            stored in ``d``; the series are sliced from here to the end.

    Returns:
        ``(profits, results)``: two dicts keyed by ``Params`` namedtuples,
        mapping to the summed profit and to the full ``Result`` namedtuple
        respectively.
    """
    # Series covering only the test period.
    ts = d['t/100ms'][test_start_idx:]
    bids = d['bid/100ms'][test_start_idx:]
    offers = d['offer/100ms'][test_start_idx:]
    currency_pair = d.currency_pair

    # Hyper-parameter grid.
    n_centroids = [None, 200, 400]
    ks = [3, 7, 21, 49]
    cut_thresholds = [.0015, 0.002]
    transformations = ['prob', 'triangle']

    results = {}
    profits = {}
    # None (not 0) so that the first evaluated combination always becomes
    # the running best; otherwise a sweep in which nothing is profitable
    # would report "None" as the best configuration.
    best_profit = None
    best_desc = None
    best_result = None

    Params = namedtuple('Params', 'dict_size transformation k cut_thresh')
    Result = namedtuple(
        'Result', 'profit ntrades ppt accuracy tp fp tn fn tz fz')

    nparams = (len(n_centroids) * len(ks) * len(transformations)
               * len(cut_thresholds))

    # NOTE: every print below passes exactly one argument, so it produces the
    # same bytes under the Python 2 print statement and the print function.
    print("Total param combinations:  %s" % (nparams,))

    for nc in n_centroids:
        print("Training encoder with dict_size =  %s" % (nc,))
        e = encoder.FeatureEncoder(x_train, whiten=False, n_centroids=nc)
        for t in transformations:
            x_train_encoded = e.encode(x_train, transformation=t)
            x_test_encoded = e.encode(x_test, transformation=t)
            for k in ks:
                model = scikits.learn.neighbors.NeighborsClassifier(
                    n_neighbors=k, algorithm='ball')
                print("Using knn classifier with k =  %s" % (k,))
                model.fit(x_train_encoded, y_train)
                pred = model.predict(x_test_encoded)

                # Accuracy depends only on the predictions, not on the cut
                # threshold, so compute it once per fitted model.
                # NOTE(review): assumes signals.accuracy has no side effects
                # -- confirm.
                raw_accuracy, tp, fp, tn, fn, tz, fz = \
                    signals.accuracy(y_test, pred)

                for cut in cut_thresholds:
                    desc = Params(nc, t, k, cut)
                    profit_series = simulate.aggressive_with_hard_thresholds(
                        ts, bids, offers, pred, currency_pair,
                        max_loss_prct=cut)
                    sum_profit = np.sum(profit_series)
                    # A trade is any entry with non-zero profit.
                    ntrades = np.sum(profit_series != 0)
                    if ntrades > 0:
                        profit_per_trade = sum_profit / float(ntrades)
                    else:
                        profit_per_trade = 0
                    result = Result(
                        sum_profit, ntrades, profit_per_trade,
                        raw_accuracy, tp, fp, tn, fn, tz, fz)

                    print(desc)
                    print(result)
                    print("")

                    if best_profit is None or result.profit > best_profit:
                        best_profit = result.profit
                        best_result = result
                        best_desc = desc
                    results[desc] = result
                    profits[desc] = result.profit

    print("Best over all params:")
    print(best_desc)
    print(best_result)
    return profits, results