Example #1
0
def test_svm(d, x_train, x_test, y_train, y_test, test_start_idx, linear=True):
    """Grid-search encoder/SVM settings and rank them by simulated profit.

    For every combination of encoder dictionary size, encoding
    transformation, SVM penalty ``C``, positive/negative class weight and
    stop-loss threshold, an SVM is fit on the encoded training data, its
    test-set predictions are passed to
    ``simulate.aggressive_with_hard_thresholds`` and the outcome is printed
    and recorded.

    Args:
        d: Dataset object indexable by ``'t/100ms'``, ``'bid/100ms'`` and
            ``'offer/100ms'`` and exposing a ``currency_pair`` attribute.
        x_train: Training features handed to ``encoder.FeatureEncoder``.
        x_test: Test features; presumably aligned with the series in ``d``
            from ``test_start_idx`` onward -- TODO confirm with callers.
        y_train: Training labels.  The class-weight mapping uses the keys
            1, 0 and -1, so labels are presumably drawn from those values.
        y_test: Test labels passed to ``signals.accuracy``.
        test_start_idx: Offset at which the series in ``d`` are sliced to
            obtain the data given to the simulator.
        linear: When true a ``LinearSVC`` is used, otherwise an ``SVC``.

    Returns:
        A ``(profits, results)`` pair of dicts keyed by ``Params`` tuples;
        ``profits`` holds the summed profit and ``results`` the complete
        ``Result`` tuple for each parameter combination.
    """
    # Series consumed by the simulator, starting at the test offset.
    ts = d['t/100ms'][test_start_idx:]
    bids = d['bid/100ms'][test_start_idx:]
    offers = d['offer/100ms'][test_start_idx:]
    currency_pair = d.currency_pair

    # Search grid.
    dict_sizes = [None, 200, 400]
    penalties = [1.0, 5.0]
    cut_thresholds = [.0005, .001, .0015, 0.002]
    transformations = ['prob', 'triangle']
    class_weights = [15, 30, 45]

    Params = namedtuple('Params', 'k transformation C pos_weight neg_weight cut_thresh')
    Result = namedtuple('Result', 'profit ntrades ppt accuracy tp fp tn fn tz fz')

    results = {}
    profits = {}
    # The baseline is 0, so a setting has to earn a strictly positive total
    # profit to be reported as best; otherwise the summary prints None.
    best_profit = 0
    best_desc = None
    best_result = None

    # Class weights are iterated twice (positive and negative side).
    nparams = (len(dict_sizes) * len(penalties) * len(transformations)
               * len(class_weights) ** 2 * len(cut_thresholds))
    # print(...) with one space-joined argument writes the same text as the
    # comma-separated print statement.
    print(" ".join(map(str, ["Total param combinations: ", nparams])))

    for k in dict_sizes:
        print(" ".join(map(str, ["Training encoder with k = ", k])))
        feature_encoder = encoder.FeatureEncoder(x_train, whiten=False, n_centroids=k)
        for transformation in transformations:
            train_codes = feature_encoder.encode(x_train, transformation=transformation)
            test_codes = feature_encoder.encode(x_test, transformation=transformation)
            for c in penalties:
                # One estimator per C; it is refit for every weight pair.
                classifier = (scikits.learn.svm.LinearSVC(C=c) if linear
                              else scikits.learn.svm.SVC(C=c))
                for pos_weight in class_weights:
                    for neg_weight in class_weights:
                        print(" ".join(map(str, [
                            "Training SVM with C=", c,
                            "pos_weight=", pos_weight,
                            "neg_weight =", neg_weight])))
                        weights = {1: pos_weight, 0: 1, -1: neg_weight}
                        classifier.fit(train_codes, y_train, class_weight=weights)
                        pred = classifier.predict(test_codes)
                        for cut in cut_thresholds:
                            desc = Params(k=k, transformation=transformation, C=c,
                                          pos_weight=pos_weight,
                                          neg_weight=neg_weight,
                                          cut_thresh=cut)
                            profit_series = simulate.aggressive_with_hard_thresholds(
                                ts, bids, offers, pred, currency_pair,
                                max_loss_prct=cut)
                            sum_profit = np.sum(profit_series)
                            # Non-zero entries of the profit series count as trades.
                            ntrades = np.sum(profit_series != 0)
                            profit_per_trade = (sum_profit / float(ntrades)
                                                if ntrades > 0 else 0)
                            raw_accuracy, tp, fp, tn, fn, tz, fz = signals.accuracy(y_test, pred)
                            result = Result(profit=sum_profit, ntrades=ntrades,
                                            ppt=profit_per_trade,
                                            accuracy=raw_accuracy,
                                            tp=tp, fp=fp, tn=tn, fn=fn,
                                            tz=tz, fz=fz)
                            print(desc)
                            print(result)
                            print("")
                            if sum_profit > best_profit:
                                best_profit, best_result, best_desc = sum_profit, result, desc
                            results[desc] = result
                            profits[desc] = sum_profit

    print("Best over all params:")
    print(best_desc)
    print(best_result)
    return profits, results
Example #2
0
def test_knn(d, x_train, x_test, y_train, y_test, test_start_idx):
    """Grid-search encoder/k-NN settings and rank them by simulated profit.

    For every combination of encoder dictionary size, encoding
    transformation, neighbour count and stop-loss threshold, a
    nearest-neighbours classifier is fit on the encoded training data, its
    test-set predictions are passed to
    ``simulate.aggressive_with_hard_thresholds`` and the outcome is printed
    and recorded.

    Args:
        d: Dataset object indexable by ``'t/100ms'``, ``'bid/100ms'`` and
            ``'offer/100ms'`` and exposing a ``currency_pair`` attribute.
        x_train: Training features handed to ``encoder.FeatureEncoder``.
        x_test: Test features; presumably aligned with the series in ``d``
            from ``test_start_idx`` onward -- TODO confirm with callers.
        y_train: Training labels used to fit the classifier.
        y_test: Test labels passed to ``signals.accuracy``.
        test_start_idx: Offset at which the series in ``d`` are sliced to
            obtain the data given to the simulator.

    Returns:
        A ``(profits, results)`` pair of dicts keyed by ``Params`` tuples;
        ``profits`` holds the summed profit and ``results`` the complete
        ``Result`` tuple for each parameter combination.
    """
    # Series consumed by the simulator, starting at the test offset.
    ts = d['t/100ms'][test_start_idx:]
    bids = d['bid/100ms'][test_start_idx:]
    offers = d['offer/100ms'][test_start_idx:]
    currency_pair = d.currency_pair

    # Search grid.
    dict_sizes = [None, 200, 400]
    neighbour_counts = [3, 7, 21, 49]
    cut_thresholds = [.0015, 0.002]
    transformations = ['prob', 'triangle']

    Params = namedtuple('Params', 'dict_size transformation k cut_thresh')
    Result = namedtuple('Result', 'profit ntrades ppt accuracy tp fp tn fn tz fz')

    results = {}
    profits = {}
    # The baseline is 0, so a setting has to earn a strictly positive total
    # profit to be reported as best; otherwise the summary prints None.
    best_profit = 0
    best_desc = None
    best_result = None

    nparams = (len(dict_sizes) * len(neighbour_counts)
               * len(transformations) * len(cut_thresholds))
    # print(...) with one space-joined argument writes the same text as the
    # comma-separated print statement.
    print(" ".join(map(str, ["Total param combinations: ", nparams])))

    for dict_size in dict_sizes:
        print(" ".join(map(str, ["Training encoder with dict_size = ", dict_size])))
        feature_encoder = encoder.FeatureEncoder(x_train, whiten=False, n_centroids=dict_size)
        for transformation in transformations:
            train_codes = feature_encoder.encode(x_train, transformation=transformation)
            test_codes = feature_encoder.encode(x_test, transformation=transformation)
            for k in neighbour_counts:
                model = scikits.learn.neighbors.NeighborsClassifier(
                    n_neighbors=k, algorithm='ball')
                print(" ".join(map(str, ["Using knn classifier with k = ", k])))
                model.fit(train_codes, y_train)
                pred = model.predict(test_codes)
                for cut in cut_thresholds:
                    desc = Params(dict_size=dict_size,
                                  transformation=transformation,
                                  k=k, cut_thresh=cut)
                    profit_series = simulate.aggressive_with_hard_thresholds(
                        ts, bids, offers, pred, currency_pair,
                        max_loss_prct=cut)
                    sum_profit = np.sum(profit_series)
                    # Non-zero entries of the profit series count as trades.
                    ntrades = np.sum(profit_series != 0)
                    profit_per_trade = (sum_profit / float(ntrades)
                                        if ntrades > 0 else 0)
                    raw_accuracy, tp, fp, tn, fn, tz, fz = signals.accuracy(y_test, pred)
                    result = Result(profit=sum_profit, ntrades=ntrades,
                                    ppt=profit_per_trade,
                                    accuracy=raw_accuracy,
                                    tp=tp, fp=fp, tn=tn, fn=fn,
                                    tz=tz, fz=fz)
                    print(desc)
                    print(result)
                    print("")
                    if sum_profit > best_profit:
                        best_profit, best_result, best_desc = sum_profit, result, desc
                    results[desc] = result
                    profits[desc] = sum_profit

    print("Best over all params:")
    print(best_desc)
    print(best_result)
    return profits, results
def eval_prediction(ts, bids, offers, pred, actual, currency_pair, cut=0.0015):
    """Simulate trading on ``pred`` and summarise profit and precision.

    Args:
        ts: Series forwarded as the first argument to the simulator.
        bids: Bid series forwarded to the simulator.
        offers: Offer series forwarded to the simulator.
        pred: Predicted signals; used by the simulator and compared with
            ``actual`` by ``three_class_precision``.
        actual: Reference labels for the precision computation.
        currency_pair: Forwarded unchanged to the simulator.
        cut: Passed to the simulator as ``max_loss_prct``.

    Returns:
        A dict with the keys ``'profit'``, ``'ntrades'``, ``'ppt'``,
        ``'precision'``, ``'tp'``, ``'fp'``, ``'tn'``, ``'fn'``, ``'tz'``
        and ``'fz'``.
    """
    profit_series = simulate.aggressive_with_hard_thresholds(
        ts, bids, offers, pred, currency_pair,
        max_loss_prct=cut, max_hold_time=30000)
    # Disabled alternative left in place by the original author:
    # profit_series, _, _ = simulate.aggressive(ts, bids, offers, pred, currency_pair)

    total_profit = np.sum(profit_series)
    # Non-zero entries of the profit series count as trades.
    trade_count = np.sum(profit_series != 0)
    # Avoid dividing by zero when nothing was traded.
    per_trade = total_profit / float(trade_count) if trade_count > 0 else 0

    precision, tp, fp, tn, fn, tz, fz = three_class_precision(actual, pred)

    summary = {}
    summary['profit'] = total_profit
    summary['ntrades'] = trade_count
    summary['ppt'] = per_trade
    summary['precision'] = precision
    summary['tp'] = tp
    summary['fp'] = fp
    summary['tn'] = tn
    summary['fn'] = fn
    summary['tz'] = tz
    summary['fz'] = fz
    return summary