Example #1
    def create_DS(self, data):
        # Build a PyBrain SupervisedDataSet of sliding windows: each sample is
        # `size` consecutive prices and the target is the price that follows.
        size = self.datasetinputs
        DS = SupervisedDataSet(size, 1)
        try:
            for i, val in enumerate(data):
                sample = create_sample_row(data, i, size)
                target = data[i + size]
                DS.addSample(sample, (target, ))
        except Exception as e:
            # the window running off the end of `data` ends the loop;
            # anything else is worth reporting
            if "list index out of range" not in str(e):
                print(e)
        return DS
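Every example in this section leans on create_sample_row(data, i, size) from history.tools. Its source is not shown here, but the call sites suggest it returns size consecutive values starting at index i and raises IndexError once the window runs past the end of the list, which is exactly what the "list index out of range" handler above relies on. A minimal sketch under those assumptions (not the project's actual implementation):

def create_sample_row(data, i, size):
    # hypothetical reconstruction: take `size` consecutive values starting at
    # index i; element indexing (unlike slicing) raises IndexError when the
    # window overruns the data, which the callers catch to end their loops
    return tuple(data[i + n] for n in range(size))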
Example #2
    def get_classifier(self, train=True, test=True):

        all_output = ""
        h = .02  # step size in the mesh
        self.names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Decision Tree",
                      "Random Forest", "AdaBoost", "Naive Bayes", "Linear Discriminant Analysis",
                      "Quadratic Discriminant Analysis"]
        classifiers = [
            KNeighborsClassifier(3),
            SVC(kernel="linear", C=0.025),
            SVC(gamma=2, C=1),
            DecisionTreeClassifier(max_depth=5),
            RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
            AdaBoostClassifier(),
            GaussianNB(),
            LinearDiscriminantAnalysis(),
            QuadraticDiscriminantAnalysis()]

        # pick the classifier whose display name matches self.name
        for i in range(0, len(self.names)):
            if self.names[i] == self.name:
                clf = classifiers[i]

        if train:
            start_time = int(time.time())
            data = self.get_latest_prices(normalize=False)
            price_datasets = [[], []]
            for i, val in enumerate(data):
                try:
                    # get classifier projection
                    sample = create_sample_row(data, i, self.datasetinputs)
                    last_price = data[i + self.datasetinputs - 1]
                    next_price = data[i + self.datasetinputs]
                    change = next_price - last_price
                    pct_change = change / last_price
                    fee_pct = 0.002 * 2  # fee x 2, since we'd need to clear both buy and sell fees to be profitable
                    do_buy = -1 if abs(pct_change) < fee_pct else (1 if change > 0 else 0)
                    price_datasets[0].append(sample)
                    price_datasets[1].append(do_buy)
                except Exception:
                    # the window running off the end of `data` lands here
                    pass

            data = price_datasets
            if self.timedelta_back_in_granularity_increments == 0:
                train_data = data
                test_data = [[],[]]
            else:
                train_data = [data[0][0:(-1 * self.timedelta_back_in_granularity_increments)],
                              data[1][0:(-1 * self.timedelta_back_in_granularity_increments)]]
                test_data = [data[0][len(train_data[0]):], data[1][len(train_data[1]):]]
            self.datasets = train_data

            X, y = train_data
            scaler = StandardScaler()
            X = scaler.fit_transform(X)
            self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=.4)

            self.x_min, self.x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
            self.y_min, self.y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
            self.xx, self.yy = np.meshgrid(np.arange(self.x_min, self.x_max, h),
                                 np.arange(self.y_min, self.y_max, h))


            clf.fit(self.X_train, self.y_train)
            score = clf.score(self.X_test, self.y_test)
            # Plot the decision boundary. For that, we will assign a color to each
            # point in the mesh [self.x_min, self.x_max] x [self.y_min, self.y_max].

            _input = np.c_[self.xx.ravel(), self.yy.ravel()]
            if hasattr(clf, "decision_function"):
                self.Z = clf.decision_function(_input)
            else:
                self.Z = clf.predict_proba(_input)[:, 1]
            if test and len(test_data[0]) > 0:
                stats = {'r': 0, 'w': 0, 'p': {0: 0, 1: 0, -1: 0}, 'a': {0: 0, 1: 0, -1: 0}}
                ds = test_data
                for i in range(0, len(ds[0])):
                    sample = ds[0][i]
                    actual = ds[1][i]
                    # scale with the scaler fit on the training data; fitting a
                    # fresh StandardScaler on a single row would zero it out
                    sample = scaler.transform([sample])
                    prediction = clf.predict(sample)
                    self.prediction = prediction
                    stats['p'][prediction[0]] += 1
                    stats['a'][actual] += 1
                    stats['r' if actual == prediction[0] else 'w'] += 1
                pct_correct = (1.0 * stats['r'] / (stats['r'] + stats['w']))
                all_output = all_output + str(('stats', self.name, round(pct_correct, 2)))
                all_output = all_output + str(('stats_debug', stats))
                self.percent_correct = int(pct_correct * 100)
                self.prediction_size = len(test_data[0])

            all_output = all_output + str((self.name, round(score * 100)))
            self.score = score * 100
            end_time = int(time.time())
            self.time = end_time - start_time
            self.output = all_output

        self.clf = clf

        return clf
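The training branch above fits the chosen classifier on a scaled train/test split and also evaluates it over a mesh spanning the 2-D feature range (self.Z), which a caller can reshape into a decision-boundary contour plot; the next example generalizes the mesh to any number of inputs. The grid-evaluation pattern in isolation, as a self-contained sketch with synthetic data standing in for the price windows:

import numpy as np
from sklearn.svm import SVC

rng = np.random.RandomState(0)
X = rng.randn(200, 2)                    # stand-in for scaled price windows
y = (X[:, 0] + X[:, 1] > 0).astype(int)  # stand-in for buy/sell labels

clf = SVC(gamma=2, C=1).fit(X, y)

h = .02  # step size in the mesh, as in the examples
xx, yy = np.meshgrid(np.arange(X[:, 0].min() - .5, X[:, 0].max() + .5, h),
                     np.arange(X[:, 1].min() - .5, X[:, 1].max() + .5, h))
grid = np.c_[xx.ravel(), yy.ravel()]
# decision_function where available, else the positive-class probability
Z = clf.decision_function(grid).reshape(xx.shape)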
Example #3
    def get_classifier(self, train=True, test=True):

        all_output = ""
        h = .02  # step size in the mesh
        self.names = [
            "Nearest Neighbors", "Linear SVM", "RBF SVM", "Decision Tree",
            "Random Forest", "AdaBoost", "Naive Bayes",
            "Linear Discriminant Analysis", "Quadratic Discriminant Analysis"
        ]
        classifiers = [
            KNeighborsClassifier(3),
            SVC(kernel="linear", C=0.025),
            SVC(gamma=2, C=1),
            DecisionTreeClassifier(max_depth=5),
            RandomForestClassifier(max_depth=5,
                                   n_estimators=10,
                                   max_features=1),
            AdaBoostClassifier(),
            GaussianNB(),
            LinearDiscriminantAnalysis(),
            QuadraticDiscriminantAnalysis()
        ]

        # pick the classifier whose display name matches self.name
        for i in range(0, len(self.names)):
            if self.names[i] == self.name:
                clf = classifiers[i]

        if train:
            start_time = int(time.time())
            data = self.get_latest_prices(normalize=False)
            price_datasets = [[], []]
            for i, val in enumerate(data):
                try:
                    # get classifier projection
                    sample = create_sample_row(data, i, self.datasetinputs)
                    last_price = data[i + self.datasetinputs - 1]
                    next_price = data[i + self.datasetinputs]
                    change = next_price - last_price
                    pct_change = change / last_price
                    fee_pct = get_fee_amount()
                    fee_pct = fee_pct * 2  # fee x 2 since we'd need to clear both buy and sell fees to be profitable
                    fee_pct = fee_pct * settings.FEE_MANAGEMENT_STRATEGY  # see desc in settings.py
                    do_buy = ClassifierTest.HOLD if abs(pct_change) < fee_pct else (
                        ClassifierTest.BUY if change > 0 else ClassifierTest.SELL)
                    price_datasets[0].append(sample)
                    price_datasets[1].append(do_buy)
                except Exception:
                    pass

            data = price_datasets
            if self.timedelta_back_in_granularity_increments == 0:
                train_data = data
                test_data = [[], []]
            else:
                train_data = [
                    data[0][0:(-1 * self.timedelta_back_in_granularity_increments)],
                    data[1][0:(-1 * self.timedelta_back_in_granularity_increments)]
                ]
                test_data = [
                    data[0][len(train_data[0]):], data[1][len(train_data[1]):]
                ]
            self.datasets = train_data

            X, y = train_data
            scaler = StandardScaler()
            X = scaler.fit_transform(X)
            self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
                X, y, test_size=.4)

            self.min = {}
            self.max = {}
            self.xz = ()
            mesh_args = []
            for i in range(0, self.datasetinputs):
                self.min[i], self.max[i] = X[:, i].min() - .5, X[:, i].max() + .5
                mesh_args.append(np.arange(self.min[i], self.max[i], h))
            self.xz = np.meshgrid(*mesh_args)

            clf.fit(self.X_train, self.y_train)
            score = clf.score(self.X_test, self.y_test)

            # Plot the decision boundary. For that, we will assign a color to each
            # point in the mesh spanning [self.min[i], self.max[i]] along every input dimension.

            self.ravel_args = []
            for i in range(0, self.datasetinputs):
                self.ravel_args.append(self.xz[i].ravel())

            self._input = np.column_stack(self.ravel_args)

            if hasattr(clf, "decision_function"):
                self.Z = clf.decision_function(self._input)
            else:
                self.Z = clf.predict_proba(self._input)[:, 1]

            if test and len(test_data[0]) > 0:
                stats = {'r': 0, 'w': 0, 'p': {0: 0, 1: 0, -1: 0}, 'a': {0: 0, 1: 0, -1: 0}}
                ds = test_data
                for i in range(0, len(ds[0])):
                    sample = ds[0][i]
                    actual = ds[1][i]
                    # scale with the scaler fit on the training data; fitting a
                    # fresh StandardScaler on a single row would zero it out
                    sample = scaler.transform([sample])
                    prediction = clf.predict(sample)
                    self.prediction = prediction
                    stats['p'][prediction[0]] += 1
                    stats['a'][actual] += 1
                    stats['r' if actual == prediction[0] else 'w'] += 1
                pct_correct = (1.0 * stats['r'] / (stats['r'] + stats['w']))
                all_output = all_output + str(
                    ('stats', self.name, round(pct_correct, 2)))
                all_output = all_output + str(('stats_debug', stats))
                self.percent_correct = int(pct_correct * 100)
                self.prediction_size = len(test_data[0])

            all_output = all_output + str((self.name, round(score * 100)))
            self.score = score * 100
            end_time = int(time.time())
            self.time = end_time - start_time
            self.output = all_output

        self.clf = clf

        return clf
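Examples #2 and #3 label each price window before training. The labeling rule is easy to restate on its own; in this sketch the ClassifierTest constants are assumed to be HOLD = -1, SELL = 0 and BUY = 1, which matches the keys of the stats dictionaries above:

HOLD, SELL, BUY = -1, 0, 1  # assumed values, mirroring the stats dict keys

def label_move(last_price, next_price, fee_pct=0.002 * 2):
    # a move smaller than the round-trip fee cannot be traded profitably,
    # so it is a HOLD; otherwise BUY on an up-move, SELL on a down-move
    change = next_price - last_price
    pct_change = change / last_price
    if abs(pct_change) < fee_pct:
        return HOLD
    return BUY if change > 0 else SELL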
Example #4
    def handle(self, *args, **options):
        # http://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html
        import numpy as np
        import matplotlib.pyplot as plt
        from matplotlib.colors import ListedColormap
        from sklearn.model_selection import train_test_split  # was sklearn.cross_validation, removed in scikit-learn 0.20
        from sklearn.preprocessing import StandardScaler
        from sklearn.datasets import make_moons, make_circles, make_classification  # noqa -- make_moons/make_circles unused, kept from the upstream demo
        from sklearn.neighbors import KNeighborsClassifier
        from sklearn.svm import SVC
        from sklearn.tree import DecisionTreeClassifier
        from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
        from sklearn.naive_bayes import GaussianNB
        from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
        from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

        h = .02  # step size in the mesh

        names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Decision Tree",
                 "Random Forest", "AdaBoost", "Naive Bayes", "Linear Discriminant Analysis",
                 "Quadratic Discriminant Analysis"]
        classifiers = [
            KNeighborsClassifier(3),
            SVC(kernel="linear", C=0.025),
            SVC(gamma=2, C=1),
            DecisionTreeClassifier(max_depth=5),
            RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
            AdaBoostClassifier(),
            GaussianNB(),
            LinearDiscriminantAnalysis(),
            QuadraticDiscriminantAnalysis()]

        # leftover from the upstream scikit-learn demo; this toy dataset is not
        # used below, the price-derived datasets are
        X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                                   random_state=1, n_clusters_per_class=1)
        rng = np.random.RandomState(2)
        X += 2 * rng.uniform(size=X.shape)
        linearly_separable = (X, y)  # noqa


        from history.tools import normalization, filter_by_mins, create_sample_row
        from history.models import Price

        graph = False
        self.symbol = 'BTC_ETH'
        self.minutes_back = 100
        self.timedelta_back_in_granularity_increments = 0
        datasetinputs = 2
        # gran_options = [1, 5, 15, 30]  # the reassignment below wins
        gran_options = [30, 60, 120, 240]
        datasets = []
        _names = []
        for gran in gran_options:
            self.granularity = gran

            splice_point = self.minutes_back + self.timedelta_back_in_granularity_increments
            prices = Price.objects.filter(symbol=self.symbol).order_by('-created_on')
            prices = filter_by_mins(prices, self.granularity)
            prices = [price.price for price in prices]
            data = normalization(list(prices[0:splice_point]))
            data.reverse()

            price_datasets = [[], []]
            for i, val in enumerate(data):
                try:
                    # get NN projection
                    sample = create_sample_row(data, i, datasetinputs)
                    last_price = data[i + datasetinputs - 1]
                    next_price = data[i + datasetinputs]
                    change = next_price - last_price
                    pct_change = change / last_price
                    fee_pct = 0.002
                    # `and False` disables the HOLD (-1) label, so every row is 1 (buy) or 0 (sell)
                    do_buy = -1 if abs(pct_change) < fee_pct and False else (1 if change > 0 else 0)
                    price_datasets[0].append(sample)
                    price_datasets[1].append(do_buy)
                except Exception as e:
                    print(e)
            datasets.append(price_datasets)
            _names.append(str(gran))


        if graph:
            figure = plt.figure(figsize=(27, 9))
        i = 1
        # iterate over datasets
        for _index in range(0, len(datasets)):
            ds = datasets[_index]
            # preprocess dataset, split into training and test part
            X, y = ds
            X = StandardScaler().fit_transform(X)
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4)

            x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
            y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
            xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                                 np.arange(y_min, y_max, h))

            # just plot the dataset first
            if graph:
                cm = plt.cm.RdBu
                cm_bright = ListedColormap(['#FF0000', '#0000FF'])
                ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
                # Plot the training points
                ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
                # and testing points
                ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)
                ax.set_xlim(xx.min(), xx.max())
                ax.set_ylim(yy.min(), yy.max())
                ax.set_xticks(())
                ax.set_yticks(())
            i += 1

            # iterate over classifiers
            for name, clf in zip(names, classifiers):
                if graph:
                    ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
                clf.fit(X_train, y_train)
                score = clf.score(X_test, y_test)
                # Plot the decision boundary. For that, we will assign a color to each
                # point in the mesh [x_min, x_max] x [y_min, y_max].
                _input = np.c_[xx.ravel(), yy.ravel()]
                if hasattr(clf, "decision_function"):
                    Z = clf.decision_function(_input)
                else:
                    Z = clf.predict_proba(_input)[:, 1]

                print(name, round(score * 100))
                # Put the result into a color plot
                if graph:
                    Z = Z.reshape(xx.shape)
                    ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)

                    # Plot also the training points
                    ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
                    # and testing points
                    ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
                               alpha=0.6)

                    ax.set_xlim(xx.min(), xx.max())
                    ax.set_ylim(yy.min(), yy.max())
                    ax.set_xticks(())
                    ax.set_yticks(())
                    ax.set_title("("+_names[_index]+")"+name)
                    text  = ('%.2f' % score).lstrip('0')
                    ax.text(xx.max() - .3, yy.min() + .3, text,
                            size=15, horizontalalignment='right')
                    i += 1

                stats = {'r': 0, 'w': 0}
                for ds in datasets:
                    # use a separate index so the subplot counter `i` isn't clobbered
                    for j in range(0, len(ds[0])):
                        sample = ds[0][j]
                        actual = ds[1][j]
                        # note: these samples are unscaled, unlike the training data
                        prediction = clf.predict([sample])  # predict expects a 2D array
                        stats['r' if actual == prediction[0] else 'w'] += 1
                print('stats', name, stats, round((100.0 * stats['r'] / (stats['r'] + stats['w'])), 2))


        if graph:
            figure.subplots_adjust(left=.02, right=.98)
            plt.show()
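The same window-building loop recurs in every example. Factored out, and using the create_sample_row behavior sketched after Example #1, it would look roughly like this (a hypothetical helper, not project code):

def build_price_dataset(data, datasetinputs):
    # pair each window of `datasetinputs` prices with a buy(1)/sell(0)
    # label for the move that follows the window
    samples, labels = [], []
    for i in range(len(data) - datasetinputs):
        samples.append(create_sample_row(data, i, datasetinputs))
        change = data[i + datasetinputs] - data[i + datasetinputs - 1]
        labels.append(1 if change > 0 else 0)
    return samples, labels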
Example #5
def predict_v2(ticker, hidden_layers=15, NUM_MINUTES_BACK=1000, NUM_EPOCHS=1000, granularity_minutes=15,
               datasetinputs=5, learningrate=0.005, bias=False, momentum=0.1, weightdecay=0.0, recurrent=False,
               timedelta_back_in_granularity_increments=0):

    # setup
    print_and_log("(p)starting ticker:{} hidden:{} min:{} epoch:{} gran:{} dsinputs:{} learningrate:{} bias:{} momentum:{} weightdecay:{}\
                  recurrent:{}, timedelta_back_in_granularity_increments:{} ".format(
                  ticker, hidden_layers, NUM_MINUTES_BACK, NUM_EPOCHS, granularity_minutes, datasetinputs,
                  learningrate, bias, momentum, weightdecay, recurrent, timedelta_back_in_granularity_increments))
    pt = PredictionTest()
    pt.type = 'mock'
    pt.symbol = ticker
    pt.datasetinputs = datasetinputs
    pt.hiddenneurons = hidden_layers
    pt.minutes_back = NUM_MINUTES_BACK
    pt.epochs = NUM_EPOCHS
    pt.momentum = momentum
    pt.granularity = granularity_minutes
    pt.bias = bias
    pt.bias_chart = -1 if pt.bias is None else (1 if pt.bias else 0)
    pt.learningrate = learningrate
    pt.weightdecay = weightdecay
    pt.recurrent = recurrent
    pt.recurrent_chart = -1 if pt.recurrent is None else (1 if pt.recurrent else 0)
    pt.timedelta_back_in_granularity_increments = timedelta_back_in_granularity_increments
    all_output = ""
    start_time = int(time.time())

    # get neural network & data
    pt.get_nn()
    sample_data, test_data = pt.get_train_and_test_data()

    # output / testing
    round_to = 2
    num_times_directionally_correct = 0
    num_times = 0
    diffs = []
    profitloss_pct = []
    for i, val in enumerate(test_data):
        try:
            # get NN projection
            sample = create_sample_row(test_data, i, datasetinputs)
            recommend, nn_price, last_sample, projected_change_pct = pt.predict(sample)

            # calculate profitability
            actual_price = test_data[i+datasetinputs]
            diff = nn_price - actual_price
            diff_pct = 100 * diff / actual_price
            directionally_correct = ((actual_price - last_sample) > 0 and (nn_price - last_sample) > 0) \
                or ((actual_price - last_sample) < 0 and (nn_price - last_sample) < 0)
            if recommend != 'HOLD':
                profitloss_pct = profitloss_pct + [abs((actual_price - last_sample) / last_sample) *
                                                   (1 if directionally_correct else -1)]
            if directionally_correct:
                num_times_directionally_correct = num_times_directionally_correct + 1
            num_times = num_times + 1
            diffs.append(diff)
            output = "{}) seq ending in {} => {} (act {}, {}/{} pct off); Recommend: {}; Was Directionally Correct:{}\
                    ".format(i, round(actual_price, round_to), round(nn_price, round_to),
                             round(actual_price, round_to), round(diff, round_to), round(diff_pct, 1),
                             recommend, directionally_correct)
            all_output = all_output + "\n" + output
        except Exception as e:
            if "list index out of range" not in str(e):
                print_and_log("(p)" + str(e))

    avg_diff = sum([abs(diff[0]) for diff in diffs]) / num_times  # noqa
    pct_correct = 100 * num_times_directionally_correct / num_times
    # guard: if every recommendation was HOLD, profitloss_pct is empty
    modeled_profit_loss = sum(profitloss_pct) / len(profitloss_pct) if profitloss_pct else 0
    output = 'directionally correct {} of {} times.  {}%.  avg diff={}, profit={}'.format(
        num_times_directionally_correct, num_times, round(pct_correct, 0), round(avg_diff, 4),
        round(modeled_profit_loss, 3))
    print_and_log("(p)"+output)
    all_output = all_output + "\n" + output

    end_time = int(time.time())
    pt.time = end_time - start_time
    pt.prediction_size = len(diffs)
    pt.output = all_output
    pt.percent_correct = pct_correct
    pt.avg_diff = avg_diff
    pt.profitloss = modeled_profit_loss
    pt.profitloss_int = int(pt.profitloss * 100)
    pt.save()

    return pt.pk
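A sketch of how predict_v2 might be called, say from a Django shell; the argument values below are illustrative only, not taken from the source:

pk = predict_v2('BTC_ETH',
                hidden_layers=5,
                NUM_MINUTES_BACK=100,
                NUM_EPOCHS=1000,
                granularity_minutes=30,
                datasetinputs=2)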
Example #6
    def handle(self, *args, **options):
        # http://scikit-learn.org/stable/auto_examples/classification/plot_classification_probability.html

        from history.tools import normalization, filter_by_mins, create_sample_row
        from history.models import Price

        graph = False
        self.symbol = 'BTC_ETH'
        self.minutes_back = 100
        self.timedelta_back_in_granularity_increments = 0
        datasetinputs = 2
        # gran_options = [1, 5, 15, 30]  # the reassignment below wins
        gran_options = [30, 60, 120, 240]
        datasets = []
        _names = []
        for gran in gran_options:
            self.granularity = gran

            splice_point = self.minutes_back + self.timedelta_back_in_granularity_increments
            prices = Price.objects.filter(
                symbol=self.symbol).order_by('-created_on')
            prices = filter_by_mins(prices, self.granularity)
            prices = [price.price for price in prices]
            data = normalization(list(prices[0:splice_point]))
            data.reverse()

            price_datasets = [[], []]
            for i, val in enumerate(data):
                try:
                    # get NN projection
                    sample = create_sample_row(data, i, datasetinputs)
                    last_price = data[i + datasetinputs - 1]
                    next_price = data[i + datasetinputs]
                    change = next_price - last_price
                    pct_change = change / last_price
                    fee_pct = 0.002
                    # `and False` disables the HOLD (-1) label, so every row is 1 (buy) or 0 (sell)
                    do_buy = -1 if abs(pct_change) < fee_pct and False else (1 if change > 0 else 0)
                    price_datasets[0].append([x for x in sample])
                    price_datasets[1].append(do_buy)
                except Exception as e:
                    print(e)
            datasets.append(price_datasets)
            _names.append(str(gran))
            _datasets = datasets

            # Author: Alexandre Gramfort <*****@*****.**>
            # License: BSD 3 clause

            import matplotlib.pyplot as plt
            import numpy as np

            from sklearn.linear_model import LogisticRegression
            from sklearn.svm import SVC
            from sklearn import datasets

            iris = datasets.load_iris()
            X = iris.data[:, 0:2]  # we only take the first two features for visualization
            y = iris.target

            # the iris features loaded above are immediately replaced by the
            # price-derived dataset for the current granularity
            _datasets = _datasets[0]
            X = np.ndarray(shape=(len(_datasets[0]), 2),
                           dtype=float,
                           buffer=np.array(_datasets[0]))
            y = np.array(_datasets[1])

            n_features = X.shape[1]

            C = 1.0

            # Create different classifiers. The logistic regression cannot do
            # multiclass out of the box.
            classifiers = {
                'L1 logistic': LogisticRegression(C=C, penalty='l1', solver='liblinear'),  # liblinear supports the l1 penalty on current scikit-learn
                'L2 logistic (OvR)': LogisticRegression(C=C, penalty='l2'),
                'Linear SVC': SVC(kernel='linear', C=C, probability=True, random_state=0),
                'L2 logistic (Multinomial)': LogisticRegression(C=C, solver='lbfgs', multi_class='multinomial')
            }

            n_classifiers = len(classifiers)

            plt.figure(figsize=(3 * 2, n_classifiers * 2))
            plt.subplots_adjust(bottom=.2, top=.95)

            xx = np.linspace(3, 9, 100)
            yy = np.linspace(1, 5, 100).T
            xx, yy = np.meshgrid(xx, yy)
            Xfull = np.c_[xx.ravel(), yy.ravel()]

            for index, (name, classifier) in enumerate(classifiers.items()):
                classifier.fit(X, y)

                y_pred = classifier.predict(X)
                classif_rate = np.mean(y_pred.ravel() == y.ravel()) * 100
                print("classif_rate for %s : %f " % (name, classif_rate))

                # View probabilities
                probas = classifier.predict_proba(Xfull)
                n_classes = np.unique(y_pred).size
                for k in range(n_classes):
                    plt.subplot(n_classifiers, n_classes,
                                index * n_classes + k + 1)
                    plt.title("Class %d" % k)
                    if k == 0:
                        plt.ylabel(name)
                    imshow_handle = plt.imshow(probas[:, k].reshape((100, 100)),
                                               extent=(3, 9, 1, 5),
                                               origin='lower')
                    plt.xticks(())
                    plt.yticks(())
                    idx = (y_pred == k)
                    if idx.any():
                        plt.scatter(X[idx, 0], X[idx, 1], marker='o', c='k')

            ax = plt.axes([0.15, 0.04, 0.7, 0.05])
            plt.title("Probability")
            plt.colorbar(imshow_handle, cax=ax, orientation='horizontal')

            plt.show()