Example #1
    def get_latest_prices(self, normalize=True):
        from history.tools import normalization, filter_by_mins
        # how many rows to keep once the queryset is downsampled
        splice_point = self.minutes_back + self.timedelta_back_in_granularity_increments

        # newest first, downsampled to one price per granularity increment
        prices = Price.objects.filter(symbol=self.symbol).order_by('-created_on')
        prices = filter_by_mins(prices, self.granularity)
        prices = [price.price for price in prices]
        prices = list(prices[0:splice_point])
        if normalize:
            prices = normalization(prices)
        # reverse so the series runs oldest -> newest
        prices.reverse()
        return prices
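
Both helpers imported from history.tools sit outside this snippet. A minimal sketch of what they plausibly do, assuming normalization is a min-max rescale and filter_by_mins downsamples the rows to one per granularity increment (both bodies are guesses, not the project's actual code):

    def normalization(values):
        # assumed: min-max rescaling of a list of floats into [0, 1]
        lo, hi = min(values), max(values)
        if hi == lo:
            return [0.0 for _ in values]
        return [(v - lo) / (hi - lo) for v in values]

    def filter_by_mins(prices, granularity):
        # assumed: keep one Price row per `granularity` minutes, keyed on
        # the row's creation time
        kept = []
        for p in prices:
            total_mins = p.created_on.hour * 60 + p.created_on.minute
            if total_mins % granularity == 0:
                kept.append(p)
        return kept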
Example #2
    def handle(self, *args, **options):
        ticker = args[0]
        print("****** STARTING PREDICTOR " + ticker + " ******* ")
        prices = Price.objects.filter(
            symbol=ticker).order_by('-created_on').values_list('price',
                                                               flat=True)
        # list.reverse() mutates in place and returns None, so it cannot be
        # chained inside list(); materialize the queryset, then reverse
        data = list(prices[0:NUM_MINUTES_BACK])
        data.reverse()  # oldest price first
        data = normalization(data)
        data = [int(x * MULT_FACTOR) for x in data]
        print(data)

        ds = SupervisedDataSet(5, 1)
        # each sample is 5 consecutive prices; the target is the 6th
        # (the original `DS.addSample` was a NameError silently swallowed by a
        # bare try/except, which left the dataset empty)
        for i in range(len(data) - 5):
            ds.addSample((data[i], data[i + 1], data[i + 2], data[i + 3],
                          data[i + 4]), (data[i + 5], ))

        net = buildNetwork(5,
                           40,
                           1,
                           hiddenclass=LSTMLayer,
                           outputbias=False,
                           recurrent=True)

        trainer = RPropMinusTrainer(net, dataset=ds)
        train_errors = []  # save errors for plotting later
        EPOCHS_PER_CYCLE = 5
        CYCLES = 100
        EPOCHS = EPOCHS_PER_CYCLE * CYCLES
        for i in xrange(CYCLES):
            trainer.trainEpochs(EPOCHS_PER_CYCLE)
            train_errors.append(trainer.testOnData())
            epoch = (i + 1) * EPOCHS_PER_CYCLE
            print("\r epoch {}/{}".format(epoch, EPOCHS), end="")
            stdout.flush()

        print()
        print("final error =", train_errors[-1])

        for sample, target in ds.getSequenceIterator(0):
            show_pred_sample = net.activate(sample) / MULT_FACTOR
            show_sample = sample / MULT_FACTOR
            show_target = target / MULT_FACTOR
            show_diff = show_pred_sample - show_target
            show_diff_pct = 100 * show_diff / show_pred_sample
            print("{} => {}, act {}. ({}%)".format(
                show_sample[0], round(show_pred_sample[0], 3), show_target[0],
                int(round(show_diff_pct[0], 0))))
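
This handle method leans on module-level imports and constants that the snippet does not show. A plausible preamble, with assumed values for NUM_MINUTES_BACK and MULT_FACTOR (the real ones are not visible here):

    from __future__ import print_function  # the snippet uses print(..., end='') on Python 2
    from sys import stdout

    from django.core.management.base import BaseCommand
    from pybrain.datasets import SupervisedDataSet
    from pybrain.structure.modules import LSTMLayer
    from pybrain.supervised import RPropMinusTrainer
    from pybrain.tools.shortcuts import buildNetwork

    from history.models import Price
    from history.tools import normalization

    NUM_MINUTES_BACK = 1000  # assumed: how many price rows to train on
    MULT_FACTOR = 10000      # assumed: scales normalized prices up to integers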
Example #3
    def handle(self, *args, **options):
        # http://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html
        import numpy as np
        import matplotlib.pyplot as plt
        from matplotlib.colors import ListedColormap
        from sklearn.cross_validation import train_test_split
        from sklearn.preprocessing import StandardScaler
        from sklearn.datasets import make_moons, make_circles, make_classification
        from sklearn.neighbors import KNeighborsClassifier
        from sklearn.svm import SVC
        from sklearn.tree import DecisionTreeClassifier
        from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
        from sklearn.naive_bayes import GaussianNB
        from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
        from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

        h = .02  # step size in the mesh

        names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Decision Tree",
                 "Random Forest", "AdaBoost", "Naive Bayes", "Linear Discriminant Analysis",
                 "Quadratic Discriminant Analysis"]
        classifiers = [
            KNeighborsClassifier(3),
            SVC(kernel="linear", C=0.025),
            SVC(gamma=2, C=1),
            DecisionTreeClassifier(max_depth=5),
            RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
            AdaBoostClassifier(),
            GaussianNB(),
            LinearDiscriminantAnalysis(),
            QuadraticDiscriminantAnalysis()]

        # leftover from the scikit-learn demo: this synthetic dataset is
        # built but never used; X and y are rebound to the price data below
        X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                                   random_state=1, n_clusters_per_class=1)
        rng = np.random.RandomState(2)
        X += 2 * rng.uniform(size=X.shape)
        linearly_separable = (X, y)


        from history.tools import normalization, filter_by_mins, create_sample_row
        from history.models import Price

        graph = False
        self.symbol = 'BTC_ETH'
        self.minutes_back = 100
        self.timedelta_back_in_granularity_increments = 0
        datasetinputs = 2
        # gran_options = [1, 5, 15, 30]  # alternative, finer granularities
        gran_options = [30, 60, 120, 240]  # granularities to test, in minutes
        datasets = []
        _names = []
        for gran in gran_options:
            self.granularity = gran

            splice_point = self.minutes_back + self.timedelta_back_in_granularity_increments
            prices = Price.objects.filter(symbol=self.symbol).order_by('-created_on')
            prices = filter_by_mins(prices, self.granularity)
            prices = [price.price for price in prices]
            data = normalization(list(prices[0:splice_point]))
            data.reverse()

            price_datasets = [[], []]
            for i, val in enumerate(data):
                try:
                    # build the NN input row and its label
                    sample = create_sample_row(data, i, datasetinputs)
                    last_price = data[i + datasetinputs - 1]
                    next_price = data[i + datasetinputs]
                    change = next_price - last_price
                    pct_change = change / last_price
                    fee_pct = 0.002
                    # the `and False` disables the -1 (hold) branch, so the
                    # label is simply 1 for an up move and 0 otherwise
                    do_buy = -1 if abs(pct_change) < fee_pct and False else (1 if change > 0 else 0)
                    price_datasets[0].append(sample)
                    price_datasets[1].append(do_buy)
                except Exception as e:
                    # the last rows run off the end of `data`; skip them
                    print(e)
            datasets.append(price_datasets)
            _names.append(str(gran))


        if graph:
            figure = plt.figure(figsize=(27, 9))
        i = 1
        # iterate over datasets
        for _index in range(0, len(datasets)):
            ds = datasets[_index]
            # preprocess dataset, split into training and test part
            X, y = ds
            X = StandardScaler().fit_transform(X)
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4)

            x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
            y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
            xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                                 np.arange(y_min, y_max, h))

            # just plot the dataset first
            if graph:
                cm = plt.cm.RdBu
                cm_bright = ListedColormap(['#FF0000', '#0000FF'])
                ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
                # Plot the training points
                ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
                # and testing points
                ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)
                ax.set_xlim(xx.min(), xx.max())
                ax.set_ylim(yy.min(), yy.max())
                ax.set_xticks(())
                ax.set_yticks(())
            i += 1

            # iterate over classifiers
            for name, clf in zip(names, classifiers):
                if graph:
                    ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
                clf.fit(X_train, y_train)
                score = clf.score(X_test, y_test)
                # Plot the decision boundary. For that, we will assign a color to each
                # point in the mesh [x_min, x_max]x[y_min, y_max].
                _input = np.c_[xx.ravel(), yy.ravel()]
                if hasattr(clf, "decision_function"):
                    Z = clf.decision_function(_input)
                else:
                    Z = clf.predict_proba(_input)[:, 1]

                print(name, round(score * 100))
                # Put the result into a color plot
                if graph:
                    Z = Z.reshape(xx.shape)
                    ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)

                    # Plot also the training points
                    ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
                    # and testing points
                    ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
                               alpha=0.6)

                    ax.set_xlim(xx.min(), xx.max())
                    ax.set_ylim(yy.min(), yy.max())
                    ax.set_xticks(())
                    ax.set_yticks(())
                    ax.set_title("("+_names[_index]+")"+name)
                    text  = ('%.2f' % score).lstrip('0')
                    ax.text(xx.max() - .3, yy.min() + .3, text,
                            size=15, horizontalalignment='right')
                    i += 1

                stats = {'r': 0, 'w': 0}
                # fresh names (d, j) so the outer `ds` and the subplot
                # counter `i` are not clobbered, as they were originally;
                # note these rows are unscaled, unlike the training data
                for d in datasets:
                    for j in range(len(d[0])):
                        sample = d[0][j]
                        actual = d[1][j]
                        prediction = clf.predict(sample)
                        key = 'r' if actual == prediction[0] else 'w'
                        stats[key] += 1
                print('stats', name, stats,
                      round((100.0 * stats['r'] / (stats['r'] + stats['w'])), 2))


        if graph:
            figure.subplots_adjust(left=.02, right=.98)
            plt.show()
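
create_sample_row is imported from history.tools but never shown. Given how it is called and the last_price/next_price arithmetic beside it, it most likely returns a window of consecutive values; a one-line sketch under that assumption:

    def create_sample_row(data, i, datasetinputs):
        # assumed: the `datasetinputs` consecutive values starting at index i
        return tuple(data[i:i + datasetinputs])

With datasetinputs = 2, the row for index i would be (data[i], data[i + 1]), last_price is data[i + 1] and next_price is data[i + 2], which lines up with the labeling code above.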
Example #4
    def handle(self, *args, **options):
        # http://scikit-learn.org/stable/auto_examples/classification/plot_classification_probability.html

        from history.tools import normalization, filter_by_mins, create_sample_row
        from history.models import Price

        graph = False
        self.symbol = 'BTC_ETH'
        self.minutes_back = 100
        self.timedelta_back_in_granularity_increments = 0
        datasetinputs = 2
        # gran_options = [1, 5, 15, 30]  # alternative, finer granularities
        gran_options = [30, 60, 120, 240]  # granularities to test, in minutes
        datasets = []
        _names = []
        for gran in gran_options:
            self.granularity = gran

            splice_point = self.minutes_back + self.timedelta_back_in_granularity_increments
            prices = Price.objects.filter(
                symbol=self.symbol).order_by('-created_on')
            prices = filter_by_mins(prices, self.granularity)
            prices = [price.price for price in prices]
            data = normalization(list(prices[0:splice_point]))
            data.reverse()

            price_datasets = [[], []]
            for i, val in enumerate(data):
                try:
                    # get NN projection
                    sample = create_sample_row(data, i, datasetinputs)
                    last_price = data[i + datasetinputs - 1]
                    next_price = data[i + datasetinputs]
                    change = next_price - last_price
                    pct_change = change / last_price
                    fee_pct = 0.002
                    # the `and False` disables the -1 (hold) branch, so the
                    # label is simply 1 for an up move and 0 otherwise
                    do_buy = -1 if abs(pct_change) < fee_pct and False else (
                        1 if change > 0 else 0)
                    price_datasets[0].append([x for x in sample])
                    price_datasets[1].append(do_buy)
                except Exception as e:
                    print(e)
            datasets.append(price_datasets)
            _names.append(str(gran))
            # note: everything below still sits inside the `for gran` loop,
            # so the classifiers are retrained once per granularity
            _datasets = datasets

            # Author: Alexandre Gramfort <*****@*****.**>
            # License: BSD 3 clause

            import matplotlib.pyplot as plt
            import numpy as np

            from sklearn.linear_model import LogisticRegression
            from sklearn.svm import SVC
            # alias the sklearn module so it does not shadow the local
            # `datasets` list (which is appended to on later loop iterations)
            from sklearn import datasets as sk_datasets

            iris = sk_datasets.load_iris()
            # only the first two features, for visualization; note these iris
            # arrays are overwritten with the price data just below
            X = iris.data[:, 0:2]
            y = iris.target

            _datasets = _datasets[0]  # only the first granularity's dataset is used
            X = np.ndarray(shape=(len(_datasets[0]), 2),
                           dtype=float,
                           buffer=np.array(_datasets[0]))
            y = np.array(_datasets[1])

            n_features = X.shape[1]

            C = 1.0

            # Create different classifiers. The logistic regression cannot do
            # multiclass out of the box.
            classifiers = {
                'L1 logistic': LogisticRegression(C=C, penalty='l1'),
                'L2 logistic (OvR)': LogisticRegression(C=C, penalty='l2'),
                'Linear SVC': SVC(kernel='linear', C=C, probability=True,
                                  random_state=0),
                'L2 logistic (Multinomial)': LogisticRegression(
                    C=C, solver='lbfgs', multi_class='multinomial'),
            }

            n_classifiers = len(classifiers)

            plt.figure(figsize=(3 * 2, n_classifiers * 2))
            plt.subplots_adjust(bottom=.2, top=.95)

            xx = np.linspace(3, 9, 100)
            yy = np.linspace(1, 5, 100).T
            xx, yy = np.meshgrid(xx, yy)
            Xfull = np.c_[xx.ravel(), yy.ravel()]

            for index, (name, classifier) in enumerate(classifiers.items()):
                classifier.fit(X, y)

                y_pred = classifier.predict(X)
                classif_rate = np.mean(y_pred.ravel() == y.ravel()) * 100
                print("classif_rate for %s : %f " % (name, classif_rate))

                # View probabilities
                probas = classifier.predict_proba(Xfull)
                n_classes = np.unique(y_pred).size
                for k in range(n_classes):
                    plt.subplot(n_classifiers, n_classes,
                                index * n_classes + k + 1)
                    plt.title("Class %d" % k)
                    if k == 0:
                        plt.ylabel(name)
                    imshow_handle = plt.imshow(probas[:, k].reshape((100, 100)),
                                               extent=(3, 9, 1, 5),
                                               origin='lower')
                    plt.xticks(())
                    plt.yticks(())
                    idx = (y_pred == k)
                    if idx.any():
                        plt.scatter(X[idx, 0], X[idx, 1], marker='o', c='k')

            ax = plt.axes([0.15, 0.04, 0.7, 0.05])
            plt.title("Probability")
            plt.colorbar(imshow_handle, cax=ax, orientation='horizontal')

            plt.show()
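
Every handle method in these examples is a Django management command, so each lives in a Command class under some app's management/commands/ directory. A minimal wrapper, with an assumed file name (the real command names are not visible in these snippets):

    # assumed path: history/management/commands/predict_many_sk.py
    from django.core.management.base import BaseCommand


    class Command(BaseCommand):
        help = 'Compares scikit-learn classifiers on recent Price data.'

        def handle(self, *args, **options):
            # body as in the examples above
            pass

It would then be invoked through manage.py, e.g. python manage.py predict_many_sk, or with a positional ticker argument as in Example #2 (python manage.py predict BTC_ETH).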