def create_DS(self, data):
    """Build a supervised dataset of sliding windows over ``data``.

    Each sample is the row returned by ``create_sample_row(data, i, size)``
    with ``size = self.datasetinputs``; its target is ``data[i + size]``.

    Args:
        data: Indexable sequence of values.

    Returns:
        The ``SupervisedDataSet(size, 1)`` filled with every window that
        could be built. Construction is best-effort: an unexpected error is
        printed and the samples collected so far are returned.
    """
    size = self.datasetinputs
    DS = SupervisedDataSet(size, 1)
    try:
        for i, _ in enumerate(data):
            sample = create_sample_row(data, i, size)
            target = data[i + size]
            DS.addSample(sample, (target,))
    except IndexError:
        # Expected: the window ran past the end of ``data``. Caught by type
        # rather than by message text so tuples/arrays behave like lists.
        pass
    except Exception as e:
        print(e)
    return DS
def create_DS(self, data):
    """Build a supervised dataset of sliding windows over ``data``.

    Each sample is the row returned by ``create_sample_row(data, i, size)``
    with ``size = self.datasetinputs``; its target is ``data[i + size]``.

    Args:
        data: Indexable sequence of values.

    Returns:
        The ``SupervisedDataSet(size, 1)`` filled with every window that
        could be built. Construction is best-effort: an unexpected error is
        printed and the samples collected so far are returned.
    """
    size = self.datasetinputs
    DS = SupervisedDataSet(size, 1)
    try:
        for i, _ in enumerate(data):
            sample = create_sample_row(data, i, size)
            target = data[i + size]
            DS.addSample(sample, (target,))
    except IndexError:
        # Expected: the window ran past the end of ``data``. Caught by type
        # rather than by message text so tuples/arrays behave like lists.
        pass
    except Exception as e:
        print(e)
    return DS
def get_classifier(self, train=True, test=True):
    """Select, optionally train, and optionally back-test the classifier named ``self.name``.

    Args:
        train: When true, fetch prices with ``self.get_latest_prices``, label
            every window of ``self.datasetinputs`` prices, fit the classifier
            and store the fit artefacts (``X_train``, ``xx``, ``Z``,
            ``score``, ``time``, ``output`` ...) on ``self``.
        test: When true (and training ran), predict the held-back samples and
            store ``percent_correct`` / ``prediction_size`` on ``self``.

    Returns:
        The selected scikit-learn estimator, also stored as ``self.clf``.

    Raises:
        ValueError: If ``self.name`` is not one of ``self.names``.
    """
    all_output = ""
    h = .02  # step size in the mesh
    self.names = ["Nearest Neighbors", "Linear SVM", "RBF SVM",
                  "Decision Tree", "Random Forest", "AdaBoost", "Naive Bayes",
                  "Linear Discriminant Analysis",
                  "Quadratic Discriminant Analysis"]
    classifiers = [
        KNeighborsClassifier(3),
        SVC(kernel="linear", C=0.025),
        SVC(gamma=2, C=1),
        DecisionTreeClassifier(max_depth=5),
        RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
        AdaBoostClassifier(),
        GaussianNB(),
        LinearDiscriminantAnalysis(),
        QuadraticDiscriminantAnalysis()]

    clf = None
    for name, candidate in zip(self.names, classifiers):
        if name == self.name:
            clf = candidate
    if clf is None:
        # Previously surfaced later as an unbound-local error on ``clf``.
        raise ValueError("unknown classifier name: {!r}".format(self.name))

    if train:
        start_time = int(time.time())
        data = self.get_latest_prices(normalize=False)
        price_datasets = [[], []]
        for i, _ in enumerate(data):
            try:
                # get classifier projection
                sample = create_sample_row(data, i, self.datasetinputs)
                last_price = data[i + self.datasetinputs - 1]
                next_price = data[i + self.datasetinputs]
                change = next_price - last_price
                pct_change = change / last_price
                # fee x 2 since we'd need to clear both buy and sell fees to be profitable
                fee_pct = 0.002 * 2
                # -1: move smaller than the fees, 1: price went up, 0: otherwise
                do_buy = -1 if abs(pct_change) < fee_pct else (1 if change > 0 else 0)
                price_datasets[0].append(sample)
                price_datasets[1].append(do_buy)
            except Exception:
                # Deliberately best-effort: windows that cannot be built
                # (e.g. running past the end of ``data``) are skipped.
                continue

        data = price_datasets
        back = self.timedelta_back_in_granularity_increments
        if back == 0:
            train_data = data
            test_data = [[], []]
        else:
            # Hold back the last ``back`` samples for testing.
            train_data = [data[0][0:-back], data[1][0:-back]]
            test_data = [data[0][len(train_data[0]):],
                         data[1][len(train_data[1]):]]
        self.datasets = train_data

        X, y = train_data
        scaler = StandardScaler()
        X = scaler.fit_transform(X)
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            X, y, test_size=.4)

        # The mesh is built from the first two feature columns only.
        self.x_min, self.x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
        self.y_min, self.y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
        self.xx, self.yy = np.meshgrid(np.arange(self.x_min, self.x_max, h),
                                       np.arange(self.y_min, self.y_max, h))

        clf.fit(self.X_train, self.y_train)
        score = clf.score(self.X_test, self.y_test)

        # Evaluate the decision surface on every point of the mesh
        # [self.x_min, self.x_max] x [self.y_min, self.y_max].
        _input = np.c_[self.xx.ravel(), self.yy.ravel()]
        if hasattr(clf, "decision_function"):
            self.Z = clf.decision_function(_input)
        else:
            self.Z = clf.predict_proba(_input)[:, 1]

        # ``test_data`` is always a two-element list, so check the sample
        # list itself; an empty window used to end in a ZeroDivisionError.
        if test and len(test_data[0]) > 0:
            stats = {'r': 0, 'w': 0,
                     'p': {0: 0, 1: 0, -1: 0},
                     'a': {0: 0, 1: 0, -1: 0}}
            for sample, actual in zip(test_data[0], test_data[1]):
                # Reuse the scaler fitted on the training data; refitting on a
                # single sample standardises it against itself.
                prediction = clf.predict(scaler.transform([sample]))
                self.prediction = prediction
                predicted = prediction[0]
                stats['p'][predicted] += 1
                stats['a'][actual] += 1
                stats['r' if actual == predicted else 'w'] += 1
            pct_correct = 1.0 * stats['r'] / (stats['r'] + stats['w'])
            all_output = all_output + str(('stats', self.name, round(pct_correct, 2)))
            all_output = all_output + str(('stats_debug', stats))
            self.percent_correct = int(pct_correct * 100)
            self.prediction_size = len(test_data[0])

        all_output = all_output + str((self.name, round(score * 100)))
        self.score = score * 100
        end_time = int(time.time())
        self.time = end_time - start_time
        self.output = all_output

    self.clf = clf
    return clf
def get_classifier(self, train=True, test=True):
    """Select, optionally train, and optionally back-test the classifier named ``self.name``.

    Args:
        train: When true, fetch prices with ``self.get_latest_prices``, label
            every window of ``self.datasetinputs`` prices as hold/buy/sell,
            fit the classifier and store the fit artefacts (``X_train``,
            ``xz``, ``Z``, ``score``, ``time``, ``output`` ...) on ``self``.
        test: When true (and training ran), predict the held-back samples and
            store ``percent_correct`` / ``prediction_size`` on ``self``.

    Returns:
        The selected scikit-learn estimator, also stored as ``self.clf``.

    Raises:
        ValueError: If ``self.name`` is not one of ``self.names``.
    """
    all_output = ""
    h = .02  # step size in the mesh
    self.names = [
        "Nearest Neighbors", "Linear SVM", "RBF SVM", "Decision Tree",
        "Random Forest", "AdaBoost", "Naive Bayes",
        "Linear Discriminant Analysis", "Quadratic Discriminant Analysis"
    ]
    classifiers = [
        KNeighborsClassifier(3),
        SVC(kernel="linear", C=0.025),
        SVC(gamma=2, C=1),
        DecisionTreeClassifier(max_depth=5),
        RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
        AdaBoostClassifier(),
        GaussianNB(),
        LinearDiscriminantAnalysis(),
        QuadraticDiscriminantAnalysis()
    ]

    clf = None
    for name, candidate in zip(self.names, classifiers):
        if name == self.name:
            clf = candidate
    if clf is None:
        # Previously surfaced later as an unbound-local error on ``clf``.
        raise ValueError("unknown classifier name: {!r}".format(self.name))

    if train:
        start_time = int(time.time())
        data = self.get_latest_prices(normalize=False)
        price_datasets = [[], []]
        for i, _ in enumerate(data):
            try:
                # get classifier projection
                sample = create_sample_row(data, i, self.datasetinputs)
                last_price = data[i + self.datasetinputs - 1]
                next_price = data[i + self.datasetinputs]
                change = next_price - last_price
                pct_change = change / last_price
                fee_pct = get_fee_amount()
                # fee x 2 since we'd need to clear both buy and sell fees to be profitable
                fee_pct = fee_pct * 2
                fee_pct = fee_pct * settings.FEE_MANAGEMENT_STRATEGY  # see desc in settings.py
                if abs(pct_change) < fee_pct:
                    do_buy = ClassifierTest.HOLD
                elif change > 0:
                    do_buy = ClassifierTest.BUY
                else:
                    do_buy = ClassifierTest.SELL
                price_datasets[0].append(sample)
                price_datasets[1].append(do_buy)
            except Exception:
                # Deliberately best-effort: windows that cannot be built
                # (e.g. running past the end of ``data``) are skipped.
                continue

        data = price_datasets
        back = self.timedelta_back_in_granularity_increments
        if back == 0:
            train_data = data
            test_data = [[], []]
        else:
            # Hold back the last ``back`` samples for testing.
            train_data = [data[0][0:-back], data[1][0:-back]]
            test_data = [data[0][len(train_data[0]):],
                         data[1][len(train_data[1]):]]
        self.datasets = train_data

        X, y = train_data
        scaler = StandardScaler()
        X = scaler.fit_transform(X)
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            X, y, test_size=.4)

        # One mesh axis per input feature, padded by 0.5 on each side.
        self.min = {}
        self.max = {}
        mesh_args = []
        for col in range(0, self.datasetinputs):
            self.min[col] = X[:, col].min() - .5
            self.max[col] = X[:, col].max() + .5
            mesh_args.append(np.arange(self.min[col], self.max[col], h))
        self.xz = np.meshgrid(*mesh_args)

        clf.fit(self.X_train, self.y_train)
        score = clf.score(self.X_test, self.y_test)

        # Evaluate the decision surface on every point of the mesh.
        self.ravel_args = [self.xz[col].ravel()
                           for col in range(0, self.datasetinputs)]
        self._input = np.column_stack(self.ravel_args)
        if hasattr(clf, "decision_function"):
            self.Z = clf.decision_function(self._input)
        else:
            self.Z = clf.predict_proba(self._input)[:, 1]

        # ``test_data`` is always a two-element list, so check the sample
        # list itself; an empty window used to end in a ZeroDivisionError.
        if test and len(test_data[0]) > 0:
            # NOTE(review): keys assume the ClassifierTest labels are 0, 1
            # and -1 -- confirm against the class constants.
            stats = {'r': 0, 'w': 0,
                     'p': {0: 0, 1: 0, -1: 0},
                     'a': {0: 0, 1: 0, -1: 0}}
            for sample, actual in zip(test_data[0], test_data[1]):
                # Reuse the scaler fitted on the training data; refitting on a
                # single sample standardises it against itself.
                prediction = clf.predict(scaler.transform([sample]))
                self.prediction = prediction
                predicted = prediction[0]
                stats['p'][predicted] += 1
                stats['a'][actual] += 1
                stats['r' if actual == predicted else 'w'] += 1
            pct_correct = 1.0 * stats['r'] / (stats['r'] + stats['w'])
            all_output = all_output + str(('stats', self.name, round(pct_correct, 2)))
            all_output = all_output + str(('stats_debug', stats))
            self.percent_correct = int(pct_correct * 100)
            self.prediction_size = len(test_data[0])

        all_output = all_output + str((self.name, round(score * 100)))
        self.score = score * 100
        end_time = int(time.time())
        self.time = end_time - start_time
        self.output = all_output

    self.clf = clf
    return clf
def handle(self, *args, **options):
    """Compare several scikit-learn classifiers on up/down price-movement data.

    For each granularity the stored ``Price`` rows for ``self.symbol`` are
    normalised, cut into windows of two prices and labelled 1 (next price
    higher) or 0. Every classifier is fitted on a 60/40 split of each
    dataset; its score and a right/wrong tally over all datasets are printed.
    Plots are drawn only when the local ``graph`` flag is enabled.
    """
    # http://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html
    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib.colors import ListedColormap
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        # Older scikit-learn releases only ship the legacy module.
        from sklearn.cross_validation import train_test_split
    from sklearn.preprocessing import StandardScaler
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
    from history.tools import normalization, filter_by_mins, create_sample_row
    from history.models import Price

    h = .02  # step size in the mesh
    names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Decision Tree",
             "Random Forest", "AdaBoost", "Naive Bayes",
             "Linear Discriminant Analysis",
             "Quadratic Discriminant Analysis"]
    classifiers = [
        KNeighborsClassifier(3),
        SVC(kernel="linear", C=0.025),
        SVC(gamma=2, C=1),
        DecisionTreeClassifier(max_depth=5),
        RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
        AdaBoostClassifier(),
        GaussianNB(),
        LinearDiscriminantAnalysis(),
        QuadraticDiscriminantAnalysis()]

    graph = False
    self.symbol = 'BTC_ETH'
    self.minutes_back = 100
    self.timedelta_back_in_granularity_increments = 0
    datasetinputs = 2
    gran_options = [30, 60, 120, 240]

    datasets = []
    _names = []
    for gran in gran_options:
        self.granularity = gran
        splice_point = self.minutes_back + self.timedelta_back_in_granularity_increments
        prices = Price.objects.filter(symbol=self.symbol).order_by('-created_on')
        prices = filter_by_mins(prices, self.granularity)
        prices = [price.price for price in prices]
        data = normalization(list(prices[0:splice_point]))
        data.reverse()

        price_datasets = [[], []]
        for i, _ in enumerate(data):
            try:
                # get NN projection
                sample = create_sample_row(data, i, datasetinputs)
                last_price = data[i + datasetinputs - 1]
                next_price = data[i + datasetinputs]
                change = next_price - last_price
                pct_change = change / last_price
                fee_pct = 0.002
                # The "hold" class (-1) is switched off by ``and False``,
                # leaving a binary up (1) / not-up (0) label.
                do_buy = -1 if abs(pct_change) < fee_pct and False else (1 if change > 0 else 0)
                price_datasets[0].append(sample)
                price_datasets[1].append(do_buy)
            except Exception as e:
                print(e)
        datasets.append(price_datasets)
        _names.append(str(gran))

    if graph:
        figure = plt.figure(figsize=(27, 9))
    plot_index = 1

    # iterate over datasets
    for _index, dataset in enumerate(datasets):
        # preprocess dataset, split into training and test part
        X, y = dataset
        X = StandardScaler().fit_transform(X)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4)

        x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
        y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
        xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                             np.arange(y_min, y_max, h))

        # just plot the dataset first
        if graph:
            cm = plt.cm.RdBu
            cm_bright = ListedColormap(['#FF0000', '#0000FF'])
            ax = plt.subplot(len(datasets), len(classifiers) + 1, plot_index)
            # Plot the training points
            ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
            # and testing points
            ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)
            ax.set_xlim(xx.min(), xx.max())
            ax.set_ylim(yy.min(), yy.max())
            ax.set_xticks(())
            ax.set_yticks(())
            plot_index += 1

        # iterate over classifiers
        for name, clf in zip(names, classifiers):
            if graph:
                ax = plt.subplot(len(datasets), len(classifiers) + 1, plot_index)
            clf.fit(X_train, y_train)
            score = clf.score(X_test, y_test)

            # Plot the decision boundary. For that, we will assign a color to
            # each point in the mesh [x_min, x_max]x[y_min, y_max].
            _input = np.c_[xx.ravel(), yy.ravel()]
            if hasattr(clf, "decision_function"):
                Z = clf.decision_function(_input)
            else:
                Z = clf.predict_proba(_input)[:, 1]

            print(name, round(score * 100))

            # Put the result into a color plot
            if graph:
                Z = Z.reshape(xx.shape)
                ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)
                # Plot also the training points
                ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
                # and testing points
                ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)
                ax.set_xlim(xx.min(), xx.max())
                ax.set_ylim(yy.min(), yy.max())
                ax.set_xticks(())
                ax.set_yticks(())
                ax.set_title("(" + _names[_index] + ")" + name)
                text = ('%.2f' % score).lstrip('0')
                ax.text(xx.max() - .3, yy.min() + .3, text, size=15,
                        horizontalalignment='right')
                plot_index += 1

            # Right/wrong tally over every dataset. The loop uses its own
            # names so the subplot counter is no longer overwritten.
            # NOTE(review): samples are predicted unscaled although the
            # classifier was fitted on standardised data -- confirm intent.
            stats = {'r': 0, 'w': 0}
            for samples, actuals in datasets:
                for sample, actual in zip(samples, actuals):
                    # predict() expects a 2-D array: one row per sample.
                    prediction = clf.predict([sample])
                    stats['r' if actual == prediction[0] else 'w'] += 1
            print('stats', name, stats,
                  round((100.0 * stats['r'] / (stats['r'] + stats['w'])), 2))

    if graph:
        figure.subplots_adjust(left=.02, right=.98)
        plt.show()
def predict_v2(ticker, hidden_layers=15, NUM_MINUTES_BACK=1000, NUM_EPOCHS=1000,
               granularity_minutes=15, datasetinputs=5, learningrate=0.005,
               bias=False, momentum=0.1, weightdecay=0.0, recurrent=False,
               timedelta_back_in_granularity_increments=0):
    """Train a ``PredictionTest`` network, back-test it and save the result.

    The keyword arguments are copied onto a new ``PredictionTest``. After
    ``get_nn()`` and ``get_train_and_test_data()``, each window of
    ``datasetinputs`` test values is passed to ``pt.predict`` and compared
    with the value that follows it. Accuracy, average difference and
    modelled profit/loss are stored on the record before saving.

    Returns:
        The primary key of the saved ``PredictionTest``.
    """
    # setup
    print_and_log(
        "(p)starting ticker:{} hidden:{} min:{} epoch:{} gran:{} dsinputs:{} "
        "learningrate:{} bias:{} momentum:{} weightdecay:{} "
        "recurrent:{}, timedelta_back_in_granularity_increments:{} ".format(
            ticker, hidden_layers, NUM_MINUTES_BACK, NUM_EPOCHS,
            granularity_minutes, datasetinputs, learningrate, bias, momentum,
            weightdecay, recurrent, timedelta_back_in_granularity_increments))
    pt = PredictionTest()
    pt.type = 'mock'
    pt.symbol = ticker
    pt.datasetinputs = datasetinputs
    pt.hiddenneurons = hidden_layers
    pt.minutes_back = NUM_MINUTES_BACK
    pt.epochs = NUM_EPOCHS
    pt.momentum = momentum
    pt.granularity = granularity_minutes
    pt.bias = bias
    # Tri-state encoding: -1 = unset, 1 = true, 0 = false.
    pt.bias_chart = -1 if pt.bias is None else (1 if pt.bias else 0)
    pt.learningrate = learningrate
    pt.weightdecay = weightdecay
    pt.recurrent = recurrent
    pt.recurrent_chart = -1 if pt.recurrent is None else (1 if pt.recurrent else 0)
    pt.timedelta_back_in_granularity_increments = timedelta_back_in_granularity_increments
    start_time = int(time.time())

    # get neural network & data
    pt.get_nn()
    _sample_data, test_data = pt.get_train_and_test_data()

    # output / testing
    round_to = 2
    num_times_directionally_correct = 0
    num_times = 0
    diffs = []
    profitloss_pct = []
    output_lines = []
    for i, _ in enumerate(test_data):
        try:
            # get NN projection
            sample = create_sample_row(test_data, i, datasetinputs)
            recommend, nn_price, last_sample, _projected_change_pct = pt.predict(sample)

            # calculate profitability
            actual_price = test_data[i + datasetinputs]
            diff = nn_price - actual_price
            diff_pct = 100 * diff / actual_price
            actual_move = actual_price - last_sample
            predicted_move = nn_price - last_sample
            directionally_correct = (actual_move > 0 and predicted_move > 0) \
                or (actual_move < 0 and predicted_move < 0)
            if recommend != 'HOLD':
                # A trade gains the size of the move when the direction was
                # right and loses it otherwise.
                profitloss_pct.append(
                    abs(actual_move / last_sample) * (1 if directionally_correct else -1))
            if directionally_correct:
                num_times_directionally_correct += 1
            num_times += 1
            diffs.append(diff)
            output_lines.append(
                "{}) seq ending in {} => {} (act {}, {}/{} pct off); Recommend: {}; "
                "Was Directionally Correct:{} ".format(
                    i, round(actual_price, round_to), round(nn_price, round_to),
                    round(actual_price, round_to), round(diff, round_to),
                    round(diff_pct, 1), recommend, directionally_correct))
        except IndexError:
            # Expected once the window runs past the end of ``test_data``.
            pass
        except Exception as e:
            print_and_log("(p)" + str(e))

    # Guard the averages: with no evaluated samples, or when every
    # recommendation was HOLD, the divisions used to raise ZeroDivisionError.
    if num_times:
        avg_diff = sum(abs(diff[0]) for diff in diffs) / num_times
        pct_correct = 100 * num_times_directionally_correct / num_times
    else:
        avg_diff = 0.0
        pct_correct = 0.0
    if profitloss_pct:
        modeled_profit_loss = sum(profitloss_pct) / len(profitloss_pct)
    else:
        modeled_profit_loss = 0.0

    output = 'directionally correct {} of {} times. {}%. avg diff={}, profit={}'.format(
        num_times_directionally_correct, num_times, round(pct_correct, 0),
        round(avg_diff, 4), round(modeled_profit_loss, 3))
    print_and_log("(p)" + output)
    output_lines.append(output)
    # Every line, including the first, is preceded by a newline.
    all_output = "".join("\n" + line for line in output_lines)

    end_time = int(time.time())
    pt.time = end_time - start_time
    pt.prediction_size = len(diffs)
    pt.output = all_output
    pt.percent_correct = pct_correct
    pt.avg_diff = avg_diff
    pt.profitloss = modeled_profit_loss
    pt.profitloss_int = int(pt.profitloss * 100)
    pt.save()
    return pt.pk
def handle(self, *args, **options):
    """Fit probabilistic classifiers on up/down price-movement data and plot class probabilities.

    For each granularity the stored ``Price`` rows for ``self.symbol`` are
    normalised, cut into windows of two prices and labelled 1 (next price
    higher) or 0. Only the dataset of the first granularity is fitted. For
    every classifier the training-set classification rate is printed and one
    probability map per predicted class is drawn.
    """
    # http://scikit-learn.org/stable/auto_examples/classification/plot_classification_probability.html
    # Author: Alexandre Gramfort <*****@*****.**>
    # License: BSD 3 clause
    import matplotlib.pyplot as plt
    import numpy as np
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import SVC
    from history.tools import normalization, filter_by_mins, create_sample_row
    from history.models import Price

    self.symbol = 'BTC_ETH'
    self.minutes_back = 100
    self.timedelta_back_in_granularity_increments = 0
    datasetinputs = 2
    gran_options = [30, 60, 120, 240]

    datasets = []
    for gran in gran_options:
        self.granularity = gran
        splice_point = self.minutes_back + self.timedelta_back_in_granularity_increments
        prices = Price.objects.filter(symbol=self.symbol).order_by('-created_on')
        prices = filter_by_mins(prices, self.granularity)
        prices = [price.price for price in prices]
        data = normalization(list(prices[0:splice_point]))
        data.reverse()

        price_datasets = [[], []]
        for i, _ in enumerate(data):
            try:
                # get NN projection
                sample = create_sample_row(data, i, datasetinputs)
                last_price = data[i + datasetinputs - 1]
                next_price = data[i + datasetinputs]
                change = next_price - last_price
                pct_change = change / last_price
                fee_pct = 0.002
                # The "hold" class (-1) is switched off by ``and False``,
                # leaving a binary up (1) / not-up (0) label.
                do_buy = -1 if abs(pct_change) < fee_pct and False else (
                    1 if change > 0 else 0)
                price_datasets[0].append(list(sample))
                price_datasets[1].append(do_buy)
            except Exception as e:
                print(e)
        datasets.append(price_datasets)

    # Only the first granularity's dataset is used below.
    samples, labels = datasets[0]
    # Convert to floats explicitly; the former ``np.ndarray(buffer=...)``
    # reinterpreted raw bytes and was only right for float64 input.
    X = np.array(samples, dtype=float).reshape(-1, datasetinputs)
    y = np.array(labels)

    C = 1.0

    # Create different classifiers. The logistic regression cannot do
    # multiclass out of the box.
    classifiers = {
        # The L1 penalty needs a solver that supports it.
        'L1 logistic': LogisticRegression(C=C, penalty='l1', solver='liblinear'),
        'L2 logistic (OvR)': LogisticRegression(C=C, penalty='l2'),
        'Linear SVC': SVC(kernel='linear', C=C, probability=True,
                          random_state=0),
        'L2 logistic (Multinomial)': LogisticRegression(
            C=C, solver='lbfgs', multi_class='multinomial')
    }

    n_classifiers = len(classifiers)

    plt.figure(figsize=(3 * 2, n_classifiers * 2))
    plt.subplots_adjust(bottom=.2, top=.95)

    # NOTE(review): the grid extents (3..9, 1..5) come from the upstream
    # scikit-learn example -- confirm they suit the normalised price data.
    xx = np.linspace(3, 9, 100)
    yy = np.linspace(1, 5, 100).T
    xx, yy = np.meshgrid(xx, yy)
    Xfull = np.c_[xx.ravel(), yy.ravel()]

    for index, (name, classifier) in enumerate(classifiers.items()):
        classifier.fit(X, y)

        y_pred = classifier.predict(X)
        classif_rate = np.mean(y_pred.ravel() == y.ravel()) * 100
        print("classif_rate for %s : %f " % (name, classif_rate))

        # View probabilities
        probas = classifier.predict_proba(Xfull)
        n_classes = np.unique(y_pred).size
        for k in range(n_classes):
            plt.subplot(n_classifiers, n_classes, index * n_classes + k + 1)
            plt.title("Class %d" % k)
            if k == 0:
                plt.ylabel(name)
            imshow_handle = plt.imshow(probas[:, k].reshape((100, 100)),
                                       extent=(3, 9, 1, 5), origin='lower')
            plt.xticks(())
            plt.yticks(())
            idx = (y_pred == k)
            if idx.any():
                plt.scatter(X[idx, 0], X[idx, 1], marker='o', c='k')

    ax = plt.axes([0.15, 0.04, 0.7, 0.05])
    plt.title("Probability")
    plt.colorbar(imshow_handle, cax=ax, orientation='horizontal')

    plt.show()
def predict_v2(ticker, hidden_layers=15, NUM_MINUTES_BACK=1000, NUM_EPOCHS=1000,
               granularity_minutes=15, datasetinputs=5, learningrate=0.005,
               bias=False, momentum=0.1, weightdecay=0.0, recurrent=False,
               timedelta_back_in_granularity_increments=0):
    """Train a ``PredictionTest`` network, back-test it and save the result.

    The keyword arguments are copied onto a new ``PredictionTest``. After
    ``get_nn()`` and ``get_train_and_test_data()``, each window of
    ``datasetinputs`` test values is passed to ``pt.predict`` and compared
    with the value that follows it. Accuracy, average difference and
    modelled profit/loss are stored on the record before saving.

    Returns:
        The primary key of the saved ``PredictionTest``.
    """
    # setup
    print_and_log(
        "(p)starting ticker:{} hidden:{} min:{} epoch:{} gran:{} dsinputs:{} "
        "learningrate:{} bias:{} momentum:{} weightdecay:{} "
        "recurrent:{}, timedelta_back_in_granularity_increments:{} ".format(
            ticker, hidden_layers, NUM_MINUTES_BACK, NUM_EPOCHS,
            granularity_minutes, datasetinputs, learningrate, bias, momentum,
            weightdecay, recurrent, timedelta_back_in_granularity_increments))
    pt = PredictionTest()
    pt.type = 'mock'
    pt.symbol = ticker
    pt.datasetinputs = datasetinputs
    pt.hiddenneurons = hidden_layers
    pt.minutes_back = NUM_MINUTES_BACK
    pt.epochs = NUM_EPOCHS
    pt.momentum = momentum
    pt.granularity = granularity_minutes
    pt.bias = bias
    # Tri-state encoding: -1 = unset, 1 = true, 0 = false.
    pt.bias_chart = -1 if pt.bias is None else (1 if pt.bias else 0)
    pt.learningrate = learningrate
    pt.weightdecay = weightdecay
    pt.recurrent = recurrent
    pt.recurrent_chart = -1 if pt.recurrent is None else (1 if pt.recurrent else 0)
    pt.timedelta_back_in_granularity_increments = timedelta_back_in_granularity_increments
    start_time = int(time.time())

    # get neural network & data
    pt.get_nn()
    _sample_data, test_data = pt.get_train_and_test_data()

    # output / testing
    round_to = 2
    num_times_directionally_correct = 0
    num_times = 0
    diffs = []
    profitloss_pct = []
    output_lines = []
    for i, _ in enumerate(test_data):
        try:
            # get NN projection
            sample = create_sample_row(test_data, i, datasetinputs)
            recommend, nn_price, last_sample, _projected_change_pct = pt.predict(sample)

            # calculate profitability
            actual_price = test_data[i + datasetinputs]
            diff = nn_price - actual_price
            diff_pct = 100 * diff / actual_price
            actual_move = actual_price - last_sample
            predicted_move = nn_price - last_sample
            directionally_correct = (actual_move > 0 and predicted_move > 0) \
                or (actual_move < 0 and predicted_move < 0)
            if recommend != 'HOLD':
                # A trade gains the size of the move when the direction was
                # right and loses it otherwise.
                profitloss_pct.append(
                    abs(actual_move / last_sample) * (1 if directionally_correct else -1))
            if directionally_correct:
                num_times_directionally_correct += 1
            num_times += 1
            diffs.append(diff)
            output_lines.append(
                "{}) seq ending in {} => {} (act {}, {}/{} pct off); Recommend: {}; "
                "Was Directionally Correct:{} ".format(
                    i, round(actual_price, round_to), round(nn_price, round_to),
                    round(actual_price, round_to), round(diff, round_to),
                    round(diff_pct, 1), recommend, directionally_correct))
        except IndexError:
            # Expected once the window runs past the end of ``test_data``.
            pass
        except Exception as e:
            print_and_log("(p)" + str(e))

    # Guard the averages: with no evaluated samples, or when every
    # recommendation was HOLD, the divisions used to raise ZeroDivisionError.
    if num_times:
        avg_diff = sum(abs(diff[0]) for diff in diffs) / num_times
        pct_correct = 100 * num_times_directionally_correct / num_times
    else:
        avg_diff = 0.0
        pct_correct = 0.0
    if profitloss_pct:
        modeled_profit_loss = sum(profitloss_pct) / len(profitloss_pct)
    else:
        modeled_profit_loss = 0.0

    output = 'directionally correct {} of {} times. {}%. avg diff={}, profit={}'.format(
        num_times_directionally_correct, num_times, round(pct_correct, 0),
        round(avg_diff, 4), round(modeled_profit_loss, 3))
    print_and_log("(p)" + output)
    output_lines.append(output)
    # Every line, including the first, is preceded by a newline.
    all_output = "".join("\n" + line for line in output_lines)

    end_time = int(time.time())
    pt.time = end_time - start_time
    pt.prediction_size = len(diffs)
    pt.output = all_output
    pt.percent_correct = pct_correct
    pt.avg_diff = avg_diff
    pt.profitloss = modeled_profit_loss
    pt.profitloss_int = int(pt.profitloss * 100)
    pt.save()
    return pt.pk
def handle(self, *args, **options):
    """Compare several scikit-learn classifiers on up/down price-movement data.

    For each granularity the stored ``Price`` rows for ``self.symbol`` are
    normalised, cut into windows of two prices and labelled 1 (next price
    higher) or 0. Every classifier is fitted on a 60/40 split of each
    dataset; its score and a right/wrong tally over all datasets are printed.
    Plots are drawn only when the local ``graph`` flag is enabled.
    """
    # http://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html
    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib.colors import ListedColormap
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        # Older scikit-learn releases only ship the legacy module.
        from sklearn.cross_validation import train_test_split
    from sklearn.preprocessing import StandardScaler
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
    from history.tools import normalization, filter_by_mins, create_sample_row
    from history.models import Price

    h = .02  # step size in the mesh
    names = [
        "Nearest Neighbors", "Linear SVM", "RBF SVM", "Decision Tree",
        "Random Forest", "AdaBoost", "Naive Bayes",
        "Linear Discriminant Analysis", "Quadratic Discriminant Analysis"
    ]
    classifiers = [
        KNeighborsClassifier(3),
        SVC(kernel="linear", C=0.025),
        SVC(gamma=2, C=1),
        DecisionTreeClassifier(max_depth=5),
        RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
        AdaBoostClassifier(),
        GaussianNB(),
        LinearDiscriminantAnalysis(),
        QuadraticDiscriminantAnalysis()
    ]

    graph = False
    self.symbol = 'BTC_ETH'
    self.minutes_back = 100
    self.timedelta_back_in_granularity_increments = 0
    datasetinputs = 2
    gran_options = [30, 60, 120, 240]

    datasets = []
    _names = []
    for gran in gran_options:
        self.granularity = gran
        splice_point = self.minutes_back + self.timedelta_back_in_granularity_increments
        prices = Price.objects.filter(symbol=self.symbol).order_by('-created_on')
        prices = filter_by_mins(prices, self.granularity)
        prices = [price.price for price in prices]
        data = normalization(list(prices[0:splice_point]))
        data.reverse()

        price_datasets = [[], []]
        for i, _ in enumerate(data):
            try:
                # get NN projection
                sample = create_sample_row(data, i, datasetinputs)
                last_price = data[i + datasetinputs - 1]
                next_price = data[i + datasetinputs]
                change = next_price - last_price
                pct_change = change / last_price
                fee_pct = 0.002
                # The "hold" class (-1) is switched off by ``and False``,
                # leaving a binary up (1) / not-up (0) label.
                do_buy = -1 if abs(pct_change) < fee_pct and False else (
                    1 if change > 0 else 0)
                price_datasets[0].append(sample)
                price_datasets[1].append(do_buy)
            except Exception as e:
                print(e)
        datasets.append(price_datasets)
        _names.append(str(gran))

    if graph:
        figure = plt.figure(figsize=(27, 9))
    plot_index = 1

    # iterate over datasets
    for _index, dataset in enumerate(datasets):
        # preprocess dataset, split into training and test part
        X, y = dataset
        X = StandardScaler().fit_transform(X)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4)

        x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
        y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
        xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                             np.arange(y_min, y_max, h))

        # just plot the dataset first
        if graph:
            cm = plt.cm.RdBu
            cm_bright = ListedColormap(['#FF0000', '#0000FF'])
            ax = plt.subplot(len(datasets), len(classifiers) + 1, plot_index)
            # Plot the training points
            ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
            # and testing points
            ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
                       alpha=0.6)
            ax.set_xlim(xx.min(), xx.max())
            ax.set_ylim(yy.min(), yy.max())
            ax.set_xticks(())
            ax.set_yticks(())
            plot_index += 1

        # iterate over classifiers
        for name, clf in zip(names, classifiers):
            if graph:
                ax = plt.subplot(len(datasets), len(classifiers) + 1, plot_index)
            clf.fit(X_train, y_train)
            score = clf.score(X_test, y_test)

            # Plot the decision boundary. For that, we will assign a color to
            # each point in the mesh [x_min, x_max]x[y_min, y_max].
            _input = np.c_[xx.ravel(), yy.ravel()]
            if hasattr(clf, "decision_function"):
                Z = clf.decision_function(_input)
            else:
                Z = clf.predict_proba(_input)[:, 1]

            print(name, round(score * 100))

            # Put the result into a color plot
            if graph:
                Z = Z.reshape(xx.shape)
                ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)
                # Plot also the training points
                ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
                # and testing points
                ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
                           alpha=0.6)
                ax.set_xlim(xx.min(), xx.max())
                ax.set_ylim(yy.min(), yy.max())
                ax.set_xticks(())
                ax.set_yticks(())
                ax.set_title("(" + _names[_index] + ")" + name)
                text = ('%.2f' % score).lstrip('0')
                ax.text(xx.max() - .3, yy.min() + .3, text, size=15,
                        horizontalalignment='right')
                plot_index += 1

            # Right/wrong tally over every dataset. The loop uses its own
            # names so the subplot counter is no longer overwritten.
            # NOTE(review): samples are predicted unscaled although the
            # classifier was fitted on standardised data -- confirm intent.
            stats = {'r': 0, 'w': 0}
            for samples, actuals in datasets:
                for sample, actual in zip(samples, actuals):
                    # predict() expects a 2-D array: one row per sample.
                    prediction = clf.predict([sample])
                    stats['r' if actual == prediction[0] else 'w'] += 1
            print('stats', name, stats,
                  round((100.0 * stats['r'] / (stats['r'] + stats['w'])), 2))

    if graph:
        figure.subplots_adjust(left=.02, right=.98)
        plt.show()