# NOTE(review): the statements down to `return` are the tail of a function
# whose `def` line lies above this chunk (presumably
# `normalize(data, mean=None, std=None)`, judging by the calls below). The
# indentation used here assumes a module-level function -- confirm.
    # Find the first non-NaN sample and overwrite every entry before it with
    # that value. This assigns into `data` itself, so the caller's array is
    # modified in place.
    nan = np.isnan(data)
    nnan = np.where(~nan)[0][0]  # IndexError if every entry is NaN
    data[:nnan] = data[nnan]
    # Use the statistics of `data` itself when the caller supplies none.
    # NOTE(review): `is None` is the idiomatic test; `== None` becomes an
    # element-wise comparison if an array is ever passed here.
    if mean == None:
        mean = np.mean(data)
    if std == None:
        std = np.std(data)
    # Return the standardised series together with the statistics applied, so
    # that other series can be scaled with the same mean/std.
    return (data - mean) / std, mean, std


if __name__=="__main__":
    # prepare data
    symbol = "0005.HK"
    stock_data = YahooHistorical()
    # The CSV is looked up in a "data" directory next to this script.
    stock_data.open(os.path.join(os.path.dirname(__file__), "data/" + symbol + ".csv"))
    dataset = stock_data.get()

    # Pull each field of the records into its own array.
    date = np.array([n['date'] for n in dataset])
    close_prices = np.array([n['adj_close'] for n in dataset])
    low_prices = np.array([n['low'] for n in dataset])
    high_prices = np.array([n['high'] for n in dataset])
    volumes = np.array([n['vol'] for n in dataset])

    # The adjusted close defines mean/std; the lows and highs are scaled with
    # those same statistics so all three share one axis.
    prices, mean, std = normalize(close_prices)
    low_prices, mean, std = normalize(low_prices, mean, std)
    high_prices, mean, std = normalize(high_prices, mean, std)

    # Indices of the local maxima / minima of the normalised close.
    emax = list(argrelextrema(prices, np.greater)[0])
    emin = list(argrelextrema(prices, np.less)[0])

    # Simple moving averages of the close, scaled with the close's mean/std.
    sma13, mean, std = normalize(talib.SMA(close_prices, 13), mean, std) # 13-day SMA
    sma50, mean, std = normalize(talib.SMA(close_prices, 50), mean, std) # 50-day SMA
    sma100, mean, std = normalize(talib.SMA(close_prices, 100), mean, std) # 100-day SMA
    sma200, mean, std = normalize(talib.SMA(close_prices, 200), mean, std) # 200-day SMA

    # MACD with periods 12 / 26 / 9.
    # NOTE(review): TA-Lib documents the three outputs as (macd, signal,
    # histogram); confirm that the upper/middle/lower names match the intent.
    macd_upper, macd_middle, macd_lower = talib.MACD(close_prices, 12, 26, 9)
import matplotlib.pyplot as plt
from scipy.signal import argrelextrema


def normalize(data, mean=None, std=None):
    """Standardise *data* to zero mean and unit standard deviation.

    Parameters:
        data: values to scale; must support ``data - mean`` and ``/ std``
            (the callers in this file pass NumPy arrays).
        mean: mean to subtract. When ``None`` it is computed from *data*.
        std: standard deviation to divide by. When ``None`` it is computed
            from *data*.

    Returns:
        A ``(x, mean, std)`` tuple: the scaled values and the statistics that
        were applied, so further series can be scaled identically.

    No guard is applied for ``std == 0``; the division is left to NumPy.
    """
    # Identity test: `== None` would turn into an element-wise comparison
    # (and an ambiguous truth value) if an array were ever passed in.
    if mean is None:
        mean = np.mean(data)
    if std is None:
        std = np.std(data)
    x = (data - mean) / std
    return x, mean, std


# prepare data
symbol = '^HSI'
yahoo_data = YahooHistorical(data_from=date(2014, 1, 1), data_to=date(2015, 12, 31))
# The CSV is looked up in a "data" directory next to this script.
yahoo_data.open(os.path.join(os.path.dirname(__file__), 'data/' + symbol + '.csv'))
training_set = yahoo_data.get()
# The test set is the training set minus its first 200 records.
test_set = training_set[200:]

# Indicators are requested from the loader object itself.
# NOTE(review): whether they cover the full series or align with `test_set`
# depends on YahooHistorical, which is not visible here -- confirm.
rsi = yahoo_data.relative_strength(n=14)
(sma13, sma7, macd) = yahoo_data.moving_average_convergence(7, 13) # 7 days and 13 days moving average

label = np.array([n['date'] for n in test_set])
(training_prices, m, s) = normalize(np.array([n['adj_close'] for n in training_set]))
# From here on `m` and `s` are the statistics of the test-set closes.
(prices, m, s) = normalize(np.array([n['adj_close'] for n in test_set]))
(tmax, tmin) = yahoo_data.trading_range_breakout(50)

# Price-scale series reuse the test-set mean/std so they share the price axis.
(sma13, m, s) = normalize(sma13, m, s)
(sma7, m, s) = normalize(sma7, m, s)
(tmax, m, s) = normalize(tmax, m, s)
(tmin, m, s) = normalize(tmin, m, s)
# MACD and RSI are standardised with their own statistics; note that this
# rebinds `m` and `s` to those statistics.
(macd, m, s) = normalize(macd)
(rsi, m, s) = normalize(rsi)

# Indices of the local maxima / minima of the normalised test prices.
emax = list(argrelextrema(prices, np.greater)[0])
emin = list(argrelextrema(prices, np.less)[0])
from data_prepare import YahooHistorical
from datetime import date, datetime
import numpy as np
from itertools import cycle
from sys import stdout
import matplotlib.pyplot as plt
from pybrain.supervised import RPropMinusTrainer, BackpropTrainer
from pybrain.datasets import SequentialDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules import LSTMLayer, SigmoidLayer, LinearLayer, TanhLayer
from pybrain.tools.validation import testOnSequenceData

# prepare data
# NOTE(review): `os` is used below but is not imported in the visible import
# lines -- confirm it is imported above this chunk.
yahoo_data = YahooHistorical()
yahoo_data.open(os.path.join(os.path.dirname(__file__), 'data/^HSI.csv'))
dataset = yahoo_data.get()

# build network
# Recurrent network with an LSTM hidden layer and a sigmoid output layer; the
# sizes 5 and 2 match the dataset dimensions declared below.
net = buildNetwork(5, 25, 2, hiddenclass=LSTMLayer, outclass=SigmoidLayer, outputbias=False, recurrent=True)
net.randomize()

# build sequential dataset
train_ds = SequentialDataSet(5, 2)
# NOTE(review): `training_set` and `sma20` are not defined in the visible part
# of this file (only `dataset` is) -- confirm where they come from.
# Each record is paired with its successor and with one value taken from the
# last len(training_set) entries of `sma20`.
for n, n1, m20 in zip(training_set, training_set[1:], sma20[-len(training_set):]):
    # Input vector: open, high, low, adjusted close and the sma20 value.
    i = [n['open'], n['high'], n['low'], n['adj_close'], m20]
    # Relative change of the adjusted close from this record to the next.
    d = (n1['adj_close'] - n['adj_close']) / n['adj_close']
    # Target: slot 0 holds the size of a rise, slot 1 the size of a fall (or
    # of no change); the other slot keeps the placeholder -1.
    # NOTE(review): the output layer is a SigmoidLayer while the placeholder
    # target is -1 -- confirm this is intended.
    o = [-1, -1]
    if d > 0:
        o[0] = abs(d)
    else:
        o[1] = abs(d)
    # NOTE(review): the visible text ends here; any remaining loop statements
    # lie below this chunk.
import os
from sys import stdout
import matplotlib.pyplot as plt
from data_prepare import YahooHistorical
from datetime import date, datetime
import numpy as np
from lib.features import Features
from pybrain.structure.modules import KohonenMap
import pickle

# Fixed seed so NumPy's random numbers are repeatable between runs.
np.random.seed(42)

symbol1 = '0005.HK'
yahoo_data1 = YahooHistorical(data_from=date(2000, 1, 1), data_to=date(2015, 12, 31))
# The CSV is looked up in a "data" directory next to this script.
yahoo_data1.open(os.path.join(os.path.dirname(__file__), 'data/' + symbol1 + '.csv'))
data1 = yahoo_data1.get()
# Only the adjusted close of each record is used from here on.
dataset1 = np.asarray([n['adj_close'] for n in data1])

# Window length. The first assignment is immediately overwritten, so the
# effective value is 5.
p = 17 # 17-day
p = 5 # 5-day
nodes = 3
# NOTE(review): presumably a nodes x nodes map over p-dimensional inputs --
# confirm against PyBrain's KohonenMap(dim, nNeurons).
som = KohonenMap(p, nodes)
# Alternative kept by the original author: load a previously pickled map.
# som = pickle.load(open("pattern5.p", "rb"))
som.learningrate = 0.01
epochs = 1000
training_dataset = []
result = {}

# preparation
# Slide a p-long window over the closes; `xrange` makes this Python 2 code.
for i in xrange(p, len(dataset1)):
    # The p closes immediately preceding index i.
    training_input = dataset1[i-p:i]
    mmax = np.max(training_input)
    # NOTE(review): the visible text ends here; any remaining loop statements
    # lie below this chunk.
from pybrain.datasets import SequentialDataSet, SequenceClassificationDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules import LSTMLayer, SigmoidLayer, LinearLayer, TanhLayer, SoftmaxLayer
from pybrain.structure.modules.biasunit import BiasUnit
from pybrain.structure.connections.full import FullConnection
from pybrain.structure import RecurrentNetwork
from pybrain.tools.xml.networkwriter import NetworkWriter
from pybrain.tools.xml.networkreader import NetworkReader
from pybrain.utilities import percentError

# NOTE(review): `np`, `os`, `date`, `YahooHistorical` and `Features` are used
# below but imported outside the visible text -- confirm.

# Fixed seed so NumPy's random numbers are repeatable between runs.
np.random.seed(42)

symbol = '^HSI'
yahoo_data = YahooHistorical(data_from=date(2001, 1, 1), data_to=date(2015, 12, 31))
# The CSV is looked up in a "data" directory next to this script.
yahoo_data.open(os.path.join(os.path.dirname(__file__), 'data/' + symbol + '.csv'))
data = yahoo_data.get()

# NOTE(review): this rebinds `date` -- called as a constructor two statements
# above -- to an array of the records' dates, so `date(...)` can no longer be
# called after this point.
date = np.asarray([n['date'] for n in data])
test_n = 201

# Pull each field of the records into its own array.
open_prices = np.asarray([n['open'] for n in data])
low_prices = np.asarray([n['low'] for n in data])
high_prices = np.asarray([n['high'] for n in data])
close_prices = np.asarray([n['adj_close'] for n in data])
volumes = np.asarray([n['vol'] for n in data])

# Feature construction is delegated to the project's Features class.
features = Features(open_prices, low_prices, high_prices, close_prices, volumes)
f_input = features.getInput()
f_output = features.getOutput()

# Training rows: everything after the first 200 rows and before the last
# `test_n` rows.
# NOTE(review): presumably the first 200 rows are a warm-up for long-window
# features -- confirm in Features.
training_input = f_input[200:-test_n]
training_output = f_output[200:-test_n]
# Testing rows: the last `test_n` rows minus the final 20, which are not used
# in the visible code.
testing_input = f_input[-test_n:-20]
testing_output = f_output[-test_n:-20]
testing_label = date[-test_n:-20]
# NOTE(review): the statements down to `return` are the tail of a function
# whose `def` line lies above this chunk (presumably
# `normalize(data, mean=None, std=None)`, with the first statement being the
# body of an `if mean == None:` test). The indentation here assumes that
# layout -- confirm.
        mean = np.mean(data)
    # NOTE(review): `is None` is the idiomatic test; `== None` becomes an
    # element-wise comparison if an array is ever passed here.
    if std == None:
        std = np.std(data)
    x = (data - mean) / std
    # Return the scaled values together with the statistics applied.
    return x, mean, std


def ir(p, i, data): # p-days, i=current day
    """Return the relative gap between a p-record average close and the close at *i*.

    The average is taken over the ``adj_close`` of ``data[i:i+p]`` (the record
    at *i* included) and compared with ``data[i]['adj_close']`` as
    ``(average - current) / current``.

    NOTE(review): the sum is always divided by ``p``, even when fewer than
    ``p`` records remain after *i*, which lowers the average near the end of
    *data* -- confirm callers keep ``i + p <= len(data)``.
    """
    a = np.sum([n['adj_close'] for n in data[i:i+p]]) / p
    c = data[i]['adj_close']
    return ((a - c) / c)


# prepare data
symbol = '0700.HK'
yahoo_data = YahooHistorical(data_from=date(2014, 1, 1), data_to=date(2015, 12, 31))
# The CSV is looked up in a "data" directory next to this script.
yahoo_data.open(os.path.join(os.path.dirname(__file__), 'data/' + symbol + '.csv'))
test_set = yahoo_data.get()

# Indicators are requested from the loader object itself.
rsi = yahoo_data.relative_strength(n=14)
(sma13, sma7, macd) = yahoo_data.moving_average_convergence(7, 13) # 7 days and 13 days moving average and MACD

test_label = np.array([n['date'] for n in test_set])
# `tmean` / `tstd` are the statistics of the closes; the breakout bands below
# are scaled with them so they share the price axis.
(prices, tmean, tstd) = normalize(np.array([n['adj_close'] for n in test_set]))
# One target slot per price, initialised to zero.
test_target = np.zeros(len(prices))

# Indices of the local maxima / minima of the normalised prices.
emax = list(argrelextrema(prices, np.greater)[0])
emin = list(argrelextrema(prices, np.less)[0])

# Trading-range-breakout bands for three window lengths.
(tmax50, tmin50) = yahoo_data.trading_range_breakout(50)
(tmax100, tmin100) = yahoo_data.trading_range_breakout(100)
(tmax200, tmin200) = yahoo_data.trading_range_breakout(200)

# `a` and `s` only receive the statistics handed back by normalize().
(tmax50, a, s) = normalize(tmax50, tmean, tstd)
(tmin50, a, s) = normalize(tmin50, tmean, tstd)
(tmax100, a, s) = normalize(tmax100, tmean, tstd)
(tmin100, a, s) = normalize(tmin100, tmean, tstd)
(tmax200, a, s) = normalize(tmax200, tmean, tstd)