def plot_industry_roc(startDate, endDate):
    """Print and plot the sub-industry symbols whose mean ROC beats SET.

    Prices for the symbols returned by ``data.getSubIndustrySymbol()`` are
    loaded for the date range, ``ind.roc`` is applied and its NaN values are
    replaced by 0.  A column is selected when its mean ROC is positive and
    not below the mean ROC of the ``'SET'`` column.  The selected names are
    printed in descending order of mean ROC and their normalized prices are
    plotted.

    Args:
        startDate: First date of the range, forwarded to ``pd.date_range``.
        endDate: Last date of the range, forwarded to ``pd.date_range``.

    Raises:
        KeyError: If the loaded data has no ``'SET'`` column.
    """
    dates = pd.date_range(startDate, endDate)
    symbolList = data.getSubIndustrySymbol()
    df = utl.loadPriceData(symbolList, dates)

    df_roc = ind.roc(df).fillna(0)  # treat missing ROC values as "no change"

    # Column-wise mean.  DataFrame.mean() is explicit about the axis, whereas
    # np.mean(df) collapses the whole frame to one scalar on pandas >= 2.0.
    mean_roc = df_roc.mean()
    mean_roc = mean_roc.where(mean_roc > 0)
    # NOTE(review): 'SET' is read after the positive-only mask, so when SET's
    # own mean ROC is not positive the benchmark is NaN and nothing is
    # selected.  Behaviour kept as is -- confirm that this is intended.
    mean_roc = mean_roc.where(mean_roc >= mean_roc['SET'])

    # Drop the masked (NaN) entries before ranking; sorting labels directly
    # avoids positional lookups into a label-indexed Series.
    ranked = mean_roc.dropna().sort_values(ascending=False)
    nameSymbol = ranked.index.tolist()
    print("List of Industry Group and Sector that ROC > SET (ROC)\n",
          nameSymbol)

    df_norm = utl.normalize_data(df)
    utg.plotStock(df_norm,
                  nameSymbol,
                  startDate,
                  endDate,
                  title='ROC: Industry Group and Sector')
Example #2
0
def getTrainData_2(symbol, startDate, endDate, remove_head=15):
    """Build a feature matrix and next-day price target for ``symbol``.

    Features are computed on every row except the last one (which has no
    following day to predict): close price, 5- and 15-period moving
    averages, two Bollinger bands (multipliers 1 and 2), MACD and its signal
    line, RSI and the columns returned by ``ind.percent_KD``.

    Args:
        symbol: Ticker whose column is read from the loaded price data.
        startDate: First date of the range, forwarded to ``pd.date_range``.
        endDate: Last date of the range, forwarded to ``pd.date_range``.
        remove_head: Number of leading rows dropped from both outputs so the
            indicator warm-up rows (NaN) are not used for training.

    Returns:
        Tuple ``(Xtrain, Ytrain)``: the feature DataFrame and the Series of
        the symbol's price shifted one row into the future.
    """
    dates = pd.date_range(startDate, endDate)
    df = util.loadPriceData([symbol], dates)  # 1
    close = df.iloc[:-1]  # skip tail: the last row has no next-day target
    assert len(df) == len(close) + 1

    ema5 = ind.ema(close, periods=5)  # 2
    # NOTE(review): the "SMA" features are produced by ind.ema, exactly like
    # the EMA ones; no simple-moving-average helper is visible from here --
    # confirm whether a dedicated one should be called instead.
    sma5 = ind.ema(close, periods=5)  # 3
    ema15 = ind.ema(close, periods=15)  # 4
    sma15 = ind.ema(close, periods=15)  # 5

    bb1p = ind.get_BBANDS(close, symbol, periods=14, mul=1)  # 6, 7
    bb2p = ind.get_BBANDS(close, symbol, periods=14, mul=2)  # 8, 9
    _, _, macd = ind.average_convergence(close)  # 10
    signal_macd = ind.signal_MACD(macd)  # 11

    rsi = ind.rsi(close)  # 12

    ohlcv = util.load_OHLCV(symbol, dates)
    percent_KD = ind.percent_KD(ohlcv)  # 13

    # Pick one column from each indicator frame and give it its feature name;
    # renaming the extracted Series leaves the indicator frames untouched.
    features = [
        close[symbol].rename('CLOSE'),  # 1
        ema5[symbol].rename('EMA5'),  # 2
        sma5[symbol].rename('SMA5'),  # 3
        ema15[symbol].rename('EMA15'),  # 4
        sma15[symbol].rename('SMA15'),  # 5
        bb1p['LOWER'].rename('LOWER_BB1P'),  # 6
        bb1p['UPPER'].rename('UPPER_BB1P'),  # 7
        bb2p['LOWER'].rename('LOWER_BB2P'),  # 8
        bb2p['UPPER'].rename('UPPER_BB2P'),  # 9
        macd[symbol].rename('MACD'),  # 10
        signal_macd[symbol].rename('SG_MACD'),  # 11
        rsi[symbol].rename('RSI'),  # 12
    ]

    Xtrain = pd.DataFrame(index=close.index)
    for feature in features:
        Xtrain = Xtrain.join(feature)
    Xtrain = Xtrain.join(percent_KD)  # 13

    # Target: tomorrow's price aligned on today's row; the shift leaves a NaN
    # in the last row, which is dropped.
    Ytrain = df[symbol].shift(-1).dropna(axis=0)
    assert len(Ytrain) == len(Xtrain)

    # skip at head row, avoid NaN values
    Xtrain = Xtrain.iloc[remove_head:]
    Ytrain = Ytrain.iloc[remove_head:]

    return Xtrain, Ytrain
Example #3
0
def getTrainData_1(symbol, startDate, endDate, periods=14, remove_head=19):
    """Build features and an up-trend label for ``symbol``.

    The last ``periods`` rows are excluded from the features because the
    label (``ind.isUpTrend``) looks ``periods`` rows ahead.  Features are
    RSI, %K, ``ind.get_myRatio``, daily returns, MACD, the columns of
    ``ind.daily_returns_2``, RSI of OBV and RSI of the ``'SET'`` column.

    Args:
        symbol: Ticker whose column is read from the loaded price data.
        startDate: First date of the range, forwarded to ``pd.date_range``.
        endDate: Last date of the range, forwarded to ``pd.date_range``.
        periods: Number of trailing rows left out of the features and the
            look-ahead passed to ``ind.isUpTrend``.
        remove_head: Number of leading rows dropped from every output so the
            indicator warm-up rows (NaN) are not used for training.

    Returns:
        Tuple ``(Xtrain, Ydigit, price_close)``: the feature DataFrame, the
        0/1 up-trend Series and the symbol's price DataFrame.
    """
    dates = pd.date_range(startDate, endDate)
    df = util.loadPriceData([symbol], dates)
    util.fill_missing_values(df)

    # Leave out the latest `periods` rows: their label would need prices
    # beyond the end of the data.
    df_sliced = df.iloc[:len(df) - periods]
    price_close = pd.DataFrame(df_sliced[symbol])
    set_close = pd.DataFrame(df_sliced['SET'])

    rsi = ind.rsi(price_close) / 100  # rescale
    myRatio = ind.get_myRatio(price_close)
    daily = ind.daily_returns(price_close) * 100  # rescale
    _, _, macd = ind.average_convergence(price_close)

    ohlcv = util.load_OHLCV(symbol, dates)
    percent_KD = ind.percent_KD(ohlcv) / 100  # rescale
    c2o = ind.daily_returns_2(ohlcv) * 100  # rescale

    volume = util.loadVolumeData([symbol], dates)
    # same trailing cut as the prices
    volume_sliced = volume.iloc[:len(volume) - periods]
    assert len(volume_sliced) == len(df_sliced)
    obv = ind.OBV(volume_sliced, df_sliced)
    obv_rsi = ind.rsi(obv) / 100  # momentum of OBV via RSI, rescaled
    set_rsi = ind.rsi(set_close) / 100  # momentum of SET via RSI, rescaled

    # Join the features on the sliced index, naming each column on the way.
    Xtrain = pd.DataFrame(index=df_sliced.index)
    Xtrain = Xtrain.join(rsi[symbol].rename('RSI'))
    Xtrain = Xtrain.join(percent_KD['%K'])
    Xtrain = Xtrain.join(myRatio[symbol].rename('MY'))
    Xtrain = Xtrain.join(daily[symbol].rename('DAILY'))
    Xtrain = Xtrain.join(macd[symbol].rename('MACD'))
    Xtrain = Xtrain.join(c2o)
    Xtrain = Xtrain.join(obv_rsi[symbol].rename('OBV_RSI'))
    Xtrain = Xtrain.join(set_rsi.rename(columns={'SET': 'SET_RSI'}))

    upTrend = ind.isUpTrend(df, symbol, periods=periods)
    # multiply by 1: True becomes 1 (up trend), False becomes 0
    Ydigit = 1 * upTrend[symbol]

    # skip at head row, avoid NaN values
    Xtrain = Xtrain.iloc[remove_head:]
    Ydigit = Ydigit.iloc[remove_head:]
    price_close = price_close.iloc[remove_head:]
    return Xtrain, Ydigit, price_close
Example #4
0
def getTrainData_4(symbol, startDate, endDate, periods=14, remove_head=19):
    """Build raw (unscaled) features and an up-trend label for ``symbol``.

    The last ``periods`` rows are excluded from the features because the
    label (``ind.isUpTrend``) looks ``periods`` rows ahead.  Features are the
    close price, Bollinger upper/lower bands, EMA26, EMA12, RSI, %K, OBV and
    the ``'SET'`` column.

    Args:
        symbol: Ticker whose column is read from the loaded price data.
        startDate: First date of the range, forwarded to ``pd.date_range``.
        endDate: Last date of the range, forwarded to ``pd.date_range``.
        periods: Number of trailing rows left out of the features and the
            look-ahead passed to ``ind.isUpTrend``.
        remove_head: Number of leading rows dropped from both outputs so the
            indicator warm-up rows are not used for training.

    Returns:
        Tuple ``(Xtrain, Ydigit)``: the feature DataFrame (remaining NaN
        replaced by 0) and the 0/1 up-trend Series.
    """
    dates = pd.date_range(startDate, endDate)

    df = util.loadPriceData([symbol], dates)

    # Leave out the latest `periods` rows: their label would need prices
    # beyond the end of the data.
    df_sliced = df.iloc[:len(df) - periods]

    price_close = pd.DataFrame(df_sliced[symbol])
    set_close = pd.DataFrame(df_sliced['SET'])

    bbands = ind.get_BBANDS(price_close, symbol)
    ema26, ema12, _ = ind.average_convergence(price_close)
    rsi = ind.rsi(price_close)

    ohlcv = util.load_OHLCV(symbol, dates)
    percent_KD = ind.percent_KD(ohlcv)

    # Symbols are passed as a list, as in getTrainData_1 and as for
    # loadPriceData above.
    volume = util.loadVolumeData([symbol], dates)
    # same trailing cut as the prices
    volume_sliced = volume.iloc[:len(volume) - periods]
    assert len(volume_sliced) == len(price_close)

    volume_sliced = pd.DataFrame(volume_sliced[symbol])
    obv = ind.OBV(volume_sliced, price_close)

    # Join the features, renaming the symbol column of each frame to its
    # feature name (copies are renamed; the indicator frames stay untouched).
    Xtrain = price_close.rename(columns={symbol: 'CLOSE'})
    Xtrain = Xtrain.join(bbands['UPPER'])
    Xtrain = Xtrain.join(bbands['LOWER'])
    Xtrain = Xtrain.join(ema26.rename(columns={symbol: 'EMA26'}))
    Xtrain = Xtrain.join(ema12.rename(columns={symbol: 'EMA12'}))
    Xtrain = Xtrain.join(rsi.rename(columns={symbol: 'RSI'}))
    Xtrain = Xtrain.join(percent_KD['%K'])
    Xtrain = Xtrain.join(obv.rename(columns={symbol: 'OBV'}))
    Xtrain = Xtrain.join(set_close)

    upTrend = ind.isUpTrend(df, symbol, periods=periods)
    # multiply by 1: True becomes 1 (up trend), False becomes 0
    Ydigit = 1 * upTrend[symbol]
    assert len(Xtrain) == len(Ydigit)

    # Skip the head rows (indicator warm-up) and zero any NaN that is left;
    # fillna returns a new frame, so no slice is modified in place.
    Xtrain = Xtrain.iloc[remove_head:].fillna(0)
    Ydigit = Ydigit.iloc[remove_head:]
    return Xtrain, Ydigit
def prepareDataSet(symbols, dates, csv_dir=DIR_SEC_CSV, output_dir=output_dataset):
    """Write one normalized feature file per symbol plus the target data set.

    ``output_dir`` is cleared first.  For every symbol the ``<CLOSE>`` and
    ``<VOL>`` columns are loaded from ``csv_dir``, normalized and written to
    ``<output_dir>/<symbol>_X.csv``.  Finally the prices of all symbols are
    loaded and handed to ``writeDataSetY``.

    Args:
        symbols: Iterable of ticker names; it is traversed twice, so it must
            be re-iterable (e.g. a list).
        dates: Date range forwarded to the loaders.
        csv_dir: Directory holding the source CSV files.
        output_dir: Directory that receives the generated files; its current
            content is removed by ``clearDir``.
    """
    clearDir(output_dir)
    column_names = ['<CLOSE>', '<VOL>']

    for count, sym in enumerate(symbols):
        # Read from the directory the caller asked for (the parameter).
        df_X = createDataFrame(sym, dates, column_names, csv_dir)
        df_norm = util.normalize_data(df_X)
        writeDataSetX(df_norm, join(output_dir, "{}_X.csv".format(sym)))

        # Progress report every 20 symbols; `count` is the number of files
        # completed before the current one.
        if count % 20 == 0:
            print("Writing total files : {} ....".format(count))

    df_Y = util.loadPriceData(symbols, dates)
    writeDataSetY(df_Y, symbols)
def max_change(startDate, endDate):
    """Print and plot the symbols with the highest recent mean percent change.

    Prices of all symbols are loaded for the date range.  The last four rows
    are taken in reverse order, ``pct_change`` is applied, the three
    resulting rows are averaged per symbol (NaN counted as 0) and symbols
    whose average exceeds 3 are listed in descending order.  The normalized
    prices of the top five are plotted.

    Args:
        startDate: First date of the range, forwarded to ``pd.date_range``.
        endDate: Last date of the range, forwarded to ``pd.date_range``.
    """
    dates = pd.date_range(startDate, endDate)
    symbolList = data.getAllSymbol()
    df = utl.loadPriceData(symbolList, dates)

    last_rows = df.iloc[-4:]  # the last 4 rows of data
    # NOTE(review): the rows are reversed before pct_change, so every change
    # is measured from a later row to the earlier one -- confirm that this
    # direction is intended.
    changes = last_rows.iloc[::-1].pct_change()
    changes = changes.iloc[1:4]  # the first row of pct_change is always NaN
    changes = changes.fillna(0)
    result = changes.mean()
    print(changes)

    # NOTE(review): pct_change yields fractions (0.03 == 3 %), so `> 3`
    # selects only averages above 300 % -- confirm the threshold's unit.
    ranked = result[result > 3].sort_values(ascending=False)
    nameSymbol = ranked.index.tolist()
    print("List of high percent change\n", nameSymbol)

    df_norm = utl.normalize_data(df)
    utg.plotStock(df_norm,
                  nameSymbol[0:5],
                  startDate,
                  endDate,
                  title='High percent change')
import indicator as ind
import dataset as data
import utility as utl
import utilgraph as utg

import numpy as np
import pandas as pd
import pickle

# Module-level data set, built once at import time and read by
# max_sharpe_ratio() below through the globals `df` and `symbolList`.
startDate = '2017-05-01'  # fixed first date of the analysed range
endDate = utl.getCurrentDateStr()  # range ends at the date reported by utl
dates = pd.date_range(startDate, endDate)
symbolList = data.getAllSymbol()
# Prices for every symbol over `dates` (loaded on import).
df = utl.loadPriceData(symbolList, dates)


def max_sharpe_ratio(periods=5):
    """Rank the symbols by their mean rolling Sharpe ratio.

    Uses the module-level ``df`` and ``symbolList``.  The Sharpe-ratio frame
    from ``ind.create_dataframe_SR`` has its NaN replaced by 0, is shifted up
    by ``periods - 1`` rows, and the rows emptied by the shift are dropped
    before the per-symbol mean is taken.

    Args:
        periods: Window passed to ``ind.create_dataframe_SR``; also defines
            the size of the upward shift (``periods - 1`` rows).

    Returns:
        List of the column names whose mean Sharpe ratio is positive, ordered
        from the highest mean to the lowest.
    """
    df_sr = ind.create_dataframe_SR(df, symbolList, window=periods)
    df_sr = df_sr.fillna(0)  # fill NaN to 0
    # Shift the rows up, then drop the NaN rows the shift leaves at the tail.
    df_sr = df_sr.shift(-(periods - 1)).dropna(axis=0)

    # Column-wise mean.  DataFrame.mean() is explicit about the axis, whereas
    # np.mean(df) collapses the whole frame to one scalar on pandas >= 2.0.
    result = df_sr.mean()

    # Keep the positive means and order them best-first by label.
    ranked = result[result > 0].sort_values(ascending=False)
    nameSymbol = ranked.index.tolist()
    return nameSymbol
Example #8
0
def getTrainData_3(symbol, startDate, endDate, remove_head=15):
    """Build normalized features and a next-day up/down label for ``symbol``.

    Features are computed on every row except the last one (which has no
    following day to predict): close price, two 5-period moving averages,
    Bollinger band (multiplier 1), MACD, RSI and the columns returned by
    ``ind.percent_KD``.  The label is 1 when the next row's daily return is
    positive, otherwise 0.

    Args:
        symbol: Ticker whose column is read from the loaded price data.
        startDate: First date of the range, forwarded to ``pd.date_range``.
        endDate: Last date of the range, forwarded to ``pd.date_range``.
        remove_head: Number of leading rows dropped from both outputs so the
            indicator warm-up rows (NaN) are not used for training.

    Returns:
        Tuple ``(Xnorm, Ytrain)``: the feature DataFrame after
        ``preprocessing.Normalizer`` and the 0/1 label Series.  Both carry
        the same row labels.
    """
    dates = pd.date_range(startDate, endDate)

    df = util.loadPriceData([symbol], dates)  # 1
    close = df.iloc[:-1]  # skip tail: the last row has no next-day target
    assert len(df) == len(close) + 1

    ema5 = ind.ema(close, periods=5)  # 2
    # NOTE(review): the "SMA" feature is produced by ind.ema, exactly like
    # the EMA one; no simple-moving-average helper is visible from here --
    # confirm whether a dedicated one should be called instead.
    sma5 = ind.ema(close, periods=5)  # 3

    bb1p = ind.get_BBANDS(close, symbol, periods=14, mul=1)  # 6, 7
    _, _, macd = ind.average_convergence(close)  # 10

    rsi = ind.rsi(close)  # 12

    ohlcv = util.load_OHLCV(symbol, dates)
    percent_KD = ind.percent_KD(ohlcv)  # 13

    # Pick one column from each indicator frame and give it its feature name;
    # renaming the extracted Series leaves the indicator frames untouched.
    features = [
        close[symbol].rename('CLOSE'),  # 1
        ema5[symbol].rename('EMA5'),  # 2
        sma5[symbol].rename('SMA5'),  # 3
        bb1p['LOWER'].rename('LOWER_BB1P'),  # 6
        bb1p['UPPER'].rename('UPPER_BB1P'),  # 7
        macd[symbol].rename('MACD'),  # 10
        rsi[symbol].rename('RSI'),  # 12
    ]

    Xtrain = pd.DataFrame(index=close.index)
    for feature in features:
        Xtrain = Xtrain.join(feature)
    Xtrain = Xtrain.join(percent_KD)  # 13

    # Label: sign of tomorrow's return aligned on today's row; the shift
    # leaves a NaN in the last row, which is dropped.
    daily = ind.daily_returns(df[symbol])  # symbol column only, skip SET
    daily = daily.shift(-1).dropna(axis=0)
    Ytrain = 1 * (daily > 0.0)  # positive return -> 1, otherwise 0
    assert len(Ytrain) == len(Xtrain)

    # skip at head row, avoid NaN values
    Xtrain = Xtrain.iloc[remove_head:]
    Ytrain = Ytrain.iloc[remove_head:]

    # normalize
    # NOTE(review): `preprocessing` is presumably sklearn.preprocessing, whose
    # Normalizer rescales each row (sample) to unit norm rather than each
    # column -- confirm that per-row scaling is what is wanted.
    normalizer = preprocessing.Normalizer().fit(Xtrain)
    # Keep the row labels so Xnorm stays aligned with Ytrain.
    Xnorm = pd.DataFrame(normalizer.transform(Xtrain),
                         columns=Xtrain.columns,
                         index=Xtrain.index)

    return Xnorm, Ytrain