Esempio n. 1
0
def getTrainData_1(symbol, startDate, endDate, periods=14, remove_head=19):
    """Build the indicator feature table and up-trend labels for one symbol.

    Price, OHLCV and volume data for ``symbol`` are loaded over the calendar
    range ``startDate``..``endDate``. The most recent ``periods`` rows are
    left out of the features, several indicators are computed on the rest and
    joined on the date index, and the first ``remove_head`` rows are dropped
    from every returned object.

    Args:
        symbol: Column name of the instrument in the loaded price data.
        startDate: First date passed to ``pd.date_range``.
        endDate: Last date passed to ``pd.date_range``.
        periods: Number of trailing rows excluded from the features; also
            forwarded to ``ind.isUpTrend`` as its ``periods`` argument.
        remove_head: Number of leading rows removed from the results (the
            indicators produce NaN while their windows warm up).

    Returns:
        A tuple ``(Xtrain, Ydigit, price_close)``:
        ``Xtrain`` is the joined feature DataFrame, ``Ydigit`` is the 0/1
        up-trend series for ``symbol`` and ``price_close`` is the one-column
        close-price DataFrame, each with the leading rows removed.

    Raises:
        AssertionError: If the sliced volume data and the sliced price data
            do not have the same number of rows.
    """
    dates = pd.date_range(startDate, endDate)
    df = util.loadPriceData([symbol], dates)
    util.fill_missing_values(df)

    # Leave out the latest `periods` rows (14 by default): the original note
    # says these are the days used to decide whether a price is a new high.
    # Slicing is positional; `.iloc` replaces `.ix`, which was removed in
    # pandas 1.0. NOTE(review): assumes the frames are date-indexed, where
    # `.ix` integer slices were positional as well -- confirm.
    df_sliced = df.iloc[0:len(df) - periods]
    price_close = pd.DataFrame(df_sliced[symbol])
    set_close = pd.DataFrame(df_sliced['SET'])

    rsi = ind.rsi(price_close) / 100  # rescale by 1/100
    # NOTE(review): `sr` is only referenced by the disabled SR feature; the
    # call is kept because its side effects (if any) are not visible here.
    sr = ind.create_dataframe_SR(df_sliced, [symbol])
    myRatio = ind.get_myRatio(price_close)
    daily = ind.daily_returns(price_close) * 100  # rescale by 100
    _, _, macd = ind.average_convergence(price_close)

    ohlcv = util.load_OHLCV(symbol, dates)
    percent_KD = ind.percent_KD(ohlcv) / 100  # rescale by 1/100
    c2o = ind.daily_returns_2(ohlcv) * 100  # rescale by 100

    volume = util.loadVolumeData([symbol], dates)
    # Drop the same trailing `periods` rows so volume lines up with prices.
    volume_sliced = volume.iloc[0:len(volume) - periods]
    assert len(volume_sliced) == len(df_sliced)
    obv = ind.OBV(volume_sliced, df_sliced)
    obv_rsi = ind.rsi(obv) / 100  # momentum of OBV via RSI, rescaled
    set_rsi = ind.rsi(set_close) / 100  # momentum of SET via RSI, rescaled

    # Give every indicator a distinct column name before joining.
    rsi.rename(columns={symbol: 'RSI'}, inplace=True)
    myRatio.rename(columns={symbol: 'MY'}, inplace=True)
    daily.rename(columns={symbol: 'DAILY'}, inplace=True)
    macd.rename(columns={symbol: 'MACD'}, inplace=True)
    obv_rsi.rename(columns={symbol: 'OBV_RSI'}, inplace=True)
    set_rsi.rename(columns={'SET': 'SET_RSI'}, inplace=True)

    # Join all features on the index of the sliced price data.
    Xtrain = pd.DataFrame(index=df_sliced.index)
    Xtrain = Xtrain.join(rsi['RSI'])
    Xtrain = Xtrain.join(percent_KD['%K'])
    Xtrain = Xtrain.join(myRatio['MY'])
    Xtrain = Xtrain.join(daily['DAILY'])
    Xtrain = Xtrain.join(macd['MACD'])
    Xtrain = Xtrain.join(c2o)
    Xtrain = Xtrain.join(obv_rsi['OBV_RSI'])
    Xtrain = Xtrain.join(set_rsi)

    # Labels come from the full (unsliced) price data.
    upTrend = ind.isUpTrend(df, symbol, periods=periods)
    # Multiplying by 1 converts True/False into 1 (up trend) / 0.
    Ydigit = 1 * upTrend[symbol]

    # Skip the head rows to avoid the NaN values of the indicator warm-up.
    Xtrain = Xtrain.iloc[remove_head:]
    Ydigit = Ydigit.iloc[remove_head:]
    price_close = price_close.iloc[remove_head:]
    return Xtrain, Ydigit, price_close
def writeDataSetY(df, symbols, predict=0, window=20, output_dir=output_dataset):
    """Write the shifted strong-Sharpe-ratio table to ``@Dataset_Y.csv``.

    A Sharpe-ratio frame is built from ``df`` with
    ``ind.create_dataframe_SR``, converted with ``ind.isStrongSR``, shifted
    up by ``window + predict - 1`` rows, stripped of rows containing NaN and
    saved as CSV in ``output_dir``.

    Args:
        df: Price DataFrame; all of its columns are used as symbols.
        symbols: Currently ignored -- it is overwritten with ``df.columns``.
            Kept in the signature so existing callers keep working.
        predict: Extra number of rows added to the shift.
        window: Base number of rows used for the shift.
        output_dir: Directory that receives ``@Dataset_Y.csv``.

    Returns:
        None. The result is written to disk.
    """
    # NOTE(review): this discards the caller-supplied `symbols`; behavior is
    # preserved as found -- confirm whether the argument should be honored.
    symbols = df.columns.values
    df_sr = ind.create_dataframe_SR(df, symbols)
    df_result = ind.isStrongSR(df_sr)

    # Move each value `window + predict - 1` rows earlier so that a row holds
    # the value observed that many rows later (19 rows with the defaults).
    shift = window + predict - 1
    df_result = df_result.shift(-1 * shift)
    # The shift leaves NaN rows at the tail; drop them. `axis` is passed by
    # keyword because pandas 2.0 made dropna's arguments keyword-only.
    df_result = df_result.dropna(axis=0)
    df_result.to_csv(join(output_dir, "@Dataset_Y.csv"), index_label='<DTYYYYMMDD>')
def max_sharpe_ratio(periods=5):
    """Rank symbols by mean Sharpe ratio, save the ranking and plot the top 5.

    Reads the module-level ``df``, ``symbolList``, ``startDate`` and
    ``endDate``. Symbols whose mean Sharpe ratio is greater than 0 are listed
    from highest to lowest mean, pickled to ``symbol_list.p`` in the current
    working directory, printed, and the first five are plotted on the
    normalized price data.

    Args:
        periods: Window passed to ``ind.create_dataframe_SR``; the resulting
            frame is also shifted up by ``periods - 1`` rows.

    Returns:
        None.
    """
    df_sr = ind.create_dataframe_SR(df, symbolList, window=periods)
    df_sr.fillna(0, inplace=True)  # fill NaN with 0
    df_sr = df_sr.shift(-(periods - 1))
    # Drop the NaN rows the shift leaves at the tail. `axis` is passed by
    # keyword because pandas 2.0 made dropna's arguments keyword-only.
    df_sr.dropna(axis=0, inplace=True)

    # Column-wise mean, requested explicitly: np.mean(DataFrame) reduces over
    # both axes on pandas >= 2.0 and would yield a single scalar.
    mean_sr = df_sr.mean(axis=0)
    mean_values = mean_sr.values

    # argsort orders ascending, so reverse it to visit the largest mean first.
    # Ranking on the raw array avoids integer lookups on a label-indexed
    # Series.
    descending = np.argsort(mean_values)[::-1]
    nameSymbol = [
        mean_sr.index[position] for position in descending
        if mean_values[position] > 0
    ]

    # Save to dump file; the context manager closes the handle promptly.
    with open("symbol_list.p", "wb") as dump_file:
        pickle.dump(nameSymbol, dump_file)
    print("List max sharpe ratio\n", nameSymbol)

    df_norm = utl.normalize_data(df)
    utg.plotStock(df_norm, nameSymbol[0:5], startDate, endDate,
                  'High sharpe ratio')