def get_Residuals_LinearModel(X_data, price, lag = 20):
    """Fit an ordinary linear regression of the lag-ahead price change on
    X_data and return the goodness of fit.

    The target for row t is price[t + lag] - price[t]: the series is
    lag-differenced and then shifted back by `lag` so each row of X_data is
    aligned with the *future* change it should predict.

    Parameters
    ----------
    X_data : 2D ndarray (Nsamples, Nfeatures) of regressors.
    price : price series accepted by bMA.diff / bMA.shift (column vector).
    lag : int, prediction horizon in samples (default 20).

    Returns
    -------
    float : R^2 score of the fitted model over the NaN-free samples.
        NOTE(review): despite the function's name, this is the coefficient
        of determination, not a residual vector — kept for backward
        compatibility with existing callers.
    """
    ## Build the prediction target (future price change, aligned to row t).
    Y_data = bMA.diff(price, lag = lag, cval = np.nan)
    Y_data = bMA.shift(Y_data, lag = -lag, cval = np.nan)
    ## Eliminate the NaNs — a row is dropped if ANY regressor or the target is NaN.
    mask_X = np.sum(np.isnan(X_data), axis = 1) == 0
    mask_Y = np.isnan(Y_data) == 0
    mask = mask_X & mask_Y[:, 0]
    # Create and train the linear regression object on the clean rows.
    regr = linear_model.LinearRegression()
    regr.fit(X_data[mask, :], Y_data[mask, :])
    # FIX: removed the dead `residual = regr.residues_` assignment — the
    # `residues_` attribute is deprecated/removed in modern scikit-learn and
    # its value was immediately overwritten by the score below anyway. The
    # unused `coeffs`/`params` locals were removed for the same reason.
    residual = regr.score(X_data[mask, :], Y_data[mask, :])
    return residual
def get_Residuals_LinearModel(X_data, price, lag=20):
    """Fit a linear model of the lag-ahead price change on X_data and
    return its R^2 score.

    The target for row t is price[t + lag] - price[t]: `bMA.diff` takes the
    lag-difference and `bMA.shift(-lag)` realigns it so each feature row is
    paired with the future change it predicts.

    Parameters
    ----------
    X_data : 2D ndarray (Nsamples, Nfeatures) of regressors.
    price : price series accepted by bMA.diff / bMA.shift (column vector).
    lag : int, prediction horizon in samples (default 20).

    Returns
    -------
    float : coefficient of determination of the fit on NaN-free samples.
        NOTE(review): the name suggests residuals but the returned value is
        the R^2 score — preserved for backward compatibility.
    """
    ## Prediction target: future price change aligned to the current row.
    Y_data = bMA.diff(price, lag=lag, cval=np.nan)
    Y_data = bMA.shift(Y_data, lag=-lag, cval=np.nan)
    ## Eliminate the NaNs — drop a row if any feature or the target is NaN.
    mask_X = np.sum(np.isnan(X_data), axis=1) == 0
    mask_Y = np.isnan(Y_data) == 0
    mask = mask_X & mask_Y[:, 0]
    # Create the linear regression object and train it on the clean rows.
    regr = linear_model.LinearRegression()
    regr.fit(X_data[mask, :], Y_data[mask, :])
    # FIX: dropped the dead `residual = regr.residues_` line (the attribute
    # is deprecated/removed in modern scikit-learn, and the value was
    # overwritten by the score immediately below) and the unused
    # `coeffs`/`params` locals.
    residual = regr.score(X_data[mask, :], Y_data[mask, :])
    return residual
# easier for algorithms to learn from it day_of_week = np.array(data_df.index.dayofweek) week_of_year = np.array(data_df.index.weekofyear) year = np.array(data_df.index.year) ## Add the lagged value to the database Nlag_time_information = 1 tut.add_lagged_values(data_df, day_of_week, "day", Nlag_time_information) tut.add_lagged_values(data_df, week_of_year, "week", Nlag_time_information) tut.add_lagged_values(data_df, year, "year", Nlag_time_information) ################ OCHL variables ################## # Variables trivially obtained from daily OHCL Target = Target # Increase in Close price Range_HL = H - L # measure of volatility Daily_gap = O - bMl.shift(C, lag=1).flatten() # measure of price movement ## Add the lagged value to the database Nlag_OCHL_information = 3 tut.add_lagged_values(data_df, Target, "Target", Nlag_OCHL_information) tut.add_lagged_values(data_df, Range_HL, "Range_HL", Nlag_OCHL_information) tut.add_lagged_values(data_df, Daily_gap, "Daily_gap", Nlag_OCHL_information) ################## Daily Trading Indicators #################### # Hulls_average !! ACDC, Volatility, ATR, Short nHMA = 20 ## Hulls Average, reactive but smoothing MA HMA = indl.get_HMA(timeData_daily.get_timeSeries(["Close"]), nHMA) ## Volatility
## ATR Parameter ! ATR = Cartera.ATR(n=14) ATR_vel = bMA.diff(ATR, n=1) RSI_vel = indl.get_SMA(ATR_vel, L=nsmooth_vel) ########################################################### ################# PREPARE THE DATA ######################## ########################################################### X_data = np.concatenate((MACD[:, [indx]], MACD_vel[:, [indx]]), axis=1) X_data = np.concatenate((X_data, RSI[:, [indx]], ATR[:, [indx]]), axis=1) X_data = np.concatenate((X_data, RSI_vel[:, [indx]], ATR_vel[:, [indx]]), axis=1) Y_data = bMA.diff(prices[:, indx], lag=lag, cval=np.NaN) Y_data = bMA.shift(Y_data, lag=-lag, cval=np.NaN) ### Returns lag_ret = 20 return_Ydata = bMA.get_return(prices[:, [indx]], lag=lag_ret) reconstruct_Ydata = bMA.reconstruc_return(prices[:, [indx]], return_Ydata, lag=lag_ret) gl.plot([], prices[:, [indx]], legend=["price"]) gl.plot([], reconstruct_Ydata, nf=0, legend=["reconstruction"]) gl.plot([], return_Ydata, nf=0, na=1, legend=["return"]) Y_data = return_Ydata Y_data = bMA.shift(Y_data, lag=-lag, cval=np.NaN)
# we read it with this one. ######## PANDAS FORMAT folder_dataFeatures = "./data/" data = pd.read_csv(folder_dataFeatures + "dataPandas.csv", sep = ',', index_col = 0, dtype = {"Date":dt.datetime}) data.index = ul.str_to_datetime (data.index.tolist()) ######## NUMPY ARRAYS FORMAT X_data = np.loadtxt(folder_dataFeatures + "Xdata.csv", delimiter=",") price = np.loadtxt(folder_dataFeatures + "price.csv", delimiter=",") price = price.reshape(Y_data.size,1) # TODO: Just to put it in the sahpe as it was before writing it to disk dates = np.loadtxt(folder_dataFeatures + "dates.csv", delimiter=",") ## Generate the Y variable to estimate lag = 20 Y_data = bMA.get_return(price, lag = lag) Y_data = bMA.shift(Y_data, lag = -lag, cval = np.NaN) if (model_OLS): # Makes use of the pandas structures ############################################################################## # Multilinear regression model, calculating fit, P-values, confidence # intervals etc. # Fit the model model = ols("Y ~ MACD + RSI + ATR + MACD_vel + ATR_vel + RSI_vel", data).fit() params = model._results.params # Print the summary print(model.summary()) print("OLS model Parameters") print(params) # Peform analysis of variance on fitted linear model #anova_results = anova_lm(model)
## Create Pandas Data Frame for the information of the ML problem
# Rows are indexed by day; Target_clas is the binarized label, Target_reg the
# raw close-price increase.
data_df = pd.DataFrame({'Time': days_keys, 'Target_clas': Target_bin,
                        'Target_reg': Target})  # 'Current_diff': Target}
data_df.set_index('Time', inplace = True)
"""
#########################################################
 CREATE WINDOWED VECTOR OF FEATURES !!
#########################################################
"""
Nlast_Close = 2  # The last Diff in Close and increase
for i in range(Nlast_Close):
    # data_df.shift()
    # Lagged close-price change, plus its categorical (binary) encoding.
    data_df["Diff_prevC_%i"%(i+1)] = bMl.shift(Target, lag = i+1)
    # We encode it as categorical !!!
    data_df["Diff_prevC_bin_%i"%(i+1)] = bMl.shift(Target_bin, lag = i+1)
    # data_df["Diff_prevC_bin_%i"%(i+1)] = pd.Categorical(data_df["Diff_prevC_bin_%i"%(i+1)]).codes

Nlast_Range = 2
for i in range(Nlast_Range):
    # data_df.shift()
    # Lagged intraday ranges: High-Low (volatility) and Close-Open (body).
    data_df["Diff_prevRangeHL_%i"%(i+1)] = bMl.shift(H-L, lag = i+1)
    data_df["Diff_prevRangeCO_%i"%(i+1)] = bMl.shift(C-O, lag = i+1)

Nlast_Price = 1
# BUG FIX: this loop previously iterated range(Nlast_Range) — a copy-paste
# slip that ignored Nlast_Price and produced one extra prevClose column.
for i in range(Nlast_Price):
    # data_df.shift()
    data_df["prevClose_%i"%(i+1)] = bMl.shift(C, lag = i+1)
def add_lagged_values(df, feature_vector, name_feature, Nlags = 1):
    """Append Nlags lagged copies of feature_vector to df in place.

    For each lag k = 1..Nlags, a new column "<name_feature>_k" is added
    holding feature_vector shifted back by k samples.
    """
    for lag in range(1, Nlags + 1):
        column = bMl.shift(feature_vector, lag = lag).flatten()
        df[name_feature + "_%i" % lag] = column
# easier for algorithms to learn from it day_of_week = np.array(data_df.index.dayofweek) week_of_year = np.array(data_df.index.weekofyear) year = np.array(data_df.index.year) ## Add the lagged value to the database Nlag_time_information = 1 tut.add_lagged_values(data_df,day_of_week,"day",Nlag_time_information) tut.add_lagged_values(data_df,week_of_year,"week",Nlag_time_information) tut.add_lagged_values(data_df,year,"year",Nlag_time_information) ################ OCHL variables ################## # Variables trivially obtained from daily OHCL Target = Target # Increase in Close price Range_HL = H-L # measure of volatility Daily_gap = O - bMl.shift(C,lag = 1).flatten() # measure of price movement ## Add the lagged value to the database Nlag_OCHL_information = 3 tut.add_lagged_values(data_df,Target,"Target",Nlag_OCHL_information) tut.add_lagged_values(data_df,Range_HL,"Range_HL",Nlag_OCHL_information) tut.add_lagged_values(data_df,Daily_gap,"Daily_gap",Nlag_OCHL_information) ################## Daily Trading Indicators #################### # Hulls_average !! ACDC, Volatility, ATR, Short nHMA = 20 ## Hulls Average, reactive but smoothing MA HMA = indl.get_HMA(timeData_daily.get_timeSeries(["Close"]), nHMA) ## Volatility
def add_lagged_values(df, feature_vector, name_feature, Nlags=1):
    """Add lagged versions of a feature to the dataframe in place.

    Creates columns "<name_feature>_1" .. "<name_feature>_Nlags", each
    holding feature_vector shifted back by that many samples.
    """
    lag = 1
    while lag <= Nlags:
        df[name_feature + "_%i" % lag] = bMl.shift(feature_vector, lag=lag).flatten()
        lag += 1