Esempio n. 1
0
 def get_Residuals_LinearModel(X_data, price, lag = 20):
     """Fit a linear model predicting the lag-step price return from X_data
     and return the model's R^2 score over the NaN-free training samples.

     NOTE(review): despite the name, this returns regr.score(...) (R^2),
     not a residual vector. The original dead assignment from the
     deprecated sklearn attribute `residues_` was immediately overwritten
     and has been removed, along with the unused `coeffs`/`params` locals.

     Parameters
     ----------
     X_data : 2-D array of features, one row per time step.
     price  : price series understood by bMA.diff / bMA.shift.
     lag    : return horizon in time steps (default 20).
     """
     ## Target: the price return `lag` steps ahead, shifted back so that
     ## row t of Y_data is aligned with row t of X_data.
     Y_data = bMA.diff(price, lag = lag, cval = np.NaN)
     Y_data = bMA.shift(Y_data, lag = -lag, cval = np.NaN)

     ## Keep only rows where neither X nor Y has a NaN in any dimension.
     mask_X = np.sum(np.isnan(X_data), axis = 1) == 0
     mask_Y = np.isnan(Y_data) == 0
     mask = mask_X & mask_Y[:,0]

     # Fit ordinary least squares on the clean samples only.
     regr = linear_model.LinearRegression()
     regr.fit(X_data[mask,:], Y_data[mask,:])

     # R^2 of the fit on the training data (max 1.0; higher is better).
     residual = regr.score(X_data[mask,:], Y_data[mask,:])
     return residual
    def get_Residuals_LinearModel(X_data, price, lag=20):
        """Fit a linear model predicting the lag-step price return from
        X_data and return the model's R^2 score on the NaN-free samples.

        NOTE(review): despite the name, this returns regr.score(...)
        (R^2), not a residual vector. The original dead assignment from
        the deprecated sklearn attribute `residues_` was immediately
        overwritten and has been removed, as were the unused
        `coeffs`/`params` locals.

        Parameters
        ----------
        X_data : 2-D array of features, one row per time step.
        price  : price series understood by bMA.diff / bMA.shift.
        lag    : return horizon in time steps (default 20).
        """
        ## Target: price return `lag` steps ahead, shifted back so that
        ## row t of Y_data lines up with row t of X_data.
        Y_data = bMA.diff(price, lag=lag, cval=np.NaN)
        Y_data = bMA.shift(Y_data, lag=-lag, cval=np.NaN)

        ## Keep only rows with no NaN in either X or Y, in any dimension.
        mask_X = np.sum(np.isnan(X_data), axis=1) == 0
        mask_Y = np.isnan(Y_data) == 0
        mask = mask_X & mask_Y[:, 0]

        # Fit ordinary least squares on the clean samples only.
        regr = linear_model.LinearRegression()
        regr.fit(X_data[mask, :], Y_data[mask, :])

        # R^2 of the fit on the training data (max 1.0; higher is better).
        residual = regr.score(X_data[mask, :], Y_data[mask, :])
        return residual
    # Calendar features taken from the DataFrame's index (dayofweek /
    # weekofyear / year are DatetimeIndex attributes), encoded as plain
    # integers so it is
    # easier for algorithms to learn from it
    day_of_week = np.array(data_df.index.dayofweek)
    week_of_year = np.array(data_df.index.weekofyear)
    year = np.array(data_df.index.year)

    ## Add the lagged value to the database (one lag per calendar feature)
    Nlag_time_information = 1
    tut.add_lagged_values(data_df, day_of_week, "day", Nlag_time_information)
    tut.add_lagged_values(data_df, week_of_year, "week", Nlag_time_information)
    tut.add_lagged_values(data_df, year, "year", Nlag_time_information)

    ################  OCHL variables ##################
    # Variables trivially obtained from daily OHCL
    # NOTE(review): `Target = Target` is a no-op; apparently kept only so the
    # three OCHL features are listed symmetrically here.
    Target = Target  # Increase in Close price
    Range_HL = H - L  # intraday High-Low range, a measure of volatility
    Daily_gap = O - bMl.shift(C, lag=1).flatten()  # today's Open minus the previous Close

    ## Add the lagged value to the database (Nlag_OCHL_information lags each)
    Nlag_OCHL_information = 3
    tut.add_lagged_values(data_df, Target, "Target", Nlag_OCHL_information)
    tut.add_lagged_values(data_df, Range_HL, "Range_HL", Nlag_OCHL_information)
    tut.add_lagged_values(data_df, Daily_gap, "Daily_gap",
                          Nlag_OCHL_information)

    ################## Daily Trading Indicators ####################
    # Hulls_average !! ACDC, Volatility, ATR, Short
    nHMA = 20  # HMA window length
    ## Hulls Average, reactive but smoothing MA
    HMA = indl.get_HMA(timeData_daily.get_timeSeries(["Close"]), nHMA)

    ## Volatility
## ATR Parameter !
# Average True Range (volatility indicator) from the portfolio object,
# its first difference ("velocity"), and a smoothed version of that.
ATR = Cartera.ATR(n=14)
ATR_vel = bMA.diff(ATR, n=1)
# NOTE(review): the name says RSI_vel but this smooths ATR_vel -- looks
# like a copy-paste slip in the variable name; confirm against callers.
RSI_vel = indl.get_SMA(ATR_vel, L=nsmooth_vel)

###########################################################
################# PREPARE THE DATA ########################
###########################################################
# Feature matrix: one column per indicator, restricted to security `indx`.
X_data = np.concatenate((MACD[:, [indx]], MACD_vel[:, [indx]]), axis=1)
X_data = np.concatenate((X_data, RSI[:, [indx]], ATR[:, [indx]]), axis=1)
X_data = np.concatenate((X_data, RSI_vel[:, [indx]], ATR_vel[:, [indx]]),
                        axis=1)

# Target: price difference `lag` steps ahead, shifted back to align with X.
Y_data = bMA.diff(prices[:, indx], lag=lag, cval=np.NaN)
Y_data = bMA.shift(Y_data, lag=-lag, cval=np.NaN)

### Returns
lag_ret = 20
return_Ydata = bMA.get_return(prices[:, [indx]], lag=lag_ret)
# Sanity check: rebuilding the price from the returns should track the price.
reconstruct_Ydata = bMA.reconstruc_return(prices[:, [indx]],
                                          return_Ydata,
                                          lag=lag_ret)

# Plot price, its reconstruction from returns, and the returns themselves.
gl.plot([], prices[:, [indx]], legend=["price"])
gl.plot([], reconstruct_Ydata, nf=0, legend=["reconstruction"])
gl.plot([], return_Ydata, nf=0, na=1, legend=["return"])

# NOTE(review): this overwrites the diff-based Y_data above -- the return is
# the target actually used from here on; also the shift uses `lag`, not
# `lag_ret`. Confirm this is intended.
Y_data = return_Ydata
Y_data = bMA.shift(Y_data, lag=-lag, cval=np.NaN)
Esempio n. 5
0
# we read it with this one.

######## PANDAS FORMAT
folder_dataFeatures = "./data/"
# NOTE(review): passing dt.datetime in `dtype` is dubious -- pandas expects a
# numpy/pandas dtype here (parse_dates is the usual mechanism). Confirm intent.
data = pd.read_csv(folder_dataFeatures + "dataPandas.csv", sep = ',', index_col = 0, 
                      dtype = {"Date":dt.datetime})
data.index = ul.str_to_datetime (data.index.tolist())
######## NUMPY ARRAYS FORMAT
X_data = np.loadtxt(folder_dataFeatures + "Xdata.csv", delimiter=",")
price = np.loadtxt(folder_dataFeatures + "price.csv", delimiter=",")
# Restore the (N, 1) column shape the array had before being written to disk.
# NOTE(review): Y_data is not defined yet at this point in the fragment --
# presumably it exists earlier in the full script; price.size would be the
# self-contained choice. TODO confirm.
price = price.reshape(Y_data.size,1)
dates = np.loadtxt(folder_dataFeatures + "dates.csv", delimiter=",")
## Generate the Y variable to estimate: the `lag`-step return, shifted back
## so each row of Y_data is the future return following that row's features.
lag = 20
Y_data = bMA.get_return(price, lag = lag)
Y_data = bMA.shift(Y_data, lag = -lag, cval = np.NaN)

if (model_OLS):
    # Makes use of the pandas structures
    ##############################################################################
    # Multilinear regression model, calculating fit, P-values, confidence
    # intervals etc.
    # Fit the model (statsmodels-style formula over the `data` frame)
    model = ols("Y ~ MACD + RSI + ATR + MACD_vel  + ATR_vel + RSI_vel", data).fit()
    params = model._results.params
    # Print the summary
    print(model.summary())
    print("OLS model Parameters")
    print(params)
    # Perform analysis of variance on fitted linear model
    #anova_results = anova_lm(model)
Esempio n. 6
0
## Create Pandas Data Frame for the information of the ML problem

data_df = pd.DataFrame({'Time': days_keys, 'Target_clas': Target_bin,  'Target_reg': Target})  #  'Current_diff': Target} 
data_df.set_index('Time',inplace = True)

"""
#########################################################
CREATE WINDOWED VECTOR OF FEATURES !!
#########################################################
"""

# Lagged Close differences: real-valued and binary (class) versions.
Nlast_Close = 2  # The last Diff in Close and increase
for i in range(Nlast_Close):
    data_df["Diff_prevC_%i"%(i+1)] = bMl.shift(Target,lag = i+1)
    # We encode it as categorical !!
    data_df["Diff_prevC_bin_%i"%(i+1)] = bMl.shift(Target_bin,lag = i+1)

# Lagged intraday ranges: High-Low (volatility) and Close-Open (movement).
Nlast_Range = 2
for i in range(Nlast_Range):
    data_df["Diff_prevRangeHL_%i"%(i+1)] = bMl.shift(H-L,lag = i+1)
    data_df["Diff_prevRangeCO_%i"%(i+1)] = bMl.shift(C-O,lag = i+1)

# Lagged raw Close prices.
Nlast_Price = 1
# BUG FIX: this loop previously iterated over Nlast_Range (2) instead of
# Nlast_Price (1), which was declared but never used -- a copy-paste slip
# that added one more prevClose column than intended.
for i in range(Nlast_Price):
    data_df["prevClose_%i"%(i+1)] = bMl.shift(C,lag = i+1)
Esempio n. 7
0
def add_lagged_values(df, feature_vector, name_feature, Nlags = 1):
    """Append `Nlags` lagged copies of `feature_vector` to `df`.

    Column k (k = 1..Nlags) is named '<name_feature>_k' and holds the
    feature shifted by k steps. Mutates `df` in place; returns None.
    """
    for lag_amount in range(1, Nlags + 1):
        lagged_column = bMl.shift(feature_vector, lag = lag_amount).flatten()
        df[name_feature + "_%i" % lag_amount] = lagged_column
Esempio n. 8
0
## Create Pandas Data Frame for the information of the ML problem

data_df = pd.DataFrame({'Time': days_keys, 'Target_clas': Target_bin,  'Target_reg': Target})  #  'Current_diff': Target} 
data_df.set_index('Time',inplace = True)

"""
#########################################################
CREATE WINDOWED VECTOR OF FEATURES !!
#########################################################
"""

# Lagged Close differences: real-valued and binary (class) versions.
Nlast_Close = 2  # The last Diff in Close and increase
for i in range(Nlast_Close):
    data_df["Diff_prevC_%i"%(i+1)] = bMl.shift(Target,lag = i+1)
    # We encode it as categorical !!
    data_df["Diff_prevC_bin_%i"%(i+1)] = bMl.shift(Target_bin,lag = i+1)

# Lagged intraday ranges: High-Low (volatility) and Close-Open (movement).
Nlast_Range = 2
for i in range(Nlast_Range):
    data_df["Diff_prevRangeHL_%i"%(i+1)] = bMl.shift(H-L,lag = i+1)
    data_df["Diff_prevRangeCO_%i"%(i+1)] = bMl.shift(C-O,lag = i+1)

# Lagged raw Close prices.
Nlast_Price = 1
# BUG FIX: this loop previously iterated over Nlast_Range (2) instead of
# Nlast_Price (1), which was declared but never used -- a copy-paste slip
# that added one more prevClose column than intended.
for i in range(Nlast_Price):
    data_df["prevClose_%i"%(i+1)] = bMl.shift(C,lag = i+1)
 # Calendar features taken from the DataFrame's index (dayofweek /
 # weekofyear / year are DatetimeIndex attributes), encoded as plain
 # integers so it is
 # easier for algorithms to learn from it
 day_of_week = np.array(data_df.index.dayofweek)
 week_of_year = np.array(data_df.index.weekofyear)
 year = np.array(data_df.index.year)
 
 ## Add the lagged value to the database (one lag per calendar feature)
 Nlag_time_information = 1
 tut.add_lagged_values(data_df,day_of_week,"day",Nlag_time_information)
 tut.add_lagged_values(data_df,week_of_year,"week",Nlag_time_information)
 tut.add_lagged_values(data_df,year,"year",Nlag_time_information)
 
 ################  OCHL variables ##################
 # Variables trivially obtained from daily OHCL
 # NOTE(review): `Target = Target` is a no-op; apparently kept only so the
 # three OCHL features are listed symmetrically here.
 Target = Target # Increase in Close price
 Range_HL = H-L # intraday High-Low range, a measure of volatility
 Daily_gap =  O - bMl.shift(C,lag = 1).flatten() # today's Open minus the previous Close
 
 ## Add the lagged value to the database (Nlag_OCHL_information lags each)
 Nlag_OCHL_information = 3
 tut.add_lagged_values(data_df,Target,"Target",Nlag_OCHL_information)
 tut.add_lagged_values(data_df,Range_HL,"Range_HL",Nlag_OCHL_information)
 tut.add_lagged_values(data_df,Daily_gap,"Daily_gap",Nlag_OCHL_information)
 
 
 ################## Daily Trading Indicators ####################
 # Hulls_average !! ACDC, Volatility, ATR, Short 
 nHMA = 20
 ## Hulls Average, reactive but smoothing MA (window length nHMA)
 HMA  = indl.get_HMA(timeData_daily.get_timeSeries(["Close"]), nHMA)  
 
 ## Volatility
Esempio n. 10
0
def add_lagged_values(df, feature_vector, name_feature, Nlags=1):
    """Add `Nlags` lagged versions of `feature_vector` as new df columns.

    The k-th column (k = 1..Nlags) is '<name_feature>_k', holding the
    vector shifted by k steps. The frame is modified in place.
    """
    k = 1
    while k <= Nlags:
        df[name_feature + "_%i" % k] = bMl.shift(feature_vector, lag=k).flatten()
        k += 1