Example #1
def get_timeSeriesReturn(self, transform="log"):
    # Gets the return of the time series; if it has not been computed yet,
    # it computes it first.
    # We will try as well to get the return of the first datapoint
    # if we actually have it in the database. For this, we check our mask.
    # If the first "1" found is not at 0, we can do this

    self.get_timeSeries(transform="no")  # raw series, no transform

    pos1 = self.time_mask.tolist().index(1)  # index of the first valid sample

    if (pos1 > 0):  # We actually have an extra sample before the mask.
        ps = self.TD[self.seriesNames].iloc[pos1 - 1]
        ps = np.array(ps).T
        ps = ps.reshape(ps.size // len(self.seriesNames), len(self.seriesNames))
        # Prepend the previous sample so the first masked sample gets a real
        # return, then drop the padded first row.
        self.timeSeriesReturn = bMl.get_return(
            np.concatenate((ps, self.timeSeries), axis=0))
        self.timeSeriesReturn = self.timeSeriesReturn[1:, :]
    else:
        self.timeSeriesReturn = bMl.get_return(self.timeSeries)

    if (transform == "log"):
        # Take log(return + 1) to obtain the log returns.
        self.timeSeriesReturn = np.log(self.timeSeriesReturn + 1)

    return copy.deepcopy(self.timeSeriesReturn)
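
The branch above prepends the previous sample so that the first masked sample gets a real return instead of padding. A minimal standalone sketch of the trick, assuming `bMl.get_return` computes simple returns with a zero in the first row (which is how the code above uses it):

import numpy as np

def simple_returns(ts):
    # Simple returns r_t = p_t / p_{t-1} - 1; the first row has no
    # predecessor, so it is padded with 0 (assumed bMl.get_return behaviour).
    r = np.zeros_like(ts, dtype=float)
    r[1:, :] = ts[1:, :] / ts[:-1, :] - 1
    return r

prices = np.array([[100.0], [101.0], [99.0]])   # masked samples
prev = np.array([[98.0]])                       # sample just before the mask

# Prepend the previous sample, compute returns, drop the padded first row:
r = simple_returns(np.concatenate((prev, prices), axis=0))[1:, :]
# r[0, 0] is now 100/98 - 1 instead of the padded 0.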
Example #2
def Chaikin_vol(df, n=14):
    # Chaikin volatility: EMA of the High-Low range, then its n-period
    # rate of change.
    HLRange = df['High'] - df['Low']
    EMA = HLRange.ewm(span=n, min_periods=n).mean()
    Chaikin_volat = bMA.get_return(ul.fnp(EMA), lag=n, cval=np.NaN)
    Chaikin_volat = ul.fnp(Chaikin_volat)

    return [ul.fnp(EMA), Chaikin_volat]
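
For reference, the same indicator can be written with plain pandas, since `ul.fnp` and `bMA.get_return` are repo-specific helpers. A hedged sketch, assuming `get_return(x, lag=n)` is the n-period rate of change:

import numpy as np
import pandas as pd

def chaikin_vol_pandas(df, n=14):
    # EMA of the High-Low range, then its n-period rate of change.
    hl_range = df['High'] - df['Low']
    ema = hl_range.ewm(span=n, min_periods=n).mean()
    chaikin = ema.pct_change(periods=n)  # (ema_t - ema_{t-n}) / ema_{t-n}
    return ema, chaikin

rng = np.random.default_rng(0)
low = rng.uniform(0.0, 1.0, 100)
df = pd.DataFrame({'High': low + rng.uniform(0.1, 1.0, 100), 'Low': low})
ema, chaikin = chaikin_vol_pandas(df)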
Example #4
def get_timeSeriesReturn(self, seriesNames=[], indexes=[], transform="no"):
    # Gets the return of the time series; if it has not been computed yet,
    # it computes it first.

    # We will try as well to get the return of the first datapoint
    # if we actually have it in the database. For this, we check our mask.
    # If the first "1" found is not at 0, we can do this
    self.set_inner_timeSeries(seriesNames, indexes)
    timeSeries = self.get_timeSeries(seriesNames,
                                     indexes,
                                     transform="no")  # raw series, no transform

    # Position of the first sample we are using
    #    pos1 = self.time_mask[0]

    # TODO: make it work for series names not in the dataset.
    #    if (pos1 > 0 and self.period >= 1440):  # If we actually have more signal.
    if (0):  # Disabled until the TODO above is resolved.
        # We could compute the real previous return by concatenating the
        # previous sample, computing the return and then removing the first 0.

        # For now it only works if the time series is one of the originals,
        # not one of the transformations, because then we would have to apply
        # the transformation as well, and we don't want to.
        #        ps = self.TD[self.seriesNames].iloc[pos1-1]
        #        ps = np.array(ps).T
        #        ps = ps.reshape(ps.size/len(self.seriesNames), len(self.seriesNames))

        ## We obtain the returns of the signal, prepending the previous sample.
        timeSeriesPlus = self.get_timeSeries(
            indexes=np.insert(indexes, 0, pos1 - 1))
        self.timeSeriesReturn = bMl.get_return(timeSeriesPlus)
        self.timeSeriesReturn = self.timeSeriesReturn[1:, :]
    else:
        self.timeSeriesReturn = bMl.get_return(timeSeries)

    if (transform == "log"):
        # Take log(return + 1) to obtain the log returns.
        self.timeSeriesReturn = np.log(self.timeSeriesReturn + 1)

    return copy.deepcopy(self.timeSeriesReturn)
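
The `transform == "log"` branch relies on the identity log(1 + r_t) = log(p_t / p_{t-1}). A quick standalone NumPy check:

import numpy as np

p = np.array([100.0, 105.0, 102.0])
simple = p[1:] / p[:-1] - 1        # simple returns
log_ret = np.log(p[1:] / p[:-1])   # log returns

# log(1 + simple return) equals the log return:
assert np.allclose(np.log(simple + 1), log_ret)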
Example #5
RSI_vel = indl.get_SMA(ATR_vel, L=nsmooth_vel)

###########################################################
################# PREPARE THE DATA ########################
###########################################################
X_data = np.concatenate((MACD[:, [indx]], MACD_vel[:, [indx]]), axis=1)
X_data = np.concatenate((X_data, RSI[:, [indx]], ATR[:, [indx]]), axis=1)
X_data = np.concatenate((X_data, RSI_vel[:, [indx]], ATR_vel[:, [indx]]),
                        axis=1)

Y_data = bMA.diff(prices[:, indx], lag=lag, cval=np.NaN)
Y_data = bMA.shift(Y_data, lag=-lag, cval=np.NaN)

### Returns
lag_ret = 20
return_Ydata = bMA.get_return(prices[:, [indx]], lag=lag_ret)
reconstruct_Ydata = bMA.reconstruc_return(prices[:, [indx]],
                                          return_Ydata,
                                          lag=lag_ret)

gl.plot([], prices[:, [indx]], legend=["price"])
gl.plot([], reconstruct_Ydata, nf=0, legend=["reconstruction"])
gl.plot([], return_Ydata, nf=0, na=1, legend=["return"])

Y_data = return_Ydata
Y_data = bMA.shift(Y_data, lag=-lag, cval=np.NaN)


def filter_by_risk():
    # This function will filter the samples used in the analysis.
    # We should also analyse abs(ret) to detect periods of high risk.
    pass
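
The target construction above (a `lag`-step return followed by a shift of `-lag`) aligns each row with the return over the *next* `lag` bars. A minimal NumPy sketch of the same alignment; `bMA.get_return` and `bMA.shift` are repo helpers, and this assumes they behave as their names and the `cval=np.NaN` padding suggest:

import numpy as np

def lagged_return(p, lag):
    # Return over the previous `lag` steps, NaN-padded at the start.
    r = np.full_like(p, np.nan, dtype=float)
    r[lag:] = p[lag:] / p[:-lag] - 1
    return r

def shift_back(x, steps):
    # Shift values `steps` positions towards index 0, NaN-padding the tail
    # (the equivalent of bMA.shift with a negative lag).
    out = np.full_like(x, np.nan, dtype=float)
    out[:len(x) - steps] = x[steps:]
    return out

p = 100.0 + np.arange(100.0)
lag = 20
y = shift_back(lagged_return(p, lag), lag)
# y[t] = p[t + lag] / p[t] - 1: the forward return the model must predict at t.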
Example #6
# The data was generated by the previous file up to some point;
# this one reads it back in.

######## PANDAS FORMAT
folder_dataFeatures = "./data/"
data = pd.read_csv(folder_dataFeatures + "dataPandas.csv",
                   sep=',', index_col=0)
# Convert the string index to datetime objects.
data.index = ul.str_to_datetime(data.index.tolist())
######## NUMPY ARRAYS FORMAT
X_data = np.loadtxt(folder_dataFeatures + "Xdata.csv", delimiter=",")
price = np.loadtxt(folder_dataFeatures + "price.csv", delimiter=",")
price = price.reshape(price.size, 1)  # Restore the (N, 1) shape it had before being written to disk.
dates = np.loadtxt(folder_dataFeatures + "dates.csv", delimiter=",")
## Generate the Y variable to estimate
lag = 20
Y_data = bMA.get_return(price, lag = lag)
Y_data = bMA.shift(Y_data, lag = -lag, cval = np.NaN)

if (model_OLS):
    # Makes use of the pandas structures
    ##############################################################################
    # Multilinear regression model, calculating fit, P-values, confidence
    # intervals etc.
    # Fit the model
    model = ols("Y ~ MACD + RSI + ATR + MACD_vel + ATR_vel + RSI_vel",
                data).fit()
    params = model.params
    # Print the summary
    print(model.summary())
    print("OLS model Parameters")
    print(params)
    # Perform analysis of variance on the fitted linear model
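
The last comment mentions analysis of variance; statsmodels exposes this directly for formula-fitted models via `anova_lm`. A hedged sketch with synthetic stand-in data (the real `data` frame holds the columns used in the formula above):

import numpy as np
import pandas as pd
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm

rng = np.random.default_rng(0)
df = pd.DataFrame(rng.normal(size=(200, 7)),
                  columns=["Y", "MACD", "RSI", "ATR",
                           "MACD_vel", "ATR_vel", "RSI_vel"])

model = ols("Y ~ MACD + RSI + ATR + MACD_vel + ATR_vel + RSI_vel", df).fit()
print(anova_lm(model))  # per-regressor sum of squares, F statistic, p-value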