def stockLinearRegression(stock_index, start_date, end_date, columns_in):
    '''
    Fit a linear regression to one stock's normalised series and return
    element [0] of the fit (used as the slope).

    Example arguments:
        columns_in = "adj_close"
        stock_index = "000545"
        start_date = "2018-09-02"
        end_date = "2018-09-12"

    @param stock_index: value matched against the stock_index column
    @param start_date: inclusive lower bound on stock_date
    @param end_date: inclusive upper bound on stock_date
    @param columns_in: column (expression) selected alongside stock_date
    @return: -999 when the query returns no rows, otherwise element [0] of
             LinearReg.single_linear_reg(df, 'norm_col')

    Side effect: the returned value is also saved to "slope_out.npy" in the
    current working directory.
    '''
    spark = loadSpark()
    table = "stock_dev.day_history_insert"
    # NOTE(review): the arguments are interpolated straight into the SQL
    # text, so callers must only pass trusted values.
    sql1 = """ select %s,stock_date from %s
    where stock_date >= '%s' and stock_date <= '%s' and
    stock_index = '%s' order by stock_date""" % (columns_in, table, start_date,
                                                 end_date, stock_index)
    my_dataframe = spark.sql(sql1)
    df_cnt = my_dataframe.count()
    if df_cnt == 0:
        logging.info("no data find out")
        return -999

    logging.info("DATA FRAME rows {}".format(df_cnt))
    df1 = my_dataframe.toPandas()
    # Normalise the column that was actually requested. Previously
    # 'adj_close' was hard-coded, which raised KeyError for any other
    # columns_in. When columns_in is not a single result column (e.g. a
    # comma-separated list) keep the old 'adj_close' behaviour.
    value_col = columns_in if columns_in in df1.columns else 'adj_close'
    col_min = df1[value_col].min()
    col_max = df1[value_col].max()
    # The +0.001 keeps the denominator non-zero when all values are equal.
    df1['norm_col'] = (df1[value_col] - col_min) / (col_max - col_min + 0.001)
    logging.info(df1.head(10))
    logging.info(df1.tail(10))
    slope_out = LinearReg.single_linear_reg(df1, 'norm_col')[0]
    np.save("slope_out.npy", slope_out)
    return slope_out
Exemple #2
0
def apply_linear_reg(x):
    '''
    Normalise the 'adj_close' column of x and fit a linear regression to it.

    @param x: dataframe-like object; a 'norm_col' column is written into it
              in place via x.loc
    @return: element [0] of LinearReg.single_linear_reg(x, 'norm_col')
             (used as the slope), or -9999 if any step raises
    '''
    try:
        x.loc[:, 'norm_col'] = norm_col(x, 'adj_close')
        return LinearReg.single_linear_reg(x, 'norm_col')[0]
    except Exception:
        # Best-effort: any ordinary failure maps to the -9999 sentinel.
        # `except Exception` (rather than a bare `except:`) lets
        # KeyboardInterrupt / SystemExit propagate.
        return -9999
Exemple #3
0
def SH_slope(SH_index_table, start_date, end_date):
    '''
    Compute the SH index slope between a start and an end date (inclusive).

    Reads every row of SH_index_table whose stock_date lies in
    [start_date, end_date] through the module-level `spark` session, adds a
    'norm_col' column derived from 'adj_close' via norm_col(), and fits
    LinearReg.single_linear_reg on it.

    @return element [0] of the regression result (used as the slope)
    '''
    query_template = """
        select * from 
        %s where stock_date >= '%s'
        and stock_date <= '%s'
    """
    index_pdf = spark.sql(
        query_template % (SH_index_table, start_date, end_date)).toPandas()
    index_pdf.loc[:, 'norm_col'] = norm_col(index_pdf, 'adj_close')
    fit = LinearReg.single_linear_reg(index_pdf, 'norm_col')
    return fit[0]
Exemple #4
0
def rolling_regression(x, window, sort_col, reg_col):
    '''
    Run a forward-looking rolling linear regression over a dataframe.

    The frame is sorted by sort_col; for every row position i the rows
    i .. i+window-1 are passed to LinearReg.single_linear_reg. Windows near
    the end of the frame hold fewer than `window` rows; the actual row count
    of each window is recorded in 'slope_num_in'.

    @param: x is a dataframe (the caller's frame is left untouched; a sorted
            copy is returned)
    @param: window: regression window (number of rows)
    @param: sort_col: which column sort the DF: e.g. stock_date
    @param: reg_col: which column need to do regression:  e.g. adj_close
    @return: the sorted dataframe with two extra columns: 'slopes' (-999 where
             the regression raised) and 'slope_num_in'
    '''
    x = x.sort_values(sort_col)
    slope = []
    num_in = []
    for i in range(x.shape[0]):
        # Slice and count outside the try so both result lists always get
        # exactly one entry per row and stay aligned with the frame.
        df3 = x.iloc[i:i + window, :]
        num_in.append(df3.shape[0])
        try:
            slope1, _ = LinearReg.single_linear_reg(df3, reg_col)
        except Exception:
            # Sentinel for windows the regression cannot handle; a bare
            # `except:` would also have swallowed KeyboardInterrupt.
            slope1 = -999
        slope.append(slope1)
    x['slopes'] = slope
    x['slope_num_in'] = num_in
    return x
Exemple #5
0
# Compare the first 40 true values against the first 40 predictions and dump
# them side by side to a CSV file.
test_y1 = test_y[0:40].tolist()
p1 = yhat[0:40].tolist()
# chain(*p1) flattens one level of nesting, so each element of p1 must itself
# be iterable -- presumably yhat is 2-D with shape (n, 1); TODO confirm.
predict_y1 = list(itertools.chain(*p1))
data_dict = {"test_Y": test_y1, "predict_Y": predict_y1}
df_out = pd.DataFrame(data_dict)
df_out.to_csv("test.csv")

from sklearn.metrics import mean_squared_error
import numpy as np
# NOTE(review): predict_num is not referenced anywhere in the visible code;
# the slices below hard-code 40 instead.
predict_num = 60
# Mean squared error over the first 40 points, then the same error divided by
# the mean of the true values.
print(mean_squared_error(test_y[0:40], yhat[0:40]))
print(mean_squared_error(test_y[0:40], yhat[0:40]) / np.mean(test_y[0:40]))

# NOTE(review): star import -- which names it brings into scope cannot be
# seen from here; confirm before relying on any of them.
from davidyu_cfg import *
from functions.LinearReg import LinearReg
linear_reg = LinearReg()
# Wrap the same 40-point slices in DataFrames whose single column is named
# "col_in", the column name handed to single_linear_reg below.
test_yy = pd.DataFrame(test_y[0:40])
test_yy.columns = ["col_in"]
yhat_yy = pd.DataFrame(yhat[0:40])
yhat_yy.columns = ["col_in"]

# Print element [0] of each regression result -- presumably the slope, so the
# trend of the true series can be compared with that of the predictions.
print(linear_reg.single_linear_reg(test_yy, "col_in")[0])
print(linear_reg.single_linear_reg(yhat_yy, "col_in")[0])
'''
test_X = test_X.reshape((test_X.shape[0], test_X.shape[2]))
inv_yhat = concatenate((yhat, test_X[:, 1:]), axis=1)
inv_yhat = scaler.inverse_transform(inv_yhat)
inv_yhat = inv_yhat[:,0]
# invert scaling for actual
test_y = test_y.reshape((len(test_y), 1))
inv_y = concatenate((test_y, test_X[:, 1:]), axis=1)
Exemple #6
0
 def linearRegPred(self):
     '''
     Regress the 'adj_close' column of the prediction frame.

     The frame is the df_pred attribute of whatever self.data_select_pred()
     returns.

     @return element [0] of LinearReg.single_linear_reg (used as the slope)
     '''
     pred_frame = self.data_select_pred().df_pred
     return LinearReg.single_linear_reg(pred_frame, 'adj_close')[0]
from davidyu_cfg import *
from functions.LinearReg import LinearReg

def rolling_regression(x, window, reg_col='adj_close'):
    '''
    Run a forward-looking rolling linear regression over a dataframe.

    For every row position i the rows i .. i+window-1 (in the frame's current
    order) are passed to LinearReg.single_linear_reg. Windows near the end of
    the frame hold fewer than `window` rows; the actual row count of each
    window is recorded in 'slope_num_in'.

    @param: x is a dataframe; the two result columns are added to it in place
    @param: window: regression window (number of rows)
    @param: reg_col: column to regress on, defaults to 'adj_close'
    @return: x itself, with two extra columns: 'slopes' (-999 where the
             regression raised) and 'slope_num_in'
    '''
    slope = []
    num_in = []
    for i in range(x.shape[0]):
        # Slice and count outside the try so both result lists always get
        # exactly one entry per row and stay aligned with the frame.
        df3 = x.iloc[i:i + window, :]
        num_in.append(df3.shape[0])
        try:
            slope1, _ = LinearReg.single_linear_reg(df3, reg_col)
        except Exception:
            # Sentinel for windows the regression cannot handle; a bare
            # `except:` would also have swallowed KeyboardInterrupt.
            slope1 = -999
        slope.append(slope1)
    x['slopes'] = slope
    x['slope_num_in'] = num_in
    return x