def generate_data_reg():
    """Write the regression dataset CSV.

    Reads the "data" and "data_normalized" CSV files (paths resolved via
    ``dd.file_name``), removes the "Signal" column from the normalized
    frame and inserts a "Y" column immediately after the "Date" column.
    "Y" is the output of ``generate_y_reg(data, 'Close')`` shifted up by
    one row, so each row carries the value computed for the following row.
    Rows containing any NaN are dropped and the result is saved under the
    "data_reg" file name, without the index and with nine decimal places.
    """
    raw = pd.read_csv(dd.file_name("data"))
    features = pd.read_csv(dd.file_name("data_normalized"))
    features = features.drop(["Signal"], axis=1)

    # shift(-1) pulls the next row's value onto the current row; the last
    # row therefore becomes NaN and is removed by dropna() below.
    target = generate_y_reg(raw, 'Close').shift(-1)

    y_position = features.columns.get_loc('Date') + 1
    features.insert(y_position, 'Y', target)

    complete_rows = features.dropna()
    complete_rows.to_csv(
        dd.file_name("data_reg"),
        index=False,
        float_format='%.9f',
    )
# def equalize_close_open()


def generate_y(df, col_name):
    """Label each row with a trading signal based on the next row's value.

    The difference ``df[col_name][i] - df[col_name][i + 1]`` is computed
    for every row and mapped to a signal code:

    * difference == 0 -> ``SIGNALS.HOLD()``
    * difference > 0  -> ``SIGNALS.SELL()`` (the next value is lower)
    * difference < 0  -> ``SIGNALS.BUY()``  (the next value is higher)

    Rows whose difference is NaN (always the last row, which has no
    successor) stay NaN.

    All three conditions are evaluated against the raw differences before
    any label is written, so a signal code can never be re-matched by a
    later condition, and no array obtained from ``Series.values`` is
    mutated in place.

    NOTE(review): assumes the SIGNALS codes are numeric -- confirm against
    the SIGNALS definition.

    Args:
        df: DataFrame containing the column ``col_name``.
        col_name: Name of the column to derive the signals from.

    Returns:
        A Series with the same index and name as ``df[col_name]`` holding
        the signal codes.
    """
    diff = df[col_name].diff(periods=-1)
    delta = diff.to_numpy()
    labels = np.select(
        [delta == 0, delta > 0, delta < 0],
        [SIGNALS.HOLD(), SIGNALS.SELL(), SIGNALS.BUY()],
        default=np.nan,  # NaN differences match none of the conditions
    )
    return pd.Series(labels, index=diff.index, name=diff.name)


def log_returns(df, col_name):
    """Return the natural log of each value divided by the previous row's value.

    The first row has no predecessor, so its result is NaN.

    Args:
        df: DataFrame containing the column ``col_name``.
        col_name: Name of the column to compute log returns for.

    Returns:
        A Series of ``log(value[i] / value[i - 1])``.
    """
    ratio = df[col_name] / df[col_name].shift(1)
    return np.log(ratio)


def standardize(df, col_name):
    """Return the column centred on its mean and scaled by its standard deviation.

    Uses ``Series.mean()`` and ``Series.std()`` with their default
    arguments.

    Args:
        df: DataFrame containing the column ``col_name``.
        col_name: Name of the column to standardize.

    Returns:
        A Series of ``(value - mean) / std``.
    """
    col = df[col_name]
    mean = col.mean()
    std = col.std()
    return (col - mean) / std


# def harmonize_dates():

# Script section: load the "SP" CSV for the configured interval period and
# print its standardized "Volume" column.
SP_file_name = dd.file_name("SP", dd.interval_period)
SP = pd.read_csv(SP_file_name)
print(standardize(SP, "Volume"))
# SP[SIGNALS.SIGNAL()] = generate_y(SP, "Close")
# SP.to_csv(SP_file_name)