def generate_data_reg():
    """Build the "data_reg" CSV from the "data" and "data_normalized" CSVs.

    Steps:
      1. Load both input files (paths come from ``dd.file_name``).
      2. Remove the "Signal" column from the normalized frame.
      3. Compute ``generate_y_reg(data, 'Close')`` and shift it by -1, so each
         row receives the value computed for the following row.
      4. Insert that series as column 'Y' immediately after the 'Date' column.
      5. Drop every row containing a NaN and write the result without the
         index, formatting floats with nine decimal places.
    """
    raw = pd.read_csv(dd.file_name("data"))
    normalized = pd.read_csv(dd.file_name("data_normalized"))

    features = normalized.drop(["Signal"], axis=1)
    target = generate_y_reg(raw, 'Close').shift(-1)

    # 'Y' goes directly to the right of 'Date'.
    insert_at = features.columns.get_loc('Date') + 1
    features.insert(insert_at, 'Y', target)

    complete_rows = features.dropna()
    complete_rows.to_csv(
        dd.file_name("data_reg"),
        index=False,
        float_format='%.9f',
    )
# def equalize_close_open()


def generate_y(df, col_name):
    """Label each row of ``df[col_name]`` with a signal code.

    The quantity classified is ``df[col_name].diff(periods=-1)``, i.e. the
    current row minus the following row:

    * difference == 0 -> ``SIGNALS.HOLD()``
    * difference > 0 (the next value is lower) -> ``SIGNALS.SELL()``
    * difference < 0 (the next value is higher) -> ``SIGNALS.BUY()``

    Rows whose difference is NaN (e.g. the last row, which has no successor)
    are left as NaN.

    Args:
        df: DataFrame containing the column to classify.
        col_name: Name of the column in ``df``.

    Returns:
        A Series with the same index as ``df`` holding the signal codes.
    """
    diff = df[col_name].diff(periods=-1)

    # Derive every mask from the raw differences *before* writing any signal
    # code. Evaluating the comparisons one at a time on partially overwritten
    # data would let a freshly written code be re-classified by a later
    # comparison (e.g. a positive HOLD code being turned into SELL).
    hold_mask = diff == 0
    sell_mask = diff > 0
    buy_mask = diff < 0

    # Assign through .loc rather than through ``diff.values``: the array
    # returned by ``.values`` is read-only when pandas Copy-on-Write is active.
    diff.loc[hold_mask] = SIGNALS.HOLD()
    diff.loc[sell_mask] = SIGNALS.SELL()
    diff.loc[buy_mask] = SIGNALS.BUY()
    return diff


def log_returns(df, col_name):
    """Return the natural log of each value divided by the previous row's value.

    The first row has no predecessor, so its result is NaN.

    Args:
        df: DataFrame containing the column.
        col_name: Name of the column in ``df``.

    Returns:
        A Series of ``log(value[t] / value[t - 1])`` aligned with ``df``.
    """
    current = df[col_name]
    previous = df[col_name].shift(1)
    return np.log(current / previous)


def standardize(df, col_name):
    """Return ``df[col_name]`` centred on its mean and scaled by its std.

    Uses the column's own ``mean()`` and ``std()``.

    Args:
        df: DataFrame containing the column.
        col_name: Name of the column in ``df``.

    Returns:
        A Series of ``(value - mean) / std`` aligned with ``df``.
    """
    series = df[col_name]
    center, spread = series.mean(), series.std()
    centered = series - center
    return centered / spread


# def harmonize_dates():
# NOTE(review): the statements below sit at module level, so they run (file
# read + print) every time this module is executed or imported.
# Path comes from dd.file_name("SP", dd.interval_period) — presumably the
# "SP" dataset for the configured interval; confirm against dd.
SP_file_name = dd.file_name("SP", dd.interval_period)
SP = pd.read_csv(SP_file_name)
# Print the "Volume" column after standardize(): (value - mean) / std.
print(standardize(SP, "Volume"))
# Disabled: would store generate_y(SP, "Close") in the column named by
# SIGNALS.SIGNAL() and write the frame back to the same file.
# SP[SIGNALS.SIGNAL()] = generate_y(SP, "Close")
# SP.to_csv(SP_file_name)