# Apply the boolean row mask `bl` built by the preceding step
# (that step lies above this part of the script).
df = df.loc[bl]

# Keep only the rows whose 'tau0' value is strictly greater than MIN_TAU.
bl = df['tau0'] > MIN_TAU
# Report the removal before filtering: rows before, rows kept, original size.
# `bl.sum()` counts the True entries in one vectorized pass instead of
# iterating over the Series in Python; `int(...)` keeps the argument a
# plain Python integer.
cm.print_removal(
    df.shape[0], int(bl.sum()), ori_size,
    f'We remove samples that have time to maturity less than {MIN_TAU_Days} day'
)
df = df.loc[bl]
"""
1. We choose out-of-the-money calls and puts only.
2. We further restrict the range of moneyness, so that deep out-of-the-money options are excluded.
"""
# Sanity check: the 'delta_bs' column must not contain any missing values.
# `.any()` states the intent directly (the previous form compared the
# integer NA count against 0.5) and the message makes a failure actionable.
assert not df['delta_bs'].isna().any(), \
    "Column 'delta_bs' contains missing values."

# Restrict the sample by moneyness; HALF_MONEY, MIN_M and MAX_M configure
# the selection performed inside the helper.
df = laux.choose_half_shrink_moneyness(df, ori_size, HALF_MONEY, MIN_M, MAX_M)

# Build the feature columns on the remaining rows.
df = laux.make_features(df)
"""
Rolling window
"""

# Run the rolling-window helper with the date and span settings from `setup`.
# It returns the data frame together with a second object stored as
# `df_dates`.
df, df_dates = laux.rolling_window(
    df,
    date_begin=setup.DATE_BEGIN,
    span_train=setup.SPAN_TRAIN,
    span_val=setup.SPAN_VAL,
    span_test=setup.SPAN_TEST,
    date_window=setup.DATE_WINDOW,
    date_end=setup.DATE_END,
    offset=BDay(1),  # offset of one business day
)
    end_date=setup.DATE_BREAK)
# Tag the rows between setup.DATE_BREAK and the configured end date with
# tag=1 and period=0.
# NOTE(review): the return value is discarded, so `tag_data` presumably
# modifies `df` in place -- confirm against the helper.
laux.tag_data(
    df,
    tag=1,
    period=0,
    offset=setup.OFFSET_DICT[setup.FREQ][0],
    start_date=setup.DATE_BREAK,
    end_date=setup.UNDERLYINGPARAS['end_date'],
)


print("Load and clean the training and validation data.")
print(f'Original data size is {df.shape[0]}')


# Remove certain types of samples from the training and validation sets.

# Restrict the sample by moneyness, using the limits configured in `setup`.
df = laux.choose_half_shrink_moneyness(df, ori_size, setup.HALF_MONEY, setup.MIN_M, setup.MAX_M)

# Keep only the rows where 'V1' is present.
# NOTE(review): the message below mentions S1 while the mask is built on
# 'V1' -- confirm which one is intended.
bl = df['V1'].notna()
# `bl.sum()` counts the True entries in one vectorized pass instead of
# iterating over the Series in Python; `int(...)` keeps the argument a
# plain Python integer.
cm.print_removal(df.shape[0], int(bl.sum()), ori_size, 'We remove samples when S1 is not available')

df_train = df.loc[bl]
df_train = laux.make_features(df_train)
# Drop the reference to the unfiltered frame; only `df_train` is used from
# here on.
del df


# Banner announcing the Monte Carlo stage. `sep="\n"` puts the two pieces on
# separate lines, exactly as two consecutive print calls would.
print(
    "\n\n====================",
    "Clean and load all Monte Carlo test data.\n",
    sep="\n",
)


# Load all the Monte Carlo sets together and apply the same selection for
# safety. Do not select any data in other parts of the code.

# Starts out empty.
mc_sets: list = []