# Table of sampled correlations: one column per sample size (fractions of
# ser_max up to ser_max itself), one row per random repetition.
periods_test = DataFrame(
    np.zeros((20, 7)),
    columns=[
        int(ser_max / 100),
        int(ser_max / 50),
        int(ser_max / 20),
        int(ser_max / 10),
        int(ser_max / 5),
        int(ser_max / 2),
        ser_max,
    ],
)
for i in periods_test.index:  # Sampling 20 times
    for j in periods_test.columns:
        # Keep j randomly chosen columns of `test`, then correlate its first
        # two rows over that random subset.
        sample = test.reindex(columns=np.random.permutation(test.columns)[:j])
        # .loc is label-based (i is an index label, j a column label); iloc is
        # position-based.  .loc replaces the removed DataFrame.ix indexer.
        periods_test.loc[i, j] = sample.iloc[0].corr(sample.iloc[1])
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(periods_test[:5])
print(periods_test.describe())

# Choose the sample size at which the sampled correlation becomes stable:
# the first column of periods_test whose standard deviation across the
# repeated samplings is below `threshold` while the previous column's was
# at or above it.
threshold = 0.1
# Start from +inf so that a first column already below the threshold counts
# as a crossing; starting from 0 would make that column impossible to select.
temp_std = float("inf")
# NOTE: if no column's std ever drops below `threshold`, this loop leaves
# mini_period unassigned.
for i, std in enumerate(periods_test.std()):
    if std < threshold and temp_std >= threshold:
        mini_period = periods_test.columns[i]
        break
    temp_std = std

# Decide the value of min_periods. Set std 0.05 as threshold
# mini_period = 200
# Number of rows set aside for testing: 20% of the index length, truncated
# to an integer.
check_size = int(0.2 * len(data.index))