Esempio n. 1
0
 def transform(self, X):
     """Turn raw input ``X`` into model-ready arrays.

     Features are produced by ``make_features`` using ``self.target_col``
     and ``self.moving_averages``.  When ``self.production`` is truthy only
     the reshaped prediction matrix is returned; otherwise the result is a
     ``(reshaped_inputs, y)`` pair built by ``data_to_supervised``.
     """
     features = make_features(X, self.target_col, self.moving_averages)

     if not self.production:
         # Supervised path: windowed inputs together with their targets.
         windows, targets = data_to_supervised(features, self.Tx, self.Ty)
         return self._reshape(windows), targets

     # Production path: only the input matrix is built.
     windows = series_to_predict_matrix(features, n_in=self.Tx, dropnan=True)
     return self._reshape(windows)
Esempio n. 2
0
 def transform(self, X):
     """Build continuous-wavelet inputs from ``X``.

     A single feature frame is derived with ``make_single_feature`` for
     ``self.target_col``.  The windowed data is then passed through
     ``continuous_wavelet_transform`` with ``N=self.N`` and
     ``wavelet=self.wavelet``.  Returns the transformed inputs alone when
     ``self.production`` is truthy, otherwise ``(inputs, y)``.
     """
     single = make_single_feature(X, self.target_col)

     if not self.production:
         # Supervised path: window the frame, then transform the inputs.
         windows, y = data_to_supervised(input_df=single,
                                         Tx=self.Tx,
                                         Ty=self.Ty)
         windows = continuous_wavelet_transform(windows,
                                                N=self.N,
                                                wavelet=self.wavelet)
         return windows, y

     # Production path: the target column is handed over as a plain list.
     target_values = single.target.tolist()
     windows = series_to_predict_matrix(target_values,
                                        n_in=self.Tx,
                                        dropnan=True)
     return continuous_wavelet_transform(windows,
                                         N=self.N,
                                         wavelet=self.wavelet)
Esempio n. 3
0
 def transform(self, X):
     """Build discrete-wavelet inputs from ``X``.

     A single feature frame is derived with ``make_single_feature`` for
     ``self.target_col``.  The windowed data is passed through
     ``discrete_wavelet_transform`` with ``wavelet=self.wavelet`` and then
     through ``self._reshape``.  Returns the reshaped inputs alone when
     ``self.production`` is truthy, otherwise ``(reshaped_inputs, y)``.
     """
     single = make_single_feature(X, self.target_col)

     if not self.production:
         # Supervised path: a one-column frame is windowed into inputs and y.
         target_frame = single[['target']]
         windows, y = data_to_supervised(input_df=target_frame,
                                         Tx=self.Tx,
                                         Ty=self.Ty)
         coeffs = discrete_wavelet_transform(windows, wavelet=self.wavelet)
         return self._reshape(coeffs), y

     # Production path: the 'target' column itself is windowed.
     target_series = single['target']
     windows = series_to_predict_matrix(target_series,
                                        n_in=self.Tx,
                                        dropnan=True)
     coeffs = discrete_wavelet_transform(windows, wavelet=self.wavelet)
     return self._reshape(coeffs)
def main():
    """Featurize the raw coin CSVs and save train/test arrays.

    For each of the symbols 'BTC' and 'ETH' this reads
    ``data/raw/<SYM>.csv`` under the project path, builds features with
    ``make_features``, windows them with ``data_to_supervised`` (Tx=72,
    Ty=1), reshapes the inputs with ``make_3d`` and splits them with
    ``train_test_split(test_size=0.05, shuffle=False)``.  The four resulting
    arrays are written with ``np.save`` into ``data/processed``, which is
    created if missing.
    """
    print('Making features from raw data...')

    raw_dir = join(get_project_path(), 'data', 'raw')
    processed_dir = join(get_project_path(), 'data', 'processed')
    makedirs(processed_dir, exist_ok=True)

    target_column = 'close'
    steps_in = 72
    steps_out = 1
    holdout_fraction = 0.05
    # Order matches the tuple returned by train_test_split(X, y, ...).
    split_names = ('X_train', 'X_test', 'y_train', 'y_test')

    for symbol in ['BTC', 'ETH']:
        csv_path = join(raw_dir, symbol + '.csv')
        print('Featurizing raw {} data from {}...'.format(symbol, csv_path))

        raw_frame = pd.read_csv(csv_path, index_col=0)

        features = make_features(
            raw_frame,
            target_col=target_column,
            keep_cols=['close', 'high', 'low', 'volumeto', 'volumefrom'],
            ma_lags=[6, 12, 24, 48],
            ma_cols=['close', 'volumefrom', 'volumeto'])

        X, y = data_to_supervised(features,
                                  target_ix=-1,
                                  Tx=steps_in,
                                  Ty=steps_out)

        # Channel count is derived from the flat column count and steps_in.
        channels = int(X.shape[1] / steps_in)
        X = make_3d(X, tx=steps_in, num_channels=channels)

        splits = train_test_split(
            X, y, test_size=holdout_fraction, shuffle=False)

        for split_name, array in zip(split_names, splits):
            target_file = join(processed_dir,
                               '{}_{}'.format(split_name, symbol))
            np.save(arr=array, file=target_file)
Esempio n. 5
0
plt.title('target')
# plt.figure(); plt.plot(df.filter(regex='v(t|f)')); plt.title('v(t|f)')
plt.show()

# In[7]:

# Report the per-sample dimensions.  The feature count is the number of
# columns in `arr` minus pc['ty'] (presumably the trailing target columns --
# TODO confirm against where `arr` is built).
num_features = arr.shape[1] - pc['ty']
p('Number of Unique Features:', num_features)
p('Number of Hours per Sample:', pc['tx'])
p('Total Features per Sample:', pc['tx'] * num_features)

# In[8]:

# Wrap `arr` in a DataFrame labelled with the transformer's feature names and
# window it into (X, y) using the last column (target_ix=-1) as the target.
X, y = data_to_supervised(input_df=pd.DataFrame(
    data=arr, columns=ct.get_feature_names()),
                          target_ix=-1,
                          Tx=pc['tx'],
                          Ty=pc['ty'])
p(X.head(2))
p(y.head(5))

# In[9]:

# Split without shuffling so row order is preserved across train and test.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=pc['test_fraction'], shuffle=False)
p('Train shape: ', X_train.shape)
p('Test shape: ', X_test.shape)

# In[10]:

fig, ax = plt.subplots(1, figsize=(10, 5))
# Window sizes passed to data_to_supervised below (Tx, Ty) and the test
# fraction; SYM is defined outside this excerpt.
Tx = 72
Ty = 1
TEST_SIZE = 0.05
data_path = join(get_project_path(), 'data', 'raw', SYM + '.csv')

# In[3]:

# Load the raw CSV, using its first column as the index.
data = pd.read_csv(data_path, index_col=0)
data.head()

# In[4]:
"""
Get percent change feature and target data.
"""
# Build the single-feature frame from the 'close' column, then window only
# its 'target' column into supervised (X, y) samples.
df = make_single_feature(input_df=data, target_col='close')
X, y = data_to_supervised(input_df=df[['target']], Tx=Tx, Ty=Ty)
p(X.shape, y.shape)
X.head()

# In[5]:
"""
Confirm data reshape and target/feature creation was done correctly.
"""
# Each target (all but the last row of y) should reappear as the last input
# column of the following sample (all but the first row of X).
y_values_except_last = np.squeeze(y.iloc[:-1].values)
t_minus_1_x_values_except_first = X.iloc[1:, -1].values

# Compare element-wise and then reduce.  Calling .all() on each array first
# would only compare two "all values are truthy" booleans, which says nothing
# about whether the arrays actually match.
(y_values_except_last == t_minus_1_x_values_except_first).all()

# In[6]:
"""
For comparing different transformations
Esempio n. 7
0
# Label the transformed training array with the transformer's feature names.
smooth_data_train = pd.DataFrame(smooth_arr_train,
                                 columns=ct.get_feature_names())

# NOTE(review): this calls fit_transform (not transform) on the test data.
# If `ct` learns any state during fit, the test set is being fitted
# independently of the training set -- confirm this is intended.
smooth_arr_test = ct.fit_transform(feature_data_test)
smooth_data_test = pd.DataFrame(smooth_arr_test,
                                columns=ct.get_feature_names())

smooth_data_train.plot()
plt.show()

# In[5]:
"""
Make time-series data.
"""
# Window train and test separately; the last column (target_ix=-1) is the
# target in both frames.
X_train, y_train = data_to_supervised(input_df=smooth_data_train,
                                      target_ix=-1,
                                      Tx=Tx,
                                      Ty=Ty)
X_test, y_test = data_to_supervised(input_df=smooth_data_test,
                                    target_ix=-1,
                                    Tx=Tx,
                                    Ty=Ty)
p(X_train.head())
p(y_train.head())
"""
Reshape the data into 3d array.
"""
# Channel count is the number of feature_data_train columns plus one
# (presumably the extra column added by `ct` -- TODO confirm).
X_train = make_3d(X_train,
                  tx=Tx,
                  num_channels=len(list(feature_data_train.columns)) + 1)
X_test = make_3d(X_test,
                 tx=Tx,
Esempio n. 8
0
# Notebook configuration: coin symbol, target column, window sizes passed to
# data_to_supervised (Tx, Ty) and the test fraction.
SYM = 'BTC'
TARGET = 'close'
Tx = 72
Ty = 1
TEST_SIZE = 0.05

# Load the raw CSV for SYM, using its first column as the index.
data_path = os.path.join(get_project_path(), 'data', 'raw', SYM + '.csv')
data = pd.read_csv(data_path, index_col=0)
data.head()

# In[3]:
"""
Get percent change feature and target data.
"""
# An empty moving_average_lags list is passed, so no lags are requested.
# Note that target_col is the literal 'close' here rather than TARGET.
df = make_features(input_df=data, target_col='close', moving_average_lags=[])
X, y = data_to_supervised(input_df=df, Tx=Tx, Ty=Ty)
p(X.shape, y.shape)
X.head()

# In[4]:
"""
Confirm data reshape and target/feature creation was done correctly.
"""
# Each target (all but the last row of y) should reappear as the last input
# column of the following sample (all but the first row of X).
y_values_except_last = np.squeeze(y.iloc[:-1].values)
t_minus_1_x_values_except_first = X.iloc[1:, -1].values

# Compare element-wise and then reduce.  Calling .all() on each array first
# would only compare two "all values are truthy" booleans, which says nothing
# about whether the arrays actually match.
(y_values_except_last == t_minus_1_x_values_except_first).all()

# In[5]:
"""
For comparing different transformations
# Take a 100-row slice of train_smooth starting at row 100 for plotting.
sample_ix = 100
sample_n = 100
sample = train_smooth.iloc[sample_ix:sample_ix + sample_n]

# Overlay the original and the Haar-smoothed percent-change columns.
fig, ax = plt.subplots(figsize=(12, 7))
plt.plot(sample['orig__pct_change__close'], label='raw')
plt.plot(sample['haar_smooth__pct_change__close'], label='smoothed')
plt.title('DWT Haar Smoothing')
plt.legend()
plt.show()

# In[7]:
"""
Create time-series samples.
"""
# Window train and test separately with the last column as the target, then
# reshape the inputs with two channels (presumably the raw and smoothed
# percent-change columns -- TODO confirm).
X_train, y_train = data_to_supervised(train_smooth, target_ix=-1, Tx=Tx, Ty=Ty)
X_test, y_test = data_to_supervised(test_smooth, target_ix=-1, Tx=Tx, Ty=Ty)
X_train = make_3d(X_train, tx=Tx, num_channels=2)
X_test = make_3d(X_test, tx=Tx, num_channels=2)

# In[8]:
"""
Save data.
"""
# NOTE(review): output_dir is not created here; np.save will fail if
# data/processed does not already exist -- confirm it is created elsewhere.
output_dir = join(get_project_path(), 'data', 'processed')

# Write the arrays under per-symbol file names.  No save of y_test is
# visible in this excerpt.
np.save(arr=X_train,
        file=join(output_dir, 'X_train_smooth_{}.npy'.format(SYM)))
np.save(arr=X_test, file=join(output_dir, 'X_test_smooth_{}.npy'.format(SYM)))
np.save(arr=y_train,
        file=join(output_dir, 'y_train_smooth_{}.npy'.format(SYM)))